From fe052a1810ec4687ee7d606290561af504047707 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Jun 2015 13:08:19 +0300 Subject: target: Use struct t10_pi_tuple It's not a good idea to keep a target-specific definition of the same t10-pi tuple. (Fix v4.2-rc1 patch fuzz - nab) Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 09e682b..db70342 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -754,7 +754,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_link_magic = SE_DEV_LINK_MAGIC; dev->se_hba = hba; dev->transport = hba->backend->ops; - dev->prot_length = sizeof(struct se_dif_v1_tuple); + dev->prot_length = sizeof(struct t10_pi_tuple); dev->hba_index = hba->hba_index; INIT_LIST_HEAD(&dev->dev_list); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index e318ddb..ac72150 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1191,7 +1191,7 @@ void sbc_dif_generate(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_dif_v1_tuple *sdt; + struct t10_pi_tuple *sdt; struct scatterlist *dsg = cmd->t_data_sg, *psg; sector_t sector = cmd->t_task_lba; void *daddr, *paddr; @@ -1203,7 +1203,7 @@ sbc_dif_generate(struct se_cmd *cmd) daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; for (j = 0; j < psg->length; - j += sizeof(struct se_dif_v1_tuple)) { + j += sizeof(*sdt)) { __u16 crc; unsigned int avail; @@ -1256,7 +1256,7 @@ sbc_dif_generate(struct se_cmd *cmd) } static sense_reason_t -sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt, +sbc_dif_v1_verify(struct se_cmd *cmd, struct t10_pi_tuple *sdt, __u16 crc, sector_t sector, unsigned int ei_lba) { __be16 csum; @@ -1346,7 +1346,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, unsigned int ei_lba, struct scatterlist *psg, int psg_off) { struct se_device *dev = cmd->se_dev; - struct se_dif_v1_tuple *sdt; + struct t10_pi_tuple *sdt; struct scatterlist *dsg = cmd->t_data_sg; sector_t sector = start; void *daddr, *paddr; @@ -1361,7 +1361,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, for (i = psg_off; i < psg->length && sector < start + sectors; - i += sizeof(struct se_dif_v1_tuple)) { + i += sizeof(*sdt)) { __u16 crc; unsigned int avail; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 17ae2d6..a681644 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -6,6 +6,7 @@ #include #include #include +#include <linux/t10-pi.h> #include #include @@ -426,12 +427,6 @@ enum target_core_dif_check { TARGET_DIF_CHECK_REFTAG = 0x1 << 2, }; -struct se_dif_v1_tuple { - __be16 guard_tag; - __be16 app_tag; - __be32 ref_tag; -}; - /* for sam_task_attr */ #define TCM_SIMPLE_TAG 0x20 #define TCM_HEAD_TAG 0x21 -- cgit v0.10.2 From b7446cacfb433f5e89ff94afecbc349e404aee21 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 18 Jun 2015 11:43:37 +0200 Subject: tcm_loop: Remove SAS vestiges tcm_loop is able to emulate several protocols, so remove the last vestiges of the SAS protocol.
Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index a556bde..b179d93 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -526,7 +526,7 @@ static inline struct tcm_loop_tpg *tl_tpg(struct se_portal_group *se_tpg) static char *tcm_loop_get_endpoint_wwn(struct se_portal_group *se_tpg) { /* - * Return the passed NAA identifier for the SAS Target Port + * Return the passed NAA identifier for the Target Port */ return &tl_tpg(se_tpg)->tl_hba->tl_wwn_address[0]; } @@ -845,7 +845,7 @@ static int tcm_loop_make_nexus( transport_free_session(tl_nexus->se_sess); goto out; } - /* Now, register the SAS I_T Nexus as active. */ + /* Now, register the I_T Nexus as active. */ transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl, tl_nexus->se_sess, tl_nexus); tl_tpg->tl_nexus = tl_nexus; @@ -884,7 +884,7 @@ static int tcm_loop_drop_nexus( " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba), tl_nexus->se_sess->se_node_acl->initiatorname); /* - * Release the SCSI I_T Nexus to the emulated SAS Target Port + * Release the SCSI I_T Nexus to the emulated Target Port */ transport_deregister_session(tl_nexus->se_sess); tpg->tl_nexus = NULL; @@ -1077,7 +1077,7 @@ static struct se_portal_group *tcm_loop_make_naa_tpg( tl_tpg->tl_hba = tl_hba; tl_tpg->tl_tpgt = tpgt; /* - * Register the tl_tpg as a emulated SAS TCM Target Endpoint + * Register the tl_tpg as a emulated TCM Target Endpoint */ ret = core_tpg_register(wwn, &tl_tpg->tl_se_tpg, tl_hba->tl_proto_id); if (ret < 0) @@ -1102,11 +1102,11 @@ static void tcm_loop_drop_naa_tpg( tl_hba = tl_tpg->tl_hba; tpgt = tl_tpg->tl_tpgt; /* - * Release the I_T Nexus for the Virtual SAS link if present + * Release the I_T Nexus for the Virtual target link if present */ tcm_loop_drop_nexus(tl_tpg); /* - * Deregister the tl_tpg as a emulated SAS TCM Target Endpoint + * Deregister the tl_tpg as a emulated TCM Target Endpoint */ core_tpg_deregister(se_tpg); @@ -1199,8 +1199,9 @@ static void tcm_loop_drop_scsi_hba( struct tcm_loop_hba, tl_hba_wwn); pr_debug("TCM_Loop_ConfigFS: Deallocating emulated Target" - " SAS Address: %s at Linux/SCSI Host ID: %d\n", - tl_hba->tl_wwn_address, tl_hba->sh->host_no); + " %s Address: %s at Linux/SCSI Host ID: %d\n", + tcm_loop_dump_proto_id(tl_hba), tl_hba->tl_wwn_address, + tl_hba->sh->host_no); /* * Call device_unregister() on the original tl_hba->dev. * tcm_loop_fabric_scsi.c:tcm_loop_release_adapter() will -- cgit v0.10.2 From e986a35aba67558381d5cec59a14c4d0b20f0d47 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 18 Jun 2015 11:43:38 +0200 Subject: tcm_loop: Send I_T_NEXUS_LOSS_OCCURRED UA If the virtual SAS link is set to 'offline' we should be queueing an I_T_NEXUS_LOSS_OCCURRED UA. 
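For illustration, a minimal sketch (not part of the patch) of the fixed-format sense payload such a unit attention produces once the initiator's next command is completed with CHECK CONDITION; the 0x29 ASC comes from the core_scsi3_ua_allocate() call below, while the 0x07 ASCQ behind ASCQ_29H_NEXUS_LOSS_OCCURRED is an assumption based on SPC-4:

	#include <stdint.h>

	/* Sketch: fixed-format sense carrying the queued UA. */
	static void fill_nexus_loss_sense(uint8_t sense[18])
	{
		sense[0]  = 0x70;	/* current error, fixed format */
		sense[2]  = 0x06;	/* sense key: UNIT ATTENTION */
		sense[7]  = 0x0a;	/* additional sense length */
		sense[12] = 0x29;	/* ASC, from the hunk below */
		sense[13] = 0x07;	/* ASCQ: I_T NEXUS LOSS OCCURRED (assumed) */
	}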
Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index b179d93..5bc85ff 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -1034,6 +1034,11 @@ static ssize_t tcm_loop_tpg_store_transport_status( } if (!strncmp(page, "offline", 7)) { tl_tpg->tl_transport_status = TCM_TRANSPORT_OFFLINE; + if (tl_tpg->tl_nexus) { + struct se_session *tl_sess = tl_tpg->tl_nexus->se_sess; + + core_allocate_nexus_loss_ua(tl_sess->se_node_acl); + } return count; } return -EINVAL; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index babde4a..2d0381d 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -41,6 +41,7 @@ #include "target_core_internal.h" #include "target_core_alua.h" #include "target_core_pr.h" +#include "target_core_ua.h" extern struct se_device *g_lun0_dev; @@ -83,6 +84,22 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( } EXPORT_SYMBOL(core_tpg_get_initiator_node_acl); +void core_allocate_nexus_loss_ua( + struct se_node_acl *nacl) +{ + struct se_dev_entry *deve; + + if (!nacl) + return; + + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) + core_scsi3_ua_allocate(deve, 0x29, + ASCQ_29H_NEXUS_LOSS_OCCURRED); + rcu_read_unlock(); +} +EXPORT_SYMBOL(core_allocate_nexus_loss_ua); + /* core_tpg_add_node_to_devs(): * * diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 18afef9..69355fe 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -152,6 +152,7 @@ int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u64); +void core_allocate_nexus_loss_ua(struct se_node_acl *acl); struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, unsigned char *); -- cgit v0.10.2 From 46d5bd62ef9e3d6e2018963cbb725c91f864922d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 8 Jul 2015 17:58:50 +0300 Subject: target: Inline transport_get_sense_codes() Inline this function in its call site since it performs a trivial task and since it is only called once. Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ce8574b..b6708d1 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2615,17 +2615,6 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) } EXPORT_SYMBOL(transport_wait_for_tasks); -static int transport_get_sense_codes( - struct se_cmd *cmd, - u8 *asc, - u8 *ascq) -{ - *asc = cmd->scsi_asc; - *ascq = cmd->scsi_ascq; - - return 0; -} - static void transport_err_sector_info(unsigned char *buffer, sector_t bad_sector) { @@ -2819,9 +2808,8 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; /* Not Ready */ buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY; - transport_get_sense_codes(cmd, &asc, &ascq); - buffer[SPC_ASC_KEY_OFFSET] = asc; - buffer[SPC_ASCQ_KEY_OFFSET] = ascq; + buffer[SPC_ASC_KEY_OFFSET] = cmd->scsi_asc; + buffer[SPC_ASCQ_KEY_OFFSET] = cmd->scsi_ascq; break; case TCM_MISCOMPARE_VERIFY: /* CURRENT ERROR */ -- cgit v0.10.2 From ab78fef4d5f79134042ae0e1e2c259e1226aa5bd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 8 Jul 2015 17:58:51 +0300 Subject: target: Split transport_send_check_condition_and_sense() Move the code that translates a sense_reason_t code into SCSI sense key, ASC and ASCQ codes from transport_send_check_condition_and_sense() into the new function translate_sense_reason(). Convert the switch statement that performs the translation into table-driven code. Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index b6708d1..6ef44c9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2628,13 +2628,155 @@ void transport_err_sector_info(unsigned char *buffer, sector_t bad_sector) put_unaligned_be64(bad_sector, &buffer[12]); } +struct sense_info { + u8 key; + u8 asc; + u8 ascq; + bool add_sector_info; +}; + +static const struct sense_info sense_info_table[] = { + [TCM_NO_SENSE] = { + .key = NOT_READY + }, + [TCM_NON_EXISTENT_LUN] = { + .key = ILLEGAL_REQUEST, + .asc = 0x25 /* LOGICAL UNIT NOT SUPPORTED */ + }, + [TCM_UNSUPPORTED_SCSI_OPCODE] = { + .key = ILLEGAL_REQUEST, + .asc = 0x20, /* INVALID COMMAND OPERATION CODE */ + }, + [TCM_SECTOR_COUNT_TOO_MANY] = { + .key = ILLEGAL_REQUEST, + .asc = 0x20, /* INVALID COMMAND OPERATION CODE */ + }, + [TCM_UNKNOWN_MODE_PAGE] = { + .key = ILLEGAL_REQUEST, + .asc = 0x24, /* INVALID FIELD IN CDB */ + }, + [TCM_CHECK_CONDITION_ABORT_CMD] = { + .key = ABORTED_COMMAND, + .asc = 0x29, /* BUS DEVICE RESET FUNCTION OCCURRED */ + .ascq = 0x03, + }, + [TCM_INCORRECT_AMOUNT_OF_DATA] = { + .key = ABORTED_COMMAND, + .asc = 0x0c, /* WRITE ERROR */ + .ascq = 0x0d, /* NOT ENOUGH UNSOLICITED DATA */ + }, + [TCM_INVALID_CDB_FIELD] = { + .key = ILLEGAL_REQUEST, + .asc = 0x24, /* INVALID FIELD IN CDB */ + }, + [TCM_INVALID_PARAMETER_LIST] = { + .key = ILLEGAL_REQUEST, + .asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */ + }, + [TCM_PARAMETER_LIST_LENGTH_ERROR] = { + .key = ILLEGAL_REQUEST, + .asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */ + }, + [TCM_UNEXPECTED_UNSOLICITED_DATA] = { + .key = ILLEGAL_REQUEST, + .asc = 0x0c, /* WRITE ERROR */ + .ascq = 0x0c, /* UNEXPECTED_UNSOLICITED_DATA */ + }, + [TCM_SERVICE_CRC_ERROR] = { + .key =
ABORTED_COMMAND, + .asc = 0x47, /* PROTOCOL SERVICE CRC ERROR */ + .ascq = 0x05, /* N/A */ + }, + [TCM_SNACK_REJECTED] = { + .key = ABORTED_COMMAND, + .asc = 0x11, /* READ ERROR */ + .ascq = 0x13, /* FAILED RETRANSMISSION REQUEST */ + }, + [TCM_WRITE_PROTECTED] = { + .key = DATA_PROTECT, + .asc = 0x27, /* WRITE PROTECTED */ + }, + [TCM_ADDRESS_OUT_OF_RANGE] = { + .key = ILLEGAL_REQUEST, + .asc = 0x21, /* LOGICAL BLOCK ADDRESS OUT OF RANGE */ + }, + [TCM_CHECK_CONDITION_UNIT_ATTENTION] = { + .key = UNIT_ATTENTION, + }, + [TCM_CHECK_CONDITION_NOT_READY] = { + .key = NOT_READY, + }, + [TCM_MISCOMPARE_VERIFY] = { + .key = MISCOMPARE, + .asc = 0x1d, /* MISCOMPARE DURING VERIFY OPERATION */ + .ascq = 0x00, + }, + [TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED] = { + .key = ILLEGAL_REQUEST, + .asc = 0x10, + .ascq = 0x01, /* LOGICAL BLOCK GUARD CHECK FAILED */ + .add_sector_info = true, + }, + [TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED] = { + .key = ILLEGAL_REQUEST, + .asc = 0x10, + .ascq = 0x02, /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */ + .add_sector_info = true, + }, + [TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED] = { + .key = ILLEGAL_REQUEST, + .asc = 0x10, + .ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */ + .add_sector_info = true, + }, + [TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE] = { + /* + * Returning ILLEGAL REQUEST would cause immediate IO errors on + * Solaris initiators. Returning NOT READY instead means the + * operations will be retried a finite number of times and we + * can survive intermittent errors. + */ + .key = NOT_READY, + .asc = 0x08, /* LOGICAL UNIT COMMUNICATION FAILURE */ + }, +}; + +static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) +{ + const struct sense_info *si; + u8 *buffer = cmd->sense_buffer; + int r = (__force int)reason; + u8 asc, ascq; + + if (r < ARRAY_SIZE(sense_info_table) && sense_info_table[r].key) + si = &sense_info_table[r]; + else + si = &sense_info_table[(__force int) + TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE]; + + buffer[SPC_SENSE_KEY_OFFSET] = si->key; + if (reason == TCM_CHECK_CONDITION_UNIT_ATTENTION) { + core_scsi3_ua_for_check_condition(cmd, &asc, &ascq); + WARN_ON_ONCE(asc == 0); + } else if (si->asc == 0) { + WARN_ON_ONCE(cmd->scsi_asc == 0); + asc = cmd->scsi_asc; + ascq = cmd->scsi_ascq; + } else { + asc = si->asc; + ascq = si->ascq; + } + buffer[SPC_ASC_KEY_OFFSET] = asc; + buffer[SPC_ASCQ_KEY_OFFSET] = ascq; + if (si->add_sector_info) + transport_err_sector_info(cmd->sense_buffer, cmd->bad_sector); +} + int transport_send_check_condition_and_sense(struct se_cmd *cmd, sense_reason_t reason, int from_transport) { - unsigned char *buffer = cmd->sense_buffer; unsigned long flags; - u8 asc = 0, ascq = 0; spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) { @@ -2644,242 +2786,13 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, cmd->se_cmd_flags |= SCF_SENT_CHECK_CONDITION; spin_unlock_irqrestore(&cmd->t_state_lock, flags); - if (!reason && from_transport) - goto after_reason; - - if (!from_transport) + if (!from_transport) { cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE; - - /* - * Actual SENSE DATA, see SPC-3 7.23.2 SPC_SENSE_KEY_OFFSET uses - * SENSE KEY values from include/scsi/scsi.h - */ - switch (reason) { - case TCM_NO_SENSE: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* Not Ready */ - buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY; - /* NO ADDITIONAL SENSE INFORMATION */ - buffer[SPC_ASC_KEY_OFFSET] = 0; - 
buffer[SPC_ASCQ_KEY_OFFSET] = 0; - break; - case TCM_NON_EXISTENT_LUN: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* LOGICAL UNIT NOT SUPPORTED */ - buffer[SPC_ASC_KEY_OFFSET] = 0x25; - break; - case TCM_UNSUPPORTED_SCSI_OPCODE: - case TCM_SECTOR_COUNT_TOO_MANY: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* INVALID COMMAND OPERATION CODE */ - buffer[SPC_ASC_KEY_OFFSET] = 0x20; - break; - case TCM_UNKNOWN_MODE_PAGE: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* INVALID FIELD IN CDB */ - buffer[SPC_ASC_KEY_OFFSET] = 0x24; - break; - case TCM_CHECK_CONDITION_ABORT_CMD: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ABORTED COMMAND */ - buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND; - /* BUS DEVICE RESET FUNCTION OCCURRED */ - buffer[SPC_ASC_KEY_OFFSET] = 0x29; - buffer[SPC_ASCQ_KEY_OFFSET] = 0x03; - break; - case TCM_INCORRECT_AMOUNT_OF_DATA: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ABORTED COMMAND */ - buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND; - /* WRITE ERROR */ - buffer[SPC_ASC_KEY_OFFSET] = 0x0c; - /* NOT ENOUGH UNSOLICITED DATA */ - buffer[SPC_ASCQ_KEY_OFFSET] = 0x0d; - break; - case TCM_INVALID_CDB_FIELD: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* INVALID FIELD IN CDB */ - buffer[SPC_ASC_KEY_OFFSET] = 0x24; - break; - case TCM_INVALID_PARAMETER_LIST: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* INVALID FIELD IN PARAMETER LIST */ - buffer[SPC_ASC_KEY_OFFSET] = 0x26; - break; - case TCM_PARAMETER_LIST_LENGTH_ERROR: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* PARAMETER LIST LENGTH ERROR */ - buffer[SPC_ASC_KEY_OFFSET] = 0x1a; - break; - case TCM_UNEXPECTED_UNSOLICITED_DATA: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ABORTED COMMAND */ - buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND; - /* WRITE ERROR */ - buffer[SPC_ASC_KEY_OFFSET] = 0x0c; - /* UNEXPECTED_UNSOLICITED_DATA */ - buffer[SPC_ASCQ_KEY_OFFSET] = 0x0c; - break; - case TCM_SERVICE_CRC_ERROR: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ABORTED COMMAND */ - buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND; - /* PROTOCOL SERVICE CRC ERROR */ - buffer[SPC_ASC_KEY_OFFSET] = 0x47; - /* N/A */ - buffer[SPC_ASCQ_KEY_OFFSET] = 0x05; - break; - case TCM_SNACK_REJECTED: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ABORTED COMMAND */ - buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND; - /* READ ERROR */ - buffer[SPC_ASC_KEY_OFFSET] = 0x11; - /* FAILED RETRANSMISSION REQUEST */ - buffer[SPC_ASCQ_KEY_OFFSET] = 0x13; - break; - case TCM_WRITE_PROTECTED: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* DATA PROTECT */ - buffer[SPC_SENSE_KEY_OFFSET] = DATA_PROTECT; - /* WRITE PROTECTED */ - 
buffer[SPC_ASC_KEY_OFFSET] = 0x27; - break; - case TCM_ADDRESS_OUT_OF_RANGE: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* LOGICAL BLOCK ADDRESS OUT OF RANGE */ - buffer[SPC_ASC_KEY_OFFSET] = 0x21; - break; - case TCM_CHECK_CONDITION_UNIT_ATTENTION: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* UNIT ATTENTION */ - buffer[SPC_SENSE_KEY_OFFSET] = UNIT_ATTENTION; - core_scsi3_ua_for_check_condition(cmd, &asc, &ascq); - buffer[SPC_ASC_KEY_OFFSET] = asc; - buffer[SPC_ASCQ_KEY_OFFSET] = ascq; - break; - case TCM_CHECK_CONDITION_NOT_READY: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* Not Ready */ - buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY; - buffer[SPC_ASC_KEY_OFFSET] = cmd->scsi_asc; - buffer[SPC_ASCQ_KEY_OFFSET] = cmd->scsi_ascq; - break; - case TCM_MISCOMPARE_VERIFY: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - buffer[SPC_SENSE_KEY_OFFSET] = MISCOMPARE; - /* MISCOMPARE DURING VERIFY OPERATION */ - buffer[SPC_ASC_KEY_OFFSET] = 0x1d; - buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; - break; - case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* LOGICAL BLOCK GUARD CHECK FAILED */ - buffer[SPC_ASC_KEY_OFFSET] = 0x10; - buffer[SPC_ASCQ_KEY_OFFSET] = 0x01; - transport_err_sector_info(buffer, cmd->bad_sector); - break; - case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */ - buffer[SPC_ASC_KEY_OFFSET] = 0x10; - buffer[SPC_ASCQ_KEY_OFFSET] = 0x02; - transport_err_sector_info(buffer, cmd->bad_sector); - break; - case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* ILLEGAL REQUEST */ - buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; - /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */ - buffer[SPC_ASC_KEY_OFFSET] = 0x10; - buffer[SPC_ASCQ_KEY_OFFSET] = 0x03; - transport_err_sector_info(buffer, cmd->bad_sector); - break; - case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: - default: - /* CURRENT ERROR */ - buffer[0] = 0x70; - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; - /* - * Returning ILLEGAL REQUEST would cause immediate IO errors on - * Solaris initiators. Returning NOT READY instead means the - * operations will be retried a finite number of times and we - * can survive intermittent errors. - */ - buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY; - /* LOGICAL UNIT COMMUNICATION FAILURE */ - buffer[SPC_ASC_KEY_OFFSET] = 0x08; - break; + translate_sense_reason(cmd, reason); + cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER; } - /* - * This code uses linux/include/scsi/scsi.h SAM status codes! - */ - cmd->scsi_status = SAM_STAT_CHECK_CONDITION; - /* - * Automatically padded, this value is encoded in the fabric's - * data_length response PDU containing the SCSI defined sense data. 
- */ - cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER; -after_reason: trace_target_cmd_complete(cmd); return cmd->se_tfo->queue_status(cmd); } -- cgit v0.10.2 From 7708c1656552ddd60b9b9df3a9ee156acd1801ba Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Jul 2015 17:58:52 +0300 Subject: scsi: Move sense handling routines to scsi_common Sense data handling is also done in the target stack. Hence, move sense handling routines to scsi_common so the target will be able to use them as well. Signed-off-by: Sagi Grimberg Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index 2ff0922..41432c1 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -5,6 +5,7 @@ #include #include #include +#include #include /* NB: These are exposed through /proc/scsi/scsi and form part of the ABI. @@ -176,3 +177,100 @@ bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, return true; } EXPORT_SYMBOL(scsi_normalize_sense); + +/** + * scsi_sense_desc_find - search for a given descriptor type in descriptor sense data format. + * @sense_buffer: byte array of descriptor format sense data + * @sb_len: number of valid bytes in sense_buffer + * @desc_type: value of descriptor type to find + * (e.g. 0 -> information) + * + * Notes: + * only valid when sense data is in descriptor format + * + * Return value: + * pointer to start of (first) descriptor if found else NULL + */ +const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, + int desc_type) +{ + int add_sen_len, add_len, desc_len, k; + const u8 * descp; + + if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7]))) + return NULL; + if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73)) + return NULL; + add_sen_len = (add_sen_len < (sb_len - 8)) ? + add_sen_len : (sb_len - 8); + descp = &sense_buffer[8]; + for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) { + descp += desc_len; + add_len = (k < (add_sen_len - 1)) ? descp[1]: -1; + desc_len = add_len + 2; + if (descp[0] == desc_type) + return descp; + if (add_len < 0) // short descriptor ?? 
+ break; + } + return NULL; +} +EXPORT_SYMBOL(scsi_sense_desc_find); + +/** + * scsi_build_sense_buffer - build sense data in a buffer + * @desc: Sense format (non zero == descriptor format, + * 0 == fixed format) + * @buf: Where to build sense data + * @key: Sense key + * @asc: Additional sense code + * @ascq: Additional sense code qualifier + * + **/ +void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq) +{ + if (desc) { + buf[0] = 0x72; /* descriptor, current */ + buf[1] = key; + buf[2] = asc; + buf[3] = ascq; + buf[7] = 0; + } else { + buf[0] = 0x70; /* fixed, current */ + buf[2] = key; + buf[7] = 0xa; + buf[12] = asc; + buf[13] = ascq; + } +} +EXPORT_SYMBOL(scsi_build_sense_buffer); + +/** + * scsi_set_sense_information - set the information field in a + * formatted sense data buffer + * @buf: Where to build sense data + * @info: 64-bit information value to be set + * + **/ +void scsi_set_sense_information(u8 *buf, u64 info) +{ + if ((buf[0] & 0x7f) == 0x72) { + u8 *ucp, len; + + len = buf[7]; + ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0); + if (!ucp) { + buf[7] = len + 0xa; + ucp = buf + 8 + len; + } + ucp[0] = 0; + ucp[1] = 0xa; + ucp[2] = 0x80; /* Valid bit */ + ucp[3] = 0; + put_unaligned_be64(info, &ucp[4]); + } else if ((buf[0] & 0x7f) == 0x70) { + buf[0] |= 0x80; + put_unaligned_be64(info, &buf[3]); + } +} +EXPORT_SYMBOL(scsi_set_sense_information); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 106884a..6e6b2d2 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -34,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -2408,45 +2408,6 @@ bool scsi_command_normalize_sense(const struct scsi_cmnd *cmd, EXPORT_SYMBOL(scsi_command_normalize_sense); /** - * scsi_sense_desc_find - search for a given descriptor type in descriptor sense data format. - * @sense_buffer: byte array of descriptor format sense data - * @sb_len: number of valid bytes in sense_buffer - * @desc_type: value of descriptor type to find - * (e.g. 0 -> information) - * - * Notes: - * only valid when sense data is in descriptor format - * - * Return value: - * pointer to start of (first) descriptor if found else NULL - */ -const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, - int desc_type) -{ - int add_sen_len, add_len, desc_len, k; - const u8 * descp; - - if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7]))) - return NULL; - if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73)) - return NULL; - add_sen_len = (add_sen_len < (sb_len - 8)) ? - add_sen_len : (sb_len - 8); - descp = &sense_buffer[8]; - for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) { - descp += desc_len; - add_len = (k < (add_sen_len - 1)) ? descp[1]: -1; - desc_len = add_len + 2; - if (descp[0] == desc_type) - return descp; - if (add_len < 0) // short descriptor ?? 
- break; - } - return NULL; -} -EXPORT_SYMBOL(scsi_sense_desc_find); - -/** * scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format) * @sense_buffer: byte array of sense data * @sb_len: number of valid bytes in sense_buffer @@ -2495,61 +2456,3 @@ int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, } } EXPORT_SYMBOL(scsi_get_sense_info_fld); - -/** - * scsi_build_sense_buffer - build sense data in a buffer - * @desc: Sense format (non zero == descriptor format, - * 0 == fixed format) - * @buf: Where to build sense data - * @key: Sense key - * @asc: Additional sense code - * @ascq: Additional sense code qualifier - * - **/ -void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq) -{ - if (desc) { - buf[0] = 0x72; /* descriptor, current */ - buf[1] = key; - buf[2] = asc; - buf[3] = ascq; - buf[7] = 0; - } else { - buf[0] = 0x70; /* fixed, current */ - buf[2] = key; - buf[7] = 0xa; - buf[12] = asc; - buf[13] = ascq; - } -} -EXPORT_SYMBOL(scsi_build_sense_buffer); - -/** - * scsi_set_sense_information - set the information field in a - * formatted sense data buffer - * @buf: Where to build sense data - * @info: 64-bit information value to be set - * - **/ -void scsi_set_sense_information(u8 *buf, u64 info) -{ - if ((buf[0] & 0x7f) == 0x72) { - u8 *ucp, len; - - len = buf[7]; - ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0); - if (!ucp) { - buf[7] = len + 0xa; - ucp = buf + 8 + len; - } - ucp[0] = 0; - ucp[1] = 0xa; - ucp[2] = 0x80; /* Valid bit */ - ucp[3] = 0; - put_unaligned_be64(info, &ucp[4]); - } else if ((buf[0] & 0x7f) == 0x70) { - buf[0] |= 0x80; - put_unaligned_be64(info, &buf[3]); - } -} -EXPORT_SYMBOL(scsi_set_sense_information); diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index 676b03b..156d673 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -61,4 +61,9 @@ static inline bool scsi_sense_valid(const struct scsi_sense_hdr *sshdr) extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr); +extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); +extern void scsi_set_sense_information(u8 *buf, u64 info); +extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, + int desc_type); + #endif /* _SCSI_COMMON_H_ */ diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 4942710..dbb8c64 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -4,6 +4,7 @@ #include #include +#include struct scsi_device; struct Scsi_Host; @@ -21,15 +22,9 @@ static inline bool scsi_sense_is_deferred(const struct scsi_sense_hdr *sshdr) return ((sshdr->response_code >= 0x70) && (sshdr->response_code & 1)); } -extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, - int desc_type); - extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, u64 * info_out); -extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); -extern void scsi_set_sense_information(u8 *buf, u64 info); - extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { -- cgit v0.10.2 From 9ec1e1ce3a0f854b9150e7888a373392fbbe7442 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Jul 2015 17:58:53 +0300 Subject: target: Use scsi helpers to build the sense data correctly Instead of open coding the sense buffer construction, use scsi scsi_build_sense_buffer() and scsi_set_sense_information() helpers which moved to scsi_common. 
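A minimal usage sketch of the relocated helpers (mirroring the function bodies shown in the scsi_common.c move above; scsi_set_sense_information() is used in its two-argument form as it exists at this point in the series — a later patch adds a length parameter — and ILLEGAL_REQUEST is the usual SCSI sense-key constant):

	/* Build fixed-format sense for ILLEGAL REQUEST / INVALID FIELD IN CDB
	 * and stamp a failing LBA into the information field. */
	u8 buf[18] = { 0 };

	scsi_build_sense_buffer(0 /* fixed format */, buf, ILLEGAL_REQUEST, 0x24, 0x00);
	/* per the helper body: buf[0] = 0x70, buf[2] = key, buf[7] = 0xa,
	 * buf[12] = asc, buf[13] = ascq */

	scsi_set_sense_information(buf, 0x1234ULL);
	/* fixed-format branch: sets the valid bit (buf[0] |= 0x80) and
	 * stores the value big-endian starting at buf[3] */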
Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index b074443..c43dcbf 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1157,32 +1157,11 @@ static sense_reason_t spc_emulate_request_sense(struct se_cmd *cmd) if (!rbuf) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq)) { - /* - * CURRENT ERROR, UNIT ATTENTION - */ - buf[0] = 0x70; - buf[SPC_SENSE_KEY_OFFSET] = UNIT_ATTENTION; - - /* - * The Additional Sense Code (ASC) from the UNIT ATTENTION - */ - buf[SPC_ASC_KEY_OFFSET] = ua_asc; - buf[SPC_ASCQ_KEY_OFFSET] = ua_ascq; - buf[7] = 0x0A; - } else { - /* - * CURRENT ERROR, NO SENSE - */ - buf[0] = 0x70; - buf[SPC_SENSE_KEY_OFFSET] = NO_SENSE; - - /* - * NO ADDITIONAL SENSE INFORMATION - */ - buf[SPC_ASC_KEY_OFFSET] = 0x00; - buf[7] = 0x0A; - } + if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq)) + scsi_build_sense_buffer(0, buf, UNIT_ATTENTION, + ua_asc, ua_ascq); + else + scsi_build_sense_buffer(0, buf, NO_SENSE, 0x0, 0x0); memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd->data_length)); transport_kunmap_data_sg(cmd); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 6ef44c9..f528a9d 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -2615,19 +2616,6 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) } EXPORT_SYMBOL(transport_wait_for_tasks); -static -void transport_err_sector_info(unsigned char *buffer, sector_t bad_sector) -{ - /* Place failed LBA in sense data information descriptor 0. */ - buffer[SPC_ADD_SENSE_LEN_OFFSET] = 0xc; - buffer[SPC_DESC_TYPE_OFFSET] = 0; /* Information */ - buffer[SPC_ADDITIONAL_DESC_LEN_OFFSET] = 0xa; - buffer[SPC_VALIDITY_OFFSET] = 0x80; - - /* Descriptor Information: failing sector */ - put_unaligned_be64(bad_sector, &buffer[12]); -} - struct sense_info { u8 key; u8 asc; @@ -2754,7 +2742,6 @@ static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) si = &sense_info_table[(__force int) TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE]; - buffer[SPC_SENSE_KEY_OFFSET] = si->key; if (reason == TCM_CHECK_CONDITION_UNIT_ATTENTION) { core_scsi3_ua_for_check_condition(cmd, &asc, &ascq); WARN_ON_ONCE(asc == 0); @@ -2766,10 +2753,10 @@ static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) asc = si->asc; ascq = si->ascq; } - buffer[SPC_ASC_KEY_OFFSET] = asc; - buffer[SPC_ASCQ_KEY_OFFSET] = ascq; + + scsi_build_sense_buffer(0, buffer, si->key, asc, ascq); if (si->add_sector_info) - transport_err_sector_info(cmd->sense_buffer, cmd->bad_sector); + scsi_set_sense_information(buffer, cmd->bad_sector); } int -- cgit v0.10.2 From 734ca5c467842186bf836d0b33379a51cfe259da Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Jul 2015 17:58:54 +0300 Subject: target: Return ABORTED_COMMAND sense key for PI errors PI errors were reported with ILLEGAL_REQUEST sense key but there was actually no problem with the request. Target detected PI errors should be reported with aborted command sense key. Signed-off-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index f528a9d..2bece60 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2700,19 +2700,19 @@ static const struct sense_info sense_info_table[] = { .ascq = 0x00, }, [TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED] = { - .key = ILLEGAL_REQUEST, + .key = ABORTED_COMMAND, .asc = 0x10, .ascq = 0x01, /* LOGICAL BLOCK GUARD CHECK FAILED */ .add_sector_info = true, }, [TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED] = { - .key = ILLEGAL_REQUEST, + .key = ABORTED_COMMAND, .asc = 0x10, .ascq = 0x02, /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */ .add_sector_info = true, }, [TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED] = { - .key = ILLEGAL_REQUEST, + .key = ABORTED_COMMAND, .asc = 0x10, .ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */ .add_sector_info = true, -- cgit v0.10.2 From 3e963b2d3c93e0546e911d681f37d35f0f79b54f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Jul 2015 07:33:07 -0700 Subject: tcm_qla2xxx: Remove set-but-not-used variables Detected these by building with W=1. This patch does not change any functionality. Signed-off-by: Bart Van Assche Acked-by: Himanshu Madhani Cc: Quinn Tran Cc: Saurav Kashyap Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index d9a8c60..474d0c0 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1367,9 +1367,7 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) struct qla_hw_data *ha = tgt->ha; scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); struct se_session *se_sess; - struct se_node_acl *se_nacl; struct tcm_qla2xxx_lport *lport; - struct tcm_qla2xxx_nacl *nacl; BUG_ON(in_interrupt()); @@ -1379,8 +1377,6 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) dump_stack(); return; } - se_nacl = se_sess->se_node_acl; - nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); lport = vha->vha_tgt.target_lport_ptr; if (!lport) { @@ -1684,7 +1680,6 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, (struct tcm_qla2xxx_lport *)target_lport_ptr; struct tcm_qla2xxx_lport *base_lport = (struct tcm_qla2xxx_lport *)base_vha->vha_tgt.target_lport_ptr; - struct tcm_qla2xxx_tpg *base_tpg; struct fc_vport_identifiers vport_id; if (!qla_tgt_mode_enabled(base_vha)) { @@ -1697,7 +1692,6 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, pr_err("qla2xxx base_lport or tpg_1 not available\n"); return -EPERM; } - base_tpg = base_lport->tpg_1; memset(&vport_id, 0, sizeof(vport_id)); vport_id.port_name = npiv_wwpn; -- cgit v0.10.2 From 12306b425d0dbab7b60f54e02d67cf3dfae494d1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 15 Jul 2015 10:55:36 +0300 Subject: scsi: Fix wrong additional sense length in descriptor format The sense header additional sense length should be the accumulated size of all the descriptors. Information descriptor size is 12 bytes. When setting the additional sense length we should add 0xc instead of 0xa. Signed-off-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index 41432c1..ee6bdf4 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -260,7 +260,7 @@ void scsi_set_sense_information(u8 *buf, u64 info) len = buf[7]; ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0); if (!ucp) { - buf[7] = len + 0xa; + buf[7] = len + 0xc; ucp = buf + 8 + len; } ucp[0] = 0; -- cgit v0.10.2 From f5a8b3a796db01b639435515b3adc003b9f27387 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 15 Jul 2015 10:55:37 +0300 Subject: scsi: Protect against possible buffer overflow in scsi_set_sense_information Make sure that the input sense buffer has sufficient length to fit the information descriptor (12 additional bytes). Modify scsi_set_sense_information to receive the sense buffer length and adjust its callers, scsi target and libata. (Fix patch fuzz in scsi_set_sense_information - nab) Reported-by: Hannes Reinecke Signed-off-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Cc: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3131adc..2fb7c79 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -289,7 +289,9 @@ void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, return; information = ata_tf_read_block(tf, NULL); - scsi_set_sense_information(cmd->sense_buffer, information); + scsi_set_sense_information(cmd->sense_buffer, + SCSI_SENSE_BUFFERSIZE, + information); } static ssize_t diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index ee6bdf4..c126966 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -249,10 +250,13 @@ EXPORT_SYMBOL(scsi_build_sense_buffer); * scsi_set_sense_information - set the information field in a * formatted sense data buffer * @buf: Where to build sense data + * @buf_len: buffer length * @info: 64-bit information value to be set * + * Return value: + * 0 on success or EINVAL for invalid sense buffer length **/ -void scsi_set_sense_information(u8 *buf, u64 info) +int scsi_set_sense_information(u8 *buf, int buf_len, u64 info) { if ((buf[0] & 0x7f) == 0x72) { u8 *ucp, len; @@ -263,6 +267,11 @@ void scsi_set_sense_information(u8 *buf, u64 info) buf[7] = len + 0xc; ucp = buf + 8 + len; } + + if (buf_len < len + 0xc) + /* Not enough room for info */ + return -EINVAL; + ucp[0] = 0; ucp[1] = 0xa; ucp[2] = 0x80; /* Valid bit */ @@ -272,5 +281,7 @@ void scsi_set_sense_information(u8 *buf, u64 info) buf[0] |= 0x80; put_unaligned_be64(info, &buf[3]); } + + return 0; } EXPORT_SYMBOL(scsi_set_sense_information); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2bece60..7fb031b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2729,7 +2729,7 @@ static const struct sense_info sense_info_table[] = { }, }; -static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) +static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) { const struct sense_info *si; u8 *buffer = cmd->sense_buffer; @@ -2756,7 +2756,11 @@ static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) scsi_build_sense_buffer(0, buffer, si->key, asc, ascq); if (si->add_sector_info) - scsi_set_sense_information(buffer, cmd->bad_sector); + return
scsi_set_sense_information(buffer, + cmd->scsi_sense_length, + cmd->bad_sector); + + return 0; } int @@ -2774,10 +2778,14 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, spin_unlock_irqrestore(&cmd->t_state_lock, flags); if (!from_transport) { + int rc; + cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE; - translate_sense_reason(cmd, reason); cmd->scsi_status = SAM_STAT_CHECK_CONDITION; cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER; + rc = translate_sense_reason(cmd, reason); + if (rc) + return rc; } trace_target_cmd_complete(cmd); diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index 156d673..11571b2 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -62,7 +62,7 @@ extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr); extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); -extern void scsi_set_sense_information(u8 *buf, u64 info); +int scsi_set_sense_information(u8 *buf, int buf_len, u64 info); extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, int desc_type); -- cgit v0.10.2 From 4e4937e8aefde8d49340e803ebbedcdf4b43e5f0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 16 Jul 2015 10:28:05 +0300 Subject: target: Return descriptor format sense data in case the LU spans 64-bit sectors In case a LU spans 64-bit sectors, the fixed-size sense data information field is only 32 bits, which means the sector information will be truncated. Thus, if the LU spans 64-bit sectors, use descriptor format sense data to correctly report sector information. Reported-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index 62ea4e8..d746a3a 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -176,3 +176,8 @@ core_delete_hba(struct se_hba *hba) kfree(hba); return 0; } + +bool target_sense_desc_format(struct se_device *dev) +{ + return dev->transport->get_blocks(dev) > U32_MAX; +} diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index c43dcbf..b949d33 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -761,7 +761,12 @@ static int spc_modesense_control(struct se_cmd *cmd, u8 pc, u8 *p) if (pc == 1) goto out; - p[2] = 2; + /* GLTSD: No implicit save of log parameters */ + p[2] = (1 << 1); + if (target_sense_desc_format(dev)) + /* D_SENSE: Descriptor format sense data for 64bit sectors */ + p[2] |= (1 << 2); + /* * From spc4r23, 7.4.7 Control mode page * @@ -1144,6 +1149,7 @@ static sense_reason_t spc_emulate_request_sense(struct se_cmd *cmd) unsigned char *rbuf; u8 ua_asc = 0, ua_ascq = 0; unsigned char buf[SE_SENSE_BUF]; + bool desc_format = target_sense_desc_format(cmd->se_dev); memset(buf, 0, SE_SENSE_BUF); @@ -1158,10 +1164,10 @@ static sense_reason_t spc_emulate_request_sense(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq)) - scsi_build_sense_buffer(0, buf, UNIT_ATTENTION, + scsi_build_sense_buffer(desc_format, buf, UNIT_ATTENTION, ua_asc, ua_ascq); else - scsi_build_sense_buffer(0, buf, NO_SENSE, 0x0, 0x0); + scsi_build_sense_buffer(desc_format, buf, NO_SENSE, 0x0, 0x0); memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd->data_length)); transport_kunmap_data_sg(cmd);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7fb031b..98155db 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2735,6 +2735,7 @@ static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) u8 *buffer = cmd->sense_buffer; int r = (__force int)reason; u8 asc, ascq; + bool desc_format = target_sense_desc_format(cmd->se_dev); if (r < ARRAY_SIZE(sense_info_table) && sense_info_table[r].key) si = &sense_info_table[r]; @@ -2754,7 +2755,7 @@ static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason) ascq = si->ascq; } - scsi_build_sense_buffer(0, buffer, si->key, asc, ascq); + scsi_build_sense_buffer(desc_format, buffer, si->key, asc, ascq); if (si->add_sector_info) return scsi_set_sense_information(buffer, cmd->scsi_sense_length, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 1e5c8f9..56cf8e4 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -93,4 +93,6 @@ bool target_lun_is_rdonly(struct se_cmd *); sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); +bool target_sense_desc_format(struct se_device *dev); + #endif /* TARGET_CORE_BACKEND_H */ -- cgit v0.10.2 From a73c2a2f9123605022bedbd2b59ca7e76036f0b3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 15 Jul 2015 10:55:39 +0300 Subject: libiscsi: Use scsi helper to set information descriptor In case a PI error is encountered, use scsi_set_sense_information instead of open coding the information descriptor format. Signed-off-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 8053f24..bb5ca7f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -853,12 +853,9 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(1, sc->sense_buffer, ILLEGAL_REQUEST, 0x10, ascq); - sc->sense_buffer[7] = 0xc; /* Additional sense length */ - sc->sense_buffer[8] = 0; /* Information desc type */ - sc->sense_buffer[9] = 0xa; /* Additional desc length */ - sc->sense_buffer[10] = 0x80; /* Validity bit */ - - put_unaligned_be64(sector, &sc->sense_buffer[12]); + scsi_set_sense_information(sc->sense_buffer, + SCSI_SENSE_BUFFERSIZE, + sector); goto out; } } -- cgit v0.10.2 From 53c46995b6ed7cb32a28bce1f4a25065baf65d8f Mon Sep 17 00:00:00 2001 From: Chris Zankel Date: Mon, 20 Jul 2015 16:29:49 -0700 Subject: target: remove unused lun_flags field from se_lun The lun_flags field is not used, so drop it.
Signed-off-by: Chris Zankel Signed-off-by: Spencer Baugh Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index a681644..d57a0cb 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -680,7 +680,6 @@ struct se_lun { #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; u32 lun_access; - u32 lun_flags; u32 lun_index; /* RELATIVE TARGET PORT IDENTIFER */ -- cgit v0.10.2 From b6a54b8d895648d915c7e8308f3d3e6bf2505d69 Mon Sep 17 00:00:00 2001 From: Chris Zankel Date: Mon, 20 Jul 2015 16:29:50 -0700 Subject: target: remove initiatorname field in se_lun_acl From: Chris Zankel The initiatorname field in se_lun_acl is only a copy of the same field in se_node_acl, so remove it and use the version in se_node_acl where needed (it's actually only used for pr_debug). Signed-off-by: Chris Zankel Signed-off-by: Spencer Baugh Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index db70342..55f2cb2 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -620,8 +620,6 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( lacl->mapped_lun = mapped_lun; lacl->se_lun_nacl = nacl; - snprintf(lacl->initiatorname, TRANSPORT_IQN_LEN, "%s", - nacl->initiatorname); return lacl; } @@ -656,7 +654,7 @@ int core_dev_add_initiator_node_lun_acl( " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun, (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) ? "RW" : "RO", - lacl->initiatorname); + nacl->initiatorname); /* * Check to see if there are any existing persistent reservation APTPL * pre-registrations that need to be enabled for this LUN ACL.. */ @@ -688,7 +686,7 @@ int core_dev_del_initiator_node_lun_acl( " InitiatorNode: %s Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, - lacl->initiatorname, lacl->mapped_lun); + nacl->initiatorname, lacl->mapped_lun); return 0; } @@ -701,7 +699,7 @@ void core_dev_free_initiator_node_lun_acl( " Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), tpg->se_tpg_tfo->get_fabric_name(), - lacl->initiatorname, lacl->mapped_lun); + lacl->se_lun_nacl->initiatorname, lacl->mapped_lun); kfree(lacl); } diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 48a3698..be42429 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -203,7 +203,7 @@ static ssize_t target_fabric_mappedlun_store_write_protect( pr_debug("%s_ConfigFS: Changed Initiator ACL: %s" " Mapped LUN: %llu Write Protect bit to %s\n", se_tpg->se_tpg_tfo->get_fabric_name(), - lacl->initiatorname, lacl->mapped_lun, (op) ? + se_nacl->initiatorname, lacl->mapped_lun, (op) ?
"ON" : "OFF"); return count; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index d57a0cb..95e65bd 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -593,7 +593,6 @@ struct se_ml_stat_grps { }; struct se_lun_acl { - char initiatorname[TRANSPORT_IQN_LEN]; u64 mapped_lun; struct se_node_acl *se_lun_nacl; struct se_lun *se_lun; -- cgit v0.10.2 From aa75679c797c0250a853e600e45368f1f9545c27 Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Mon, 20 Jul 2015 17:12:12 -0700 Subject: target/iscsi: Use proper SGL accessors for digest computation Current implementation assumes that all the buffers of an IO are linked with a single SG list, which is OK because target-core is only allocating a contigious scatterlist region. However, this assumption is wrong for se_cmd descriptors that want to use chaining across multiple SGL regions. Fix this up by using proper SGL accessors for digest payload computation. Signed-off-by: Alexei Potashnik Cc: Roland Dreier Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 4e68b62..a4cf58c 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1209,7 +1209,6 @@ static u32 iscsit_do_crypto_hash_sg( u8 *pad_bytes) { u32 data_crc; - u32 i; struct scatterlist *sg; unsigned int page_off; @@ -1218,15 +1217,15 @@ static u32 iscsit_do_crypto_hash_sg( sg = cmd->first_data_sg; page_off = cmd->first_data_sg_off; - i = 0; while (data_length) { - u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off)); + u32 cur_len = min_t(u32, data_length, (sg->length - page_off)); - crypto_hash_update(hash, &sg[i], cur_len); + crypto_hash_update(hash, sg, cur_len); data_length -= cur_len; page_off = 0; - i++; + /* iscsit_map_iovec has already checked for invalid sg pointers */ + sg = sg_next(sg); } if (padding) { -- cgit v0.10.2 From c72c5250224d475614a00c1d7e54a67f77cd3410 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 22 Jul 2015 15:08:18 -0700 Subject: target: allow underflow/overflow for PR OUT etc. commands It's not necessarily a fatal error if a command with a data-out phase has a data length that differs from the transport data length (e.g. PERSISTENT RESERVE OUT might have a parameter list length in the CDB that's smaller than the FC_DL field), so allow these commands. The Windows compliance test sends them. Signed-off-by: Roland Dreier Signed-off-by: Spencer Baugh Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 98155db..729ec73 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1088,9 +1088,9 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) " 0x%02x\n", cmd->se_tfo->get_fabric_name(), cmd->data_length, size, cmd->t_task_cdb[0]); - if (cmd->data_direction == DMA_TO_DEVICE) { - pr_err("Rejecting underflow/overflow" - " WRITE data\n"); + if (cmd->data_direction == DMA_TO_DEVICE && + cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { + pr_err("Rejecting underflow/overflow WRITE data\n"); return TCM_INVALID_CDB_FIELD; } /* -- cgit v0.10.2 From 568e1f6524b06ff113cebf5711832ca95d37259f Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 22 Jul 2015 15:01:36 -0700 Subject: target: improve unsupported opcode message Make the warning about unsupported SCSI opcode more useful: - Add in the initiator name so we know who's sending it. 
- Print the warning even for opcodes that spc_parse_cdb() knows about but that we don't handle. Signed-off-by: Joern Engel Signed-off-by: Spencer Baugh Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index b949d33..a07d455 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1390,9 +1390,6 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) } break; default: - pr_warn("TARGET_CORE[%s]: Unsupported SCSI Opcode" - " 0x%02x, sending CHECK_CONDITION.\n", - cmd->se_tfo->get_fabric_name(), cdb[0]); return TCM_UNSUPPORTED_SCSI_OPCODE; } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 729ec73..bd68727 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1247,6 +1247,11 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) } ret = dev->transport->parse_cdb(cmd); + if (ret == TCM_UNSUPPORTED_SCSI_OPCODE) + pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n", + cmd->se_tfo->get_fabric_name(), + cmd->se_sess->se_node_acl->initiatorname, + cmd->t_task_cdb[0]); if (ret) return ret; -- cgit v0.10.2 From 45182ed576898b846a98ac3bff2ddcb9d35a0181 Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Thu, 23 Jul 2015 15:27:46 -0700 Subject: target: add support for START_STOP_UNIT SCSI opcode AIX servers using VIOS servers that virtualize FC cards will have a problem booting without support for START_STOP_UNIT. Cite sbc3r36 exactly, clean up if conditions (rob + hch) Signed-off-by: Brian Bunker Signed-off-by: Spencer Baugh Reviewed-by: Christoph Hellwig Cc: "Robert Elliott (Server Storage)" Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index ac72150..4fc8343 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -154,6 +154,38 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd) return 0; } +static sense_reason_t +sbc_emulate_startstop(struct se_cmd *cmd) +{ + unsigned char *cdb = cmd->t_task_cdb; + + /* + * See sbc3r36 section 5.25 + * Immediate bit should be set since there is nothing to complete + * POWER CONDITION MODIFIER 0h + */ + if (!(cdb[1] & 1) || cdb[2] || cdb[3]) + return TCM_INVALID_CDB_FIELD; + + /* + * See sbc3r36 section 5.25 + * POWER CONDITION 0h START_VALID - process START and LOEJ + */ + if (cdb[4] >> 4 & 0xf) + return TCM_INVALID_CDB_FIELD; + + /* + * See sbc3r36 section 5.25 + * LOEJ 0h - nothing to load or unload + * START 1h - we are ready + */ + if (!(cdb[4] & 1) || (cdb[4] & 2) || (cdb[4] & 4)) + return TCM_INVALID_CDB_FIELD; + + target_complete_cmd(cmd, SAM_STAT_GOOD); + return 0; +} + sector_t sbc_get_write_same_sectors(struct se_cmd *cmd) { u32 num_blocks; @@ -1069,6 +1101,10 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = 0; cmd->execute_cmd = sbc_emulate_noop; break; + case START_STOP: + size = 0; + cmd->execute_cmd = sbc_emulate_startstop; + break; default: ret = spc_parse_cdb(cmd, &size); if (ret) -- cgit v0.10.2 From 9c31820b6ab93ec298ad98abeee49759b5f5958c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 21 Jul 2015 17:45:32 -0700 Subject: target: Remove cmd->se_ordered_id (unused except debug log lines) For every command, we set se_ordered_id by doing atomic_inc_return on dev->dev_ordered_id for the corresponding device. 
However, the only places this value gets used are in pr_debug() calls, which doesn't seem worth an extra atomic op per IO. Signed-off-by: Roland Dreier Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 55f2cb2..dcc424a 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -769,7 +769,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) spin_lock_init(&dev->se_tmr_lock); spin_lock_init(&dev->qf_cmd_lock); sema_init(&dev->caw_sem, 1); - atomic_set(&dev->dev_ordered_id, 0); INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list); spin_lock_init(&dev->t10_wwn.t10_vpd_lock); INIT_LIST_HEAD(&dev->t10_pr.registration_list); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index bd68727..3f0b500 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1178,14 +1178,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd) " emulation is not supported\n"); return TCM_INVALID_CDB_FIELD; } - /* - * Used to determine when ORDERED commands should go from - * Dormant to Active status. - */ - cmd->se_ordered_id = atomic_inc_return(&dev->dev_ordered_id); - pr_debug("Allocated se_ordered_id: %u for Task Attr: 0x%02x on %s\n", - cmd->se_ordered_id, cmd->sam_task_attr, - dev->transport->name); + return 0; } @@ -1773,16 +1766,14 @@ static bool target_handle_task_attr(struct se_cmd *cmd) */ switch (cmd->sam_task_attr) { case TCM_HEAD_TAG: - pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x, " - "se_ordered_id: %u\n", - cmd->t_task_cdb[0], cmd->se_ordered_id); + pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x\n", + cmd->t_task_cdb[0]); return false; case TCM_ORDERED_TAG: atomic_inc_mb(&dev->dev_ordered_sync); - pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, " - " se_ordered_id: %u\n", - cmd->t_task_cdb[0], cmd->se_ordered_id); + pr_debug("Added ORDERED for CDB: 0x%02x to ordered list\n", + cmd->t_task_cdb[0]); /* * Execute an ORDERED command if no other older commands @@ -1806,10 +1797,8 @@ static bool target_handle_task_attr(struct se_cmd *cmd) list_add_tail(&cmd->se_delayed_node, &dev->delayed_cmd_list); spin_unlock(&dev->delayed_cmd_lock); - pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to" - " delayed CMD list, se_ordered_id: %u\n", - cmd->t_task_cdb[0], cmd->sam_task_attr, - cmd->se_ordered_id); + pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to delayed CMD list\n", + cmd->t_task_cdb[0], cmd->sam_task_attr); return true; } @@ -1894,20 +1883,18 @@ static void transport_complete_task_attr(struct se_cmd *cmd) if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { atomic_dec_mb(&dev->simple_cmds); dev->dev_cur_ordered_id++; - pr_debug("Incremented dev->dev_cur_ordered_id: %u for" - " SIMPLE: %u\n", dev->dev_cur_ordered_id, - cmd->se_ordered_id); + pr_debug("Incremented dev->dev_cur_ordered_id: %u for SIMPLE\n", + dev->dev_cur_ordered_id); } else if (cmd->sam_task_attr == TCM_HEAD_TAG) { dev->dev_cur_ordered_id++; - pr_debug("Incremented dev_cur_ordered_id: %u for" - " HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id, - cmd->se_ordered_id); + pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n", + dev->dev_cur_ordered_id); } else if (cmd->sam_task_attr == TCM_ORDERED_TAG) { atomic_dec_mb(&dev->dev_ordered_sync); dev->dev_cur_ordered_id++; - pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED:" - " %u\n", dev->dev_cur_ordered_id, cmd->se_ordered_id); +
pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n", + dev->dev_cur_ordered_id); } target_restart_delayed_cmds(dev); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 95e65bd..3afd8db 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -454,7 +454,6 @@ struct se_cmd { unsigned unknown_data_length:1; /* See se_cmd_flags_table */ u32 se_cmd_flags; - u32 se_ordered_id; /* Total size in bytes associated with command */ u32 data_length; u32 residual_count; @@ -744,7 +743,6 @@ struct se_device { atomic_long_t write_bytes; /* Active commands on this virtual SE device */ atomic_t simple_cmds; - atomic_t dev_ordered_id; atomic_t dev_ordered_sync; atomic_t dev_qf_count; u32 export_count; -- cgit v0.10.2 From 915270c3cacfc80cb3fe7fdd8130439039a85bbb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 21 Jul 2015 17:45:33 -0700 Subject: target: Shrink struct se_cmd by rearranging fields On x86-64, these changes reduce the struct size from 528 bytes to 496 bytes. We save a cacheline and get under 512 bytes for better packing. Signed-off-by: Roland Dreier Acked-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 3afd8db..ac9bf1c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -439,6 +439,9 @@ struct se_cmd { u8 scsi_asc; u8 scsi_ascq; u16 scsi_sense_length; + unsigned cmd_wait_set:1; + unsigned unknown_data_length:1; + bool state_active:1; u64 tag; /* SAM command identifier aka task tag */ /* Delay for ALUA Active/NonOptimized state access in milliseconds */ int alua_nonop_delay; @@ -450,8 +453,6 @@ struct se_cmd { unsigned int map_tag; /* Transport protocol dependent state, see transport_state_table */ enum transport_state_table t_state; - unsigned cmd_wait_set:1; - unsigned unknown_data_length:1; /* See se_cmd_flags_table */ u32 se_cmd_flags; /* Total size in bytes associated with command */ @@ -471,7 +472,6 @@ struct se_cmd { struct se_tmr_req *se_tmr_req; struct list_head se_cmd_list; struct completion cmd_wait_comp; - struct kref cmd_kref; const struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); @@ -491,6 +491,7 @@ struct se_cmd { #define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) spinlock_t t_state_lock; + struct kref cmd_kref; struct completion t_transport_stop_comp; struct work_struct work; @@ -503,8 +504,10 @@ struct se_cmd { struct scatterlist *t_bidi_data_sg; unsigned int t_bidi_data_nents; + /* Used for lun->lun_ref counting */ + int lun_ref_active; + struct list_head state_list; - bool state_active; /* old task stop completion, consider merging with some of the above */ struct completion task_stop_comp; @@ -512,20 +515,17 @@ struct se_cmd { /* backend private data */ void *priv; - /* Used for lun->lun_ref counting */ - int lun_ref_active; - /* DIF related members */ enum target_prot_op prot_op; enum target_prot_type prot_type; u8 prot_checks; + bool prot_pto; u32 prot_length; u32 reftag_seed; struct scatterlist *t_prot_sg; unsigned int t_prot_nents; sense_reason_t pi_err; sector_t bad_sector; - bool prot_pto; }; struct se_ua { -- cgit v0.10.2 From ab81a5e0660a058c2cc728fc114fa3082be78952 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Thu, 23 Jul 2015 22:33:21 +0200 Subject: target: check DPO/FUA usage for COMPARE AND 
WRITE COMPARE AND WRITE requests should fail if DPO or FUA is set, but the device is not advertising support. Signed-off-by: David Disseldorp Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 4fc8343..0b4b2a6 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -992,6 +992,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) " than 1\n", sectors); return TCM_INVALID_CDB_FIELD; } + if (sbc_check_dpofua(dev, cmd, cdb)) + return TCM_INVALID_CDB_FIELD; + /* * Double size because we have two buffers, note that * zero is not an error.. -- cgit v0.10.2 From 9d8c94e48a5faafae1e52bec18b8d99211b2ccd4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 31 Jul 2015 14:08:31 +0530 Subject: drivers: target: Drop unlikely before IS_ERR(_OR_NULL) IS_ERR(_OR_NULL) already contain an 'unlikely' compiler hint and there is no need to do that again from its callers. Drop it. Signed-off-by: Viresh Kumar Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 6803172..aa3caca 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -255,7 +255,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg) struct ft_cmd *cmd = arg; struct fc_frame_header *fh; - if (unlikely(IS_ERR(fp))) { + if (IS_ERR(fp)) { /* XXX need to find cmd if queued */ cmd->seq = NULL; cmd->aborted = true; -- cgit v0.10.2 From a6415cddc4e6e1675a5648e7785aef716980c90c Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Sat, 1 Aug 2015 00:10:12 -0700 Subject: iscsi-target: Add tpg_enabled_sendtargets for disabled discovery This patch adds a new tpg_enabled_sendtargets configfs attribute to allow in-band sendtargets discovery information to include target-portal-groups (TPGs) in !TPG_STATE_ACTIVE state. This functionality is useful for clustered iSCSI targets, where TPGTs handled on remote cluster nodes should be advertised in the SendTargets response. By default, this new attribute retains the default behaviour of existing code, which is to ignore portal-groups in !TPG_STATE_ACTIVE state.
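The behavioural change reduces to one predicate (distilled from the iscsit_build_sendtargets_response() hunk below; the helper name is invented for illustration):

	#include <stdbool.h>

	/* Should this TPG appear in the SendTargets response? */
	static bool tpg_should_be_listed(bool active, bool tpg_enabled_sendtargets)
	{
		/* attribute set (the default): advertise only TPG_STATE_ACTIVE
		 * groups; attribute clear: advertise inactive groups too, so a
		 * clustered target can announce TPGTs served by other nodes. */
		return active || !tpg_enabled_sendtargets;
	}

Assuming the usual LIO configfs layout, a clustered target would presumably clear the flag with: echo 0 > /sys/kernel/config/target/iscsi/<iqn>/tpgt_<tag>/attrib/tpg_enabled_sendtargets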
Signed-off-by: David Disseldorp Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index a4cf58c..986518c 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3398,6 +3398,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, int target_name_printed; unsigned char buf[ISCSI_IQN_LEN+12]; /* iqn + "TargetName=" + \0 */ unsigned char *text_in = cmd->text_in_ptr, *text_ptr = NULL; + bool active; buffer_len = min(conn->conn_ops->MaxRecvDataSegmentLength, SENDTARGETS_BUF_LIMIT); @@ -3451,13 +3452,12 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } spin_lock(&tpg->tpg_state_lock); - if ((tpg->tpg_state == TPG_STATE_FREE) || - (tpg->tpg_state == TPG_STATE_INACTIVE)) { - spin_unlock(&tpg->tpg_state_lock); - continue; - } + active = (tpg->tpg_state == TPG_STATE_ACTIVE); spin_unlock(&tpg->tpg_state_lock); + if (!active && tpg->tpg_attrib.tpg_enabled_sendtargets) + continue; + spin_lock(&tpg->tpg_np_lock); list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index c1898c8..05f1664 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1010,6 +1010,11 @@ TPG_ATTR(t10_pi, S_IRUGO | S_IWUSR); */ DEF_TPG_ATTRIB(fabric_prot_type); TPG_ATTR(fabric_prot_type, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_tpg_enabled_sendtargets + */ +DEF_TPG_ATTRIB(tpg_enabled_sendtargets); +TPG_ATTR(tpg_enabled_sendtargets, S_IRUGO | S_IWUSR); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_authentication.attr, @@ -1024,6 +1029,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_default_erl.attr, &iscsi_tpg_attrib_t10_pi.attr, &iscsi_tpg_attrib_fabric_prot_type.attr, + &iscsi_tpg_attrib_tpg_enabled_sendtargets.attr, NULL, }; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 968068f..8262a85 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -226,6 +226,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a->default_erl = TA_DEFAULT_ERL; a->t10_pi = TA_DEFAULT_T10_PI; a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE; + a->tpg_enabled_sendtargets = TA_DEFAULT_TPG_ENABLED_SENDTARGETS; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) @@ -892,3 +893,21 @@ int iscsit_ta_fabric_prot_type( return 0; } + +int iscsit_ta_tpg_enabled_sendtargets( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->tpg_enabled_sendtargets = flag; + pr_debug("iSCSI_TPG[%hu] - TPG enabled bit required for SendTargets:" + " %s\n", tpg->tpgt, (a->tpg_enabled_sendtargets) ? 
"ON" : "OFF"); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 95ff5bd..a2790fd 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -40,5 +40,6 @@ extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32); extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32); extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32); +extern int iscsit_ta_tpg_enabled_sendtargets(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 34117b8..ab46585 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -62,6 +62,8 @@ /* T10 protection information disabled by default */ #define TA_DEFAULT_T10_PI 0 #define TA_DEFAULT_FABRIC_PROT_TYPE 0 +/* TPG status needs to be enabled to return sendtargets discovery endpoint info */ +#define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1 #define ISCSI_IOV_DATA_BUFFER 5 @@ -763,6 +765,7 @@ struct iscsi_tpg_attrib { u32 default_erl; u8 t10_pi; u32 fabric_prot_type; + u32 tpg_enabled_sendtargets; struct iscsi_portal_group *tpg; }; -- cgit v0.10.2 From 24c7d6c7316c72301bf8ecfc1189fd476176fd75 Mon Sep 17 00:00:00 2001 From: Sebastian Herbszt Date: Mon, 3 Aug 2015 00:21:50 +0200 Subject: qla2xxx: Update tcm_qla2xxx module description to 24xx+ Pre 24xx HBAs are not supported by tcm_qla2xxx, so go ahead and reflect this for informational purposes. Also fix QLogic spelling. Signed-off-by: Sebastian Herbszt Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig index 33f60c9..a0f732b 100644 --- a/drivers/scsi/qla2xxx/Kconfig +++ b/drivers/scsi/qla2xxx/Kconfig @@ -32,10 +32,10 @@ config SCSI_QLA_FC They are also included in the linux-firmware tree as well. config TCM_QLA2XXX - tristate "TCM_QLA2XXX fabric module for Qlogic 2xxx series target mode HBAs" + tristate "TCM_QLA2XXX fabric module for QLogic 24xx+ series target mode HBAs" depends on SCSI_QLA_FC && TARGET_CORE depends on LIBFC select BTREE default n ---help--- - Say Y here to enable the TCM_QLA2XXX fabric module for Qlogic 2xxx series target mode HBAs + Say Y here to enable the TCM_QLA2XXX fabric module for QLogic 24xx+ series target mode HBAs diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 474d0c0..c621623 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1956,7 +1956,7 @@ static void __exit tcm_qla2xxx_exit(void) tcm_qla2xxx_deregister_configfs(); } -MODULE_DESCRIPTION("TCM QLA2XXX series NPIV enabled fabric driver"); +MODULE_DESCRIPTION("TCM QLA24XX+ series NPIV enabled fabric driver"); MODULE_LICENSE("GPL"); module_init(tcm_qla2xxx_init); module_exit(tcm_qla2xxx_exit); -- cgit v0.10.2 From 109e2381749c1cfd94a0d22b2b54142539024973 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 23 Jul 2015 14:53:32 -0700 Subject: target: Drop iSCSI use of mutex around max_cmd_sn increment In a performance profile, taking a mutex in iscsit_increment_maxcmdsn() shows up very high. However taking a mutex around "sess->max_cmd_sn += 1" seems pretty silly: we're not serializing against other contexts in any useful way. 
I did a quick audit and there don't appear to be any other places that use max_cmd_sn within the mutex more than once, so this lock can't be providing any useful serialization. (Get correct values for logging - fix whitespace damage) Signed-off-by: Roland Dreier Signed-off-by: Spencer Baugh Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 986518c..e55f49c 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2555,7 +2555,7 @@ static int iscsit_send_conn_drop_async_message( cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); hdr->async_event = ISCSI_ASYNC_MSG_DROPPING_CONNECTION; hdr->param1 = cpu_to_be16(cmd->logout_cid); hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait); @@ -2627,7 +2627,7 @@ iscsit_build_datain_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, hdr->statsn = cpu_to_be32(0xFFFFFFFF); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); hdr->datasn = cpu_to_be32(datain->data_sn); hdr->offset = cpu_to_be32(datain->offset); @@ -2838,7 +2838,7 @@ iscsit_build_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); pr_debug("Built Logout Response ITT: 0x%08x StatSN:" " 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n", @@ -2901,7 +2901,7 @@ iscsit_build_nopin_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); pr_debug("Built NOPIN %s Response ITT: 0x%08x, TTT: 0x%08x," " StatSN: 0x%08x, Length %u\n", (nopout_response) ? 
@@ -3048,7 +3048,7 @@ static int iscsit_send_r2t( hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag); hdr->statsn = cpu_to_be32(conn->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); hdr->r2tsn = cpu_to_be32(r2t->r2t_sn); hdr->data_offset = cpu_to_be32(r2t->offset); hdr->data_length = cpu_to_be32(r2t->xfer_len); @@ -3201,7 +3201,7 @@ void iscsit_build_rsp_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); pr_debug("Built SCSI Response, ITT: 0x%08x, StatSN: 0x%08x," " Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n", @@ -3320,7 +3320,7 @@ iscsit_build_task_mgt_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); pr_debug("Built Task Management Response ITT: 0x%08x," " StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n", @@ -3575,7 +3575,7 @@ iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, */ cmd->maxcmdsn_inc = 0; hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); pr_debug("Built Text Response: ITT: 0x%08x, TTT: 0x%08x, StatSN: 0x%08x," " Length: %u, CID: %hu F: %d C: %d\n", cmd->init_task_tag, @@ -3653,7 +3653,7 @@ iscsit_build_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn, cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); } EXPORT_SYMBOL(iscsit_build_reject); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 05f1664..48f708b 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -656,6 +656,7 @@ static ssize_t lio_target_nacl_show_info( struct iscsi_conn *conn; struct se_session *se_sess; ssize_t rb = 0; + u32 max_cmd_sn; spin_lock_bh(&se_nacl->nacl_sess_lock); se_sess = se_nacl->nacl_sess; @@ -703,11 +704,12 @@ static ssize_t lio_target_nacl_show_info( " Values]-----------------------\n"); rb += sprintf(page+rb, " CmdSN/WR : CmdSN/WC : ExpCmdSN" " : MaxCmdSN : ITT : TTT\n"); + max_cmd_sn = (u32) atomic_read(&sess->max_cmd_sn); rb += sprintf(page+rb, " 0x%08x 0x%08x 0x%08x 0x%08x" " 0x%08x 0x%08x\n", sess->cmdsn_window, - (sess->max_cmd_sn - sess->exp_cmd_sn) + 1, - sess->exp_cmd_sn, sess->max_cmd_sn, + (max_cmd_sn - sess->exp_cmd_sn) + 1, + sess->exp_cmd_sn, max_cmd_sn, sess->init_task_tag, sess->targ_xfer_tag); rb += sprintf(page+rb, "----------------------[iSCSI" " Connections]-------------------------\n"); diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 5fabcd3..07d2ef6 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -47,7 +47,7 @@ void iscsit_determine_maxcmdsn(struct iscsi_session *sess) * 
core_set_queue_depth_for_node(). */ sess->cmdsn_window = se_nacl->queue_depth; - sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1; + atomic_set(&sess->max_cmd_sn, (u32) atomic_read(&sess->max_cmd_sn) + se_nacl->queue_depth - 1); } void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess) @@ -57,9 +57,6 @@ void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess cmd->maxcmdsn_inc = 1; - mutex_lock(&sess->cmdsn_mutex); - sess->max_cmd_sn += 1; - pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); - mutex_unlock(&sess->cmdsn_mutex); + pr_debug("Updated MaxCmdSN to 0x%08x\n", atomic_inc_return(&sess->max_cmd_sn)); } EXPORT_SYMBOL(iscsit_increment_maxcmdsn); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 3d0fe4f..bd192f8 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -330,7 +330,7 @@ static int iscsi_login_zero_tsih_s1( * The FFP CmdSN window values will be allocated from the TPG's * Initiator Node's ACL once the login has been successfully completed. */ - sess->max_cmd_sn = be32_to_cpu(pdu->cmdsn); + atomic_set(&sess->max_cmd_sn, be32_to_cpu(pdu->cmdsn)); sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL); if (!sess->sess_ops) { diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 8c02fa3..74d041e 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -340,7 +340,6 @@ static int iscsi_target_check_first_request( static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login) { u32 padding = 0; - struct iscsi_session *sess = conn->sess; struct iscsi_login_rsp *login_rsp; login_rsp = (struct iscsi_login_rsp *) login->rsp; @@ -352,7 +351,7 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log login_rsp->itt = login->init_task_tag; login_rsp->statsn = cpu_to_be32(conn->stat_sn++); login_rsp->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - login_rsp->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + login_rsp->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn)); pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x," " ExpCmdSN; 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:" @@ -367,10 +366,8 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log return -1; login->rsp_length = 0; - mutex_lock(&sess->cmdsn_mutex); - login_rsp->exp_cmdsn = cpu_to_be32(sess->exp_cmd_sn); - login_rsp->max_cmdsn = cpu_to_be32(sess->max_cmd_sn); - mutex_unlock(&sess->cmdsn_mutex); + login_rsp->exp_cmdsn = cpu_to_be32(login_rsp->exp_cmdsn); + login_rsp->max_cmdsn = cpu_to_be32(login_rsp->max_cmdsn); return 0; } diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index cf59c39..11320df 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -50,7 +50,7 @@ u8 iscsit_tmr_abort_task( pr_err("Unable to locate RefTaskTag: 0x%08x on CID:" " %hu.\n", hdr->rtt, conn->cid); return (iscsi_sna_gte(be32_to_cpu(hdr->refcmdsn), conn->sess->exp_cmd_sn) && - iscsi_sna_lte(be32_to_cpu(hdr->refcmdsn), conn->sess->max_cmd_sn)) ? + iscsi_sna_lte(be32_to_cpu(hdr->refcmdsn), (u32) atomic_read(&conn->sess->max_cmd_sn))) ? 
ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK; } if (ref_cmd->cmd_sn != be32_to_cpu(hdr->refcmdsn)) { diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index a2bff07..7df4fac 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -233,6 +233,7 @@ struct iscsi_r2t *iscsit_get_holder_for_r2tsn( static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn) { + u32 max_cmdsn; int ret; /* @@ -241,10 +242,10 @@ static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cm * or order CmdSNs due to multiple connection sessions and/or * CRC failures. */ - if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) { + max_cmdsn = atomic_read(&sess->max_cmd_sn); + if (iscsi_sna_gt(cmdsn, max_cmdsn)) { pr_err("Received CmdSN: 0x%08x is greater than" - " MaxCmdSN: 0x%08x, ignoring.\n", cmdsn, - sess->max_cmd_sn); + " MaxCmdSN: 0x%08x, ignoring.\n", cmdsn, max_cmdsn); ret = CMDSN_MAXCMDSN_OVERRUN; } else if (cmdsn == sess->exp_cmd_sn) { diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index ab46585..d4616ef 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -637,7 +637,7 @@ struct iscsi_session { /* session wide counter: expected command sequence number */ u32 exp_cmd_sn; /* session wide counter: maximum allowed command sequence number */ - u32 max_cmd_sn; + atomic_t max_cmd_sn; struct list_head sess_ooo_cmdsn_list; /* LIO specific session ID */ -- cgit v0.10.2 From 286272137f66199f87ea254397181b9bab5e5467 Mon Sep 17 00:00:00 2001 From: Radivoje Jovanovic Date: Fri, 31 Jul 2015 08:07:45 -0700 Subject: thermal/powerclamp: add cpu id for skylake h/s Add support for Intel Skylake H/S Signed-off-by: Radivoje Jovanovic Signed-off-by: Zhang Rui diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 5820e85..63879a1 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -698,6 +698,7 @@ static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = { { X86_VENDOR_INTEL, 6, 0x4f}, { X86_VENDOR_INTEL, 6, 0x56}, { X86_VENDOR_INTEL, 6, 0x57}, + { X86_VENDOR_INTEL, 6, 0x5e}, {} }; MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); -- cgit v0.10.2 From 35676de2164ff37209431e7e0e49a7466720d9be Mon Sep 17 00:00:00 2001 From: Radivoje Jovanovic Date: Fri, 31 Jul 2015 08:07:54 -0700 Subject: thermal/powerclamp: add cpu id for Skylake u/y Add support for Intel Skylake u/y Signed-off-by: Radivoje Jovanovic Signed-off-by: Zhang Rui diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 63879a1..6e01723 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -695,6 +695,7 @@ static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = { { X86_VENDOR_INTEL, 6, 0x46}, { X86_VENDOR_INTEL, 6, 0x4c}, { X86_VENDOR_INTEL, 6, 0x4d}, + { X86_VENDOR_INTEL, 6, 0x4e}, { X86_VENDOR_INTEL, 6, 0x4f}, { X86_VENDOR_INTEL, 6, 0x56}, { X86_VENDOR_INTEL, 6, 0x57}, -- cgit v0.10.2 From 1f22dc4494e203d6987fc708f414b4adf8614036 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 29 Jun 2015 08:53:18 -0700 Subject: thermal/powerclamp: add cpu id for denlow platform Add support for Intel Denlow UP server platform. 
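These one-line table additions work because the driver only loads on CPUs matched by its x86_cpu_id table; a condensed sketch of that gate (the init helper name here is made up for illustration, and the table shows just the new entry):

	#include <linux/module.h>
	#include <asm/cpu_device_id.h>

	static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = {
		{ X86_VENDOR_INTEL, 6, 0x47},	/* the Denlow entry added below */
		{}				/* terminator */
	};
	MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

	static int __init powerclamp_init_check(void)
	{
		/* x86_match_cpu() walks the table against the boot CPU's
		 * vendor/family/model; no match means the module won't load. */
		if (!x86_match_cpu(intel_powerclamp_ids))
			return -ENODEV;
		return 0;
	}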
Signed-off-by: Jacob Pan Signed-off-by: Zhang Rui diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 6e01723..a42e8a3 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -693,6 +693,7 @@ static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = { { X86_VENDOR_INTEL, 6, 0x3f}, { X86_VENDOR_INTEL, 6, 0x45}, { X86_VENDOR_INTEL, 6, 0x46}, + { X86_VENDOR_INTEL, 6, 0x47}, { X86_VENDOR_INTEL, 6, 0x4c}, { X86_VENDOR_INTEL, 6, 0x4d}, { X86_VENDOR_INTEL, 6, 0x4e}, -- cgit v0.10.2 From 25a0a5ce16ecd7e60c4cf1436892433873e9d99d Mon Sep 17 00:00:00 2001 From: Ni Wade Date: Tue, 14 Jul 2015 15:40:56 +0800 Subject: thermal: add available policies sysfs attribute The Linux thermal framework supports changing the thermal governor policy from userspace, but it cannot show which policies are available. This patch adds an available_policies attribute to the thermal framework; it lists the thermal governors which can be used for a particular zone. This attribute is read only. Signed-off-by: Wei Ni Reviewed-by: Javi Merino Signed-off-by: Zhang Rui diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index c1f6864..10f062e 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -180,6 +180,7 @@ Thermal zone device sys I/F, created once it's registered: |---temp: Current temperature |---mode: Working mode of the thermal zone |---policy: Thermal governor used for this zone + |---available_policies: Available thermal governors for this zone |---trip_point_[0-*]_temp: Trip point temperature |---trip_point_[0-*]_type: Trip point type |---trip_point_[0-*]_hyst: Hysteresis value for this trip point @@ -256,6 +257,10 @@ policy One of the various thermal governors used for a particular zone. RW, Required +available_policies + Available thermal governors which can be used for a particular zone. + RO, Required + trip_point_[0-*]_temp The temperature above which trip point will be fired.
Unit: millidegree Celsius @@ -417,6 +422,7 @@ method, the sys I/F structure will be built like this: |---temp: 37000 |---mode: enabled |---policy: step_wise + |---available_policies: step_wise fair_share |---trip_point_0_temp: 100000 |---trip_point_0_type: critical |---trip_point_1_temp: 80000 diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 04659bf..c470095 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -847,6 +847,27 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf) return sprintf(buf, "%s\n", tz->governor->name); } +static ssize_t +available_policies_show(struct device *dev, struct device_attribute *devattr, + char *buf) +{ + struct thermal_governor *pos; + ssize_t count = 0; + ssize_t size = PAGE_SIZE; + + mutex_lock(&thermal_governor_lock); + + list_for_each_entry(pos, &thermal_governor_list, governor_list) { + size = PAGE_SIZE - count; + count += scnprintf(buf + count, size, "%s ", pos->name); + } + count += scnprintf(buf + count, size, "\n"); + + mutex_unlock(&thermal_governor_lock); + + return count; +} + #ifdef CONFIG_THERMAL_EMULATION static ssize_t emul_temp_store(struct device *dev, struct device_attribute *attr, @@ -1032,6 +1053,7 @@ static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store); static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store); +static DEVICE_ATTR(available_policies, S_IRUGO, available_policies_show, NULL); /* sys I/F for cooling device */ #define to_cooling_device(_dev) \ @@ -1817,6 +1839,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (result) goto unregister; + /* Create available_policies attribute */ + result = device_create_file(&tz->device, &dev_attr_available_policies); + if (result) + goto unregister; + /* Update 'this' zone's governor information */ mutex_lock(&thermal_governor_lock); @@ -1917,6 +1944,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) if (tz->ops->get_mode) device_remove_file(&tz->device, &dev_attr_mode); device_remove_file(&tz->device, &dev_attr_policy); + device_remove_file(&tz->device, &dev_attr_available_policies); remove_trip_attrs(tz); thermal_set_governor(tz, NULL); -- cgit v0.10.2 From 17e8351a77397e8a83727eb17e3a3e9b8ab5257a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Jul 2015 08:12:54 +0200 Subject: thermal: consistently use int for temperatures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The thermal code uses int, long and unsigned long for temperatures in different places. Using an unsigned type limits the thermal framework to positive temperatures without need. Also several drivers currently will report temperatures near UINT_MAX for temperatures below 0°C. This will probably immediately shut the machine down due to overtemperature if started below 0°C. 'long' is 64bit on several architectures. This is not needed since INT_MAX °mC is above the melting point of all known materials. Consistently use a plain 'int' for temperatures throughout the thermal code and the drivers. This only changes the places in the drivers where the temperature is passed around as pointer, when drivers internally use another type this is not changed. 
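The headroom claim is easy to verify with a little arithmetic (plain C, nothing driver-specific):

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		/* INT_MAX millidegrees C = 2147483.647 degrees C, roughly
		 * 2.1 million degrees -- far above tungsten's ~3422 degrees C,
		 * the highest melting point of any metal -- while INT_MIN
		 * still covers any physically meaningful negative reading. */
		printf("%.3f .. %.3f degC\n", INT_MIN / 1000.0, INT_MAX / 1000.0);
		return 0;
	}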
Signed-off-by: Sascha Hauer Acked-by: Geert Uytterhoeven Reviewed-by: Jean Delvare Reviewed-by: Lukasz Majewski Reviewed-by: Darren Hart Reviewed-by: Heiko Stuebner Reviewed-by: Peter Feuerer Cc: Punit Agrawal Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Jean Delvare Cc: Peter Feuerer Cc: Heiko Stuebner Cc: Lukasz Majewski Cc: Stephen Warren Cc: Thierry Reding Cc: linux-acpi@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-omap@vger.kernel.org Cc: linux-samsung-soc@vger.kernel.org Cc: Guenter Roeck Cc: Rafael J. Wysocki Cc: Maxime Ripard Cc: Darren Hart Cc: lm-sensors@lm-sensors.org Signed-off-by: Zhang Rui diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 6d4e44e..e66ad25 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -529,8 +529,7 @@ static void acpi_thermal_check(void *data) /* sys I/F for generic thermal sysfs support */ -static int thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) +static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) { struct acpi_thermal *tz = thermal->devdata; int result; @@ -637,7 +636,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal, } static int thermal_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) + int trip, int *temp) { struct acpi_thermal *tz = thermal->devdata; int i; @@ -690,7 +689,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, } static int thermal_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temperature) { + int *temperature) +{ struct acpi_thermal *tz = thermal->devdata; if (tz->trips.critical.flags.valid) { @@ -713,8 +713,8 @@ static int thermal_get_trend(struct thermal_zone_device *thermal, return -EINVAL; if (type == THERMAL_TRIP_ACTIVE) { - unsigned long trip_temp; - unsigned long temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET( + int trip_temp; + int temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET( tz->temperature, tz->kelvin_offset); if (thermal_get_trip_temp(thermal, trip, &trip_temp)) return -EINVAL; diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index fe41d5a..e4e57bb 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -104,7 +104,7 @@ static inline long lm75_reg_to_mc(s16 temp, u8 resolution) /* sysfs attributes for hwmon */ -static int lm75_read_temp(void *dev, long *temp) +static int lm75_read_temp(void *dev, int *temp) { struct lm75_data *data = lm75_update_device(dev); diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index dc0b76c..feed306 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -477,7 +477,7 @@ static int ntc_thermistor_get_ohm(struct ntc_data *data) return -EINVAL; } -static int ntc_read_temp(void *dev, long *temp) +static int ntc_read_temp(void *dev, int *temp) { struct ntc_data *data = dev_get_drvdata(dev); int ohm; diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 9da2735..6548262 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -98,7 +98,7 @@ static struct tmp102 *tmp102_update_device(struct device *dev) return tmp102; } -static int tmp102_read_temp(void *dev, long *temp) +static int tmp102_read_temp(void *dev, int *temp) { struct tmp102 *tmp102 = tmp102_update_device(dev); diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c index c011699..4857943 100644 --- 
a/drivers/input/touchscreen/sun4i-ts.c +++ b/drivers/input/touchscreen/sun4i-ts.c @@ -191,7 +191,7 @@ static void sun4i_ts_close(struct input_dev *dev) writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC); } -static int sun4i_get_temp(const struct sun4i_ts_data *ts, long *temp) +static int sun4i_get_temp(const struct sun4i_ts_data *ts, int *temp) { /* No temp_data until the first irq */ if (ts->temp_data == -1) @@ -202,7 +202,7 @@ static int sun4i_get_temp(const struct sun4i_ts_data *ts, long *temp) return 0; } -static int sun4i_get_tz_temp(void *data, long *temp) +static int sun4i_get_tz_temp(void *data, int *temp) { return sun4i_get_temp(data, temp); } @@ -215,14 +215,14 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, char *buf) { struct sun4i_ts_data *ts = dev_get_drvdata(dev); - long temp; + int temp; int error; error = sun4i_get_temp(ts, &temp); if (error) return error; - return sprintf(buf, "%ld\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t show_temp_label(struct device *dev, diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 1ef02da..460fa67 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -346,8 +346,7 @@ static void acerhdf_check_param(struct thermal_zone_device *thermal) * as late as the polling interval is since we can't do that in the respective * accessors of the module parameters. */ -static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal, - unsigned long *t) +static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal, int *t) { int temp, err = 0; @@ -453,7 +452,7 @@ static int acerhdf_get_trip_type(struct thermal_zone_device *thermal, int trip, } static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip, - unsigned long *temp) + int *temp) { if (trip != 0) return -EINVAL; @@ -464,7 +463,7 @@ static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip, } static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip, - unsigned long *temp) + int *temp) { if (trip == 0) *temp = fanon; @@ -477,7 +476,7 @@ static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip, } static int acerhdf_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temperature) + int *temperature) { *temperature = ACERHDF_TEMP_CRIT; return 0; diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index 0944e83..9f713b8 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -132,7 +132,7 @@ static int is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max) * to achieve very close approximate temp value with less than * 0.5C error */ -static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) +static int adc_to_temp(int direct, uint16_t adc_val, int *tp) { int temp; @@ -174,14 +174,13 @@ static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) * * Can sleep */ -static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp) +static int mid_read_temp(struct thermal_zone_device *tzd, int *temp) { struct thermal_device_info *td_info = tzd->devdata; uint16_t adc_val, addr; uint8_t data = 0; int ret; - unsigned long curr_temp; - + int curr_temp; addr = td_info->chnl_addr; @@ -453,7 +452,7 @@ static SIMPLE_DEV_PM_OPS(mid_thermal_pm, * * Can sleep */ -static int read_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) +static int 
read_curr_temp(struct thermal_zone_device *tzd, int *temp) { WARN_ON(tzd == NULL); return mid_read_temp(tzd, temp); diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c index 1c202cc..907293e 100644 --- a/drivers/power/charger-manager.c +++ b/drivers/power/charger-manager.c @@ -619,7 +619,7 @@ static int cm_get_battery_temperature(struct charger_manager *cm, #ifdef CONFIG_THERMAL if (cm->tzd_batt) { - ret = thermal_zone_get_temp(cm->tzd_batt, (unsigned long *)temp); + ret = thermal_zone_get_temp(cm->tzd_batt, temp); if (!ret) /* Calibrate temperature unit */ *temp /= 100; diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 869284c..456987c 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -557,7 +557,7 @@ EXPORT_SYMBOL_GPL(power_supply_unreg_notifier); #ifdef CONFIG_THERMAL static int power_supply_read_temp(struct thermal_zone_device *tzd, - unsigned long *temp) + int *temp) { struct power_supply *psy; union power_supply_propval val; diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index 01255fd..26b8d32 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -155,7 +155,7 @@ static bool armada_is_valid(struct armada_thermal_priv *priv) } static int armada_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct armada_thermal_priv *priv = thermal->devdata; unsigned long reg; diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 2fb273c..652acd8 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -107,8 +107,7 @@ static int db8500_cdev_unbind(struct thermal_zone_device *thermal, } /* Callback to get current temperature */ -static int db8500_sys_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) +static int db8500_sys_get_temp(struct thermal_zone_device *thermal, int *temp) { struct db8500_thermal_zone *pzone = thermal->devdata; @@ -180,7 +179,7 @@ static int db8500_sys_get_trip_type(struct thermal_zone_device *thermal, /* Callback to get trip point temperature */ static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) + int trip, int *temp) { struct db8500_thermal_zone *pzone = thermal->devdata; struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; @@ -195,7 +194,7 @@ static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal, /* Callback to get critical trip point temperature */ static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct db8500_thermal_zone *pzone = thermal->devdata; struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c index 09f6e30..a0bc9de 100644 --- a/drivers/thermal/dove_thermal.c +++ b/drivers/thermal/dove_thermal.c @@ -93,7 +93,7 @@ static int dove_init_sensor(const struct dove_thermal_priv *priv) } static int dove_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { unsigned long reg; struct dove_thermal_priv *priv = thermal->devdata; diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c index c2c10bb..34fe365 100644 --- a/drivers/thermal/fair_share.c +++ b/drivers/thermal/fair_share.c @@ -34,7 +34,7 @@ static int get_trip_level(struct thermal_zone_device *tz) { int count = 0; - unsigned long trip_temp; + int trip_temp; 
enum thermal_trip_type trip_type; if (tz->trips == 0 || !tz->ops->get_trip_temp) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index c5dd76b..70836c5 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -25,14 +25,13 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { - long trip_temp; - unsigned long trip_hyst; + int trip_temp, trip_hyst; struct thermal_instance *instance; tz->ops->get_trip_temp(tz, trip, &trip_temp); tz->ops->get_trip_hyst(tz, trip, &trip_hyst); - dev_dbg(&tz->device, "Trip%d[temp=%ld]:temp=%d:hyst=%ld\n", + dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n", trip, trip_temp, tz->temperature, trip_hyst); diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index d5dd357..49aa068 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -155,7 +155,7 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) mutex_unlock(&data->thermal_lock); } -static int hisi_thermal_get_temp(void *_sensor, long *temp) +static int hisi_thermal_get_temp(void *_sensor, int *temp) { struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; @@ -178,7 +178,7 @@ static int hisi_thermal_get_temp(void *_sensor, long *temp) data->irq_bind_sensor = sensor_id; mutex_unlock(&data->thermal_lock); - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%ld, thres=%d\n", + dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", sensor->id, data->irq_enabled, *temp, sensor->thres_temp); /* * Bind irq to sensor for two cases: diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index fde4c28..4bec1d3 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -98,10 +98,10 @@ struct imx_thermal_data { enum thermal_device_mode mode; struct regmap *tempmon; u32 c1, c2; /* See formula in imx_get_sensor_data() */ - unsigned long temp_passive; - unsigned long temp_critical; - unsigned long alarm_temp; - unsigned long last_temp; + int temp_passive; + int temp_critical; + int alarm_temp; + int last_temp; bool irq_enabled; int irq; struct clk *thermal_clk; @@ -109,7 +109,7 @@ struct imx_thermal_data { }; static void imx_set_panic_temp(struct imx_thermal_data *data, - signed long panic_temp) + int panic_temp) { struct regmap *map = data->tempmon; int critical_value; @@ -121,7 +121,7 @@ static void imx_set_panic_temp(struct imx_thermal_data *data, } static void imx_set_alarm_temp(struct imx_thermal_data *data, - signed long alarm_temp) + int alarm_temp) { struct regmap *map = data->tempmon; int alarm_value; @@ -133,7 +133,7 @@ static void imx_set_alarm_temp(struct imx_thermal_data *data, TEMPSENSE0_ALARM_VALUE_SHIFT); } -static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp) +static int imx_get_temp(struct thermal_zone_device *tz, int *temp) { struct imx_thermal_data *data = tz->devdata; struct regmap *map = data->tempmon; @@ -189,13 +189,13 @@ static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp) if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) { imx_set_alarm_temp(data, data->temp_passive); - dev_dbg(&tz->device, "thermal alarm off: T < %lu\n", + dev_dbg(&tz->device, "thermal alarm off: T < %d\n", data->alarm_temp / 1000); } } if (*temp != data->last_temp) { - dev_dbg(&tz->device, "millicelsius: %ld\n", *temp); + dev_dbg(&tz->device, "millicelsius: %d\n", *temp); data->last_temp = *temp; } @@ 
-262,8 +262,7 @@ static int imx_get_trip_type(struct thermal_zone_device *tz, int trip, return 0; } -static int imx_get_crit_temp(struct thermal_zone_device *tz, - unsigned long *temp) +static int imx_get_crit_temp(struct thermal_zone_device *tz, int *temp) { struct imx_thermal_data *data = tz->devdata; @@ -272,7 +271,7 @@ static int imx_get_crit_temp(struct thermal_zone_device *tz, } static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long *temp) + int *temp) { struct imx_thermal_data *data = tz->devdata; @@ -282,7 +281,7 @@ static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip, } static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long temp) + int temp) { struct imx_thermal_data *data = tz->devdata; @@ -434,7 +433,7 @@ static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev) { struct imx_thermal_data *data = dev; - dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n", + dev_dbg(&data->tz->device, "THERMAL ALARM: T > %d\n", data->alarm_temp / 1000); thermal_zone_device_update(data->tz); diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index 031018e..5836e55 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -186,7 +186,7 @@ static int int3400_thermal_run_osc(acpi_handle handle, } static int int3400_thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { *temp = 20 * 1000; /* faked temp sensor with 20C */ return 0; diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c index 1e25133..b9b2666 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c @@ -20,7 +20,7 @@ #include "int340x_thermal_zone.h" static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, - unsigned long *temp) + int *temp) { struct int34x_thermal_zone *d = zone->devdata; unsigned long long tmp; @@ -49,7 +49,7 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, } static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone, - int trip, unsigned long *temp) + int trip, int *temp) { struct int34x_thermal_zone *d = zone->devdata; int i; @@ -114,7 +114,7 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, } static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, - int trip, unsigned long temp) + int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; acpi_status status; @@ -136,7 +136,7 @@ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, static int int340x_thermal_get_trip_hyst(struct thermal_zone_device *zone, - int trip, unsigned long *temp) + int trip, int *temp) { struct int34x_thermal_zone *d = zone->devdata; acpi_status status; @@ -163,7 +163,7 @@ static struct thermal_zone_device_ops int340x_thermal_zone_ops = { }; static int int340x_thermal_get_trip_config(acpi_handle handle, char *name, - unsigned long *temp) + int *temp) { unsigned long long r; acpi_status status; diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h index 9f38ab7..aaadf72 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h @@ -21,7 +21,7 @@ #define 
INT340X_THERMAL_MAX_ACT_TRIP_COUNT 10 struct active_trip { - unsigned long temp; + int temp; int id; bool valid; }; @@ -31,11 +31,11 @@ struct int34x_thermal_zone { struct active_trip act_trips[INT340X_THERMAL_MAX_ACT_TRIP_COUNT]; unsigned long *aux_trips; int aux_trip_nr; - unsigned long psv_temp; + int psv_temp; int psv_trip_id; - unsigned long crt_temp; + int crt_temp; int crt_trip_id; - unsigned long hot_temp; + int hot_temp; int hot_trip_id; struct thermal_zone_device *zone; struct thermal_zone_device_ops *override_ops; diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c index 3df3dc3..ccc0ad0 100644 --- a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -145,7 +145,7 @@ static int get_tjmax(void) return -EINVAL; } -static int read_temp_msr(unsigned long *temp) +static int read_temp_msr(int *temp) { int cpu; u32 eax, edx; @@ -177,7 +177,7 @@ err_ret: } static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone, - unsigned long *temp) + int *temp) { int ret; diff --git a/drivers/thermal/intel_quark_dts_thermal.c b/drivers/thermal/intel_quark_dts_thermal.c index 4434ec8..5ed90e6 100644 --- a/drivers/thermal/intel_quark_dts_thermal.c +++ b/drivers/thermal/intel_quark_dts_thermal.c @@ -186,7 +186,7 @@ static int soc_dts_disable(struct thermal_zone_device *tzd) return ret; } -static int _get_trip_temp(int trip, unsigned long *temp) +static int _get_trip_temp(int trip, int *temp) { int status; u32 out; @@ -212,19 +212,18 @@ static int _get_trip_temp(int trip, unsigned long *temp) } static inline int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, unsigned long *temp) + int trip, int *temp) { return _get_trip_temp(trip, temp); } -static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, - unsigned long *temp) +static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp) { return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp); } static int update_trip_temp(struct soc_sensor_entry *aux_entry, - int trip, unsigned long temp) + int trip, int temp) { u32 out; u32 temp_out; @@ -272,7 +271,7 @@ failed: } static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long temp) + int temp) { return update_trip_temp(tzd->devdata, trip, temp); } @@ -289,7 +288,7 @@ static int sys_get_trip_type(struct thermal_zone_device *thermal, } static int sys_get_curr_temp(struct thermal_zone_device *tzd, - unsigned long *temp) + int *temp) { u32 out; int ret; diff --git a/drivers/thermal/intel_soc_dts_iosf.c b/drivers/thermal/intel_soc_dts_iosf.c index 42e4b6a..5841d1d 100644 --- a/drivers/thermal/intel_soc_dts_iosf.c +++ b/drivers/thermal/intel_soc_dts_iosf.c @@ -80,7 +80,7 @@ err_ret: } static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long *temp) + int *temp) { int status; u32 out; @@ -106,7 +106,7 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip, } static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts, - int thres_index, unsigned long temp, + int thres_index, int temp, enum thermal_trip_type trip_type) { int status; @@ -196,7 +196,7 @@ err_restore_ptps: } static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long temp) + int temp) { struct intel_soc_dts_sensor_entry *dts = tzd->devdata; struct intel_soc_dts_sensors *sensors = dts->sensors; @@ -226,7 +226,7 @@ static int 
sys_get_trip_type(struct thermal_zone_device *tzd, } static int sys_get_curr_temp(struct thermal_zone_device *tzd, - unsigned long *temp) + int *temp) { int status; u32 out; diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index 11041fe..8922366 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -33,7 +33,7 @@ struct kirkwood_thermal_priv { }; static int kirkwood_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { unsigned long reg; struct kirkwood_thermal_priv *priv = thermal->devdata; diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index b295b2b..42b7d42 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -91,7 +91,7 @@ struct __thermal_zone { /*** DT thermal zone device callbacks ***/ static int of_thermal_get_temp(struct thermal_zone_device *tz, - unsigned long *temp) + int *temp) { struct __thermal_zone *data = tz->devdata; @@ -177,7 +177,7 @@ EXPORT_SYMBOL_GPL(of_thermal_get_trip_points); * Return: zero on success, error code otherwise */ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, - unsigned long temp) + int temp) { struct __thermal_zone *data = tz->devdata; @@ -311,7 +311,7 @@ static int of_thermal_get_trip_type(struct thermal_zone_device *tz, int trip, } static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long *temp) + int *temp) { struct __thermal_zone *data = tz->devdata; @@ -324,7 +324,7 @@ static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip, } static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long temp) + int temp) { struct __thermal_zone *data = tz->devdata; @@ -338,7 +338,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, } static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip, - unsigned long *hyst) + int *hyst) { struct __thermal_zone *data = tz->devdata; @@ -351,7 +351,7 @@ static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip, } static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip, - unsigned long hyst) + int hyst) { struct __thermal_zone *data = tz->devdata; @@ -365,7 +365,7 @@ static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip, } static int of_thermal_get_crit_temp(struct thermal_zone_device *tz, - unsigned long *temp) + int *temp) { struct __thermal_zone *data = tz->devdata; int i; diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 4672250..045aea59 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -92,8 +92,8 @@ struct power_allocator_params { * Return: The power budget for the next period. 
*/ static u32 pid_controller(struct thermal_zone_device *tz, - unsigned long current_temp, - unsigned long control_temp, + int current_temp, + int control_temp, u32 max_allocatable_power) { s64 p, i, d, power_range; @@ -102,7 +102,7 @@ static u32 pid_controller(struct thermal_zone_device *tz, max_power_frac = int_to_frac(max_allocatable_power); - err = ((s32)control_temp - (s32)current_temp); + err = control_temp - current_temp; err = int_to_frac(err); /* Calculate the proportional term */ @@ -223,8 +223,8 @@ static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, } static int allocate_power(struct thermal_zone_device *tz, - unsigned long current_temp, - unsigned long control_temp) + int current_temp, + int control_temp) { struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; @@ -326,7 +326,7 @@ static int allocate_power(struct thermal_zone_device *tz, granted_power, total_granted_power, num_actors, power_range, max_allocatable_power, current_temp, - (s32)control_temp - (s32)current_temp); + control_temp - current_temp); devm_kfree(&tz->device, req_power); unlock: @@ -411,7 +411,7 @@ static int power_allocator_bind(struct thermal_zone_device *tz) { int ret; struct power_allocator_params *params; - unsigned long switch_on_temp, control_temp; + int switch_on_temp, control_temp; u32 temperature_threshold; if (!tz->tzp || !tz->tzp->sustainable_power) { @@ -476,7 +476,7 @@ static void power_allocator_unbind(struct thermal_zone_device *tz) static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) { int ret; - unsigned long switch_on_temp, control_temp, current_temp; + int switch_on_temp, control_temp, current_temp; struct power_allocator_params *params = tz->governor_data; /* diff --git a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c index c8d27b8..b677aad 100644 --- a/drivers/thermal/qcom-spmi-temp-alarm.c +++ b/drivers/thermal/qcom-spmi-temp-alarm.c @@ -117,7 +117,7 @@ static int qpnp_tm_update_temp_no_adc(struct qpnp_tm_chip *chip) return 0; } -static int qpnp_tm_get_temp(void *data, long *temp) +static int qpnp_tm_get_temp(void *data, int *temp) { struct qpnp_tm_chip *chip = data; int ret, mili_celsius; diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index fe4e767..5d4ae7d 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -200,8 +200,7 @@ err_out_unlock: return ret; } -static int rcar_thermal_get_temp(struct thermal_zone_device *zone, - unsigned long *temp) +static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp) { struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); @@ -235,7 +234,7 @@ static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone, } static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, - int trip, unsigned long *temp) + int trip, int *temp) { struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); struct device *dev = rcar_priv_to_dev(priv); @@ -299,7 +298,7 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable) static void rcar_thermal_work(struct work_struct *work) { struct rcar_thermal_priv *priv; - unsigned long cctemp, nctemp; + int cctemp, nctemp; priv = container_of(work, struct rcar_thermal_priv, work.work); diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index cd8f5f93..c89ffb2 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -64,7 
+64,7 @@ struct rockchip_tsadc_chip { void (*control)(void __iomem *reg, bool on); /* Per-sensor methods */ - int (*get_temp)(int chn, void __iomem *reg, long *temp); + int (*get_temp)(int chn, void __iomem *reg, int *temp); void (*set_tshut_temp)(int chn, void __iomem *reg, long temp); void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m); }; @@ -191,7 +191,7 @@ static u32 rk_tsadcv2_temp_to_code(long temp) return 0; } -static long rk_tsadcv2_code_to_temp(u32 code) +static int rk_tsadcv2_code_to_temp(u32 code) { unsigned int low = 0; unsigned int high = ARRAY_SIZE(v2_code_table) - 1; @@ -277,7 +277,7 @@ static void rk_tsadcv2_control(void __iomem *regs, bool enable) writel_relaxed(val, regs + TSADCV2_AUTO_CON); } -static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, long *temp) +static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, int *temp) { u32 val; @@ -366,7 +366,7 @@ static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev) return IRQ_HANDLED; } -static int rockchip_thermal_get_temp(void *_sensor, long *out_temp) +static int rockchip_thermal_get_temp(void *_sensor, int *out_temp) { struct rockchip_thermal_sensor *sensor = _sensor; struct rockchip_thermal_data *thermal = sensor->thermal; @@ -374,7 +374,7 @@ static int rockchip_thermal_get_temp(void *_sensor, long *out_temp) int retval; retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp); - dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %ld, retval: %d\n", + dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %d, retval: %d\n", sensor->id, *out_temp, retval); return retval; diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 531f4b17..9ec29a3 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -207,8 +207,7 @@ struct exynos_tmu_data { int (*tmu_initialize)(struct platform_device *pdev); void (*tmu_control)(struct platform_device *pdev, bool on); int (*tmu_read)(struct exynos_tmu_data *data); - void (*tmu_set_emulation)(struct exynos_tmu_data *data, - unsigned long temp); + void (*tmu_set_emulation)(struct exynos_tmu_data *data, int temp); void (*tmu_clear_irqs)(struct exynos_tmu_data *data); }; @@ -216,7 +215,7 @@ static void exynos_report_trigger(struct exynos_tmu_data *p) { char data[10], *envp[] = { data, NULL }; struct thermal_zone_device *tz = p->tzd; - unsigned long temp; + int temp; unsigned int i; if (!tz) { @@ -517,7 +516,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev) struct thermal_zone_device *tz = data->tzd; unsigned int status, trim_info; unsigned int rising_threshold = 0, falling_threshold = 0; - unsigned long temp, temp_hist; + int temp, temp_hist; int ret = 0, threshold_code, i, sensor_id, cal_type; status = readb(data->base + EXYNOS_TMU_REG_STATUS); @@ -610,7 +609,7 @@ static int exynos5440_tmu_initialize(struct platform_device *pdev) struct exynos_tmu_data *data = platform_get_drvdata(pdev); unsigned int trim_info = 0, con, rising_threshold; int ret = 0, threshold_code; - unsigned long crit_temp = 0; + int crit_temp = 0; /* * For exynos5440 soc triminfo value is swapped between TMU0 and @@ -663,7 +662,7 @@ static int exynos7_tmu_initialize(struct platform_device *pdev) unsigned int status, trim_info; unsigned int rising_threshold = 0, falling_threshold = 0; int ret = 0, threshold_code, i; - unsigned long temp, temp_hist; + int temp, temp_hist; unsigned int reg_off, bit_off; status = readb(data->base + EXYNOS_TMU_REG_STATUS); @@ -876,7 +875,7 @@ static void 
exynos7_tmu_control(struct platform_device *pdev, bool on) writel(con, data->base + EXYNOS_TMU_REG_CONTROL); } -static int exynos_get_temp(void *p, long *temp) +static int exynos_get_temp(void *p, int *temp) { struct exynos_tmu_data *data = p; @@ -896,7 +895,7 @@ static int exynos_get_temp(void *p, long *temp) #ifdef CONFIG_THERMAL_EMULATION static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val, - unsigned long temp) + int temp) { if (temp) { temp /= MCELSIUS; @@ -926,7 +925,7 @@ static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val, } static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, - unsigned long temp) + int temp) { unsigned int val; u32 emul_con; @@ -946,7 +945,7 @@ static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, } static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data, - unsigned long temp) + int temp) { unsigned int val; @@ -955,7 +954,7 @@ static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data, writel(val, data->base + EXYNOS5440_TMU_S0_7_DEBUG); } -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +static int exynos_tmu_set_emulation(void *drv_data, int temp) { struct exynos_tmu_data *data = drv_data; int ret = -EINVAL; @@ -978,7 +977,7 @@ out: #else #define exynos4412_tmu_set_emulation NULL #define exynos5440_tmu_set_emulation NULL -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +static int exynos_tmu_set_emulation(void *drv_data, int temp) { return -EINVAL; } #endif /* CONFIG_THERMAL_EMULATION */ diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index bddb717..534dd91 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -38,7 +38,7 @@ struct spear_thermal_dev { }; static inline int thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct spear_thermal_dev *stdev = thermal->devdata; diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c index 76c515d..44cbba9 100644 --- a/drivers/thermal/st/st_thermal.c +++ b/drivers/thermal/st/st_thermal.c @@ -111,8 +111,7 @@ static int st_thermal_calibration(struct st_thermal_sensor *sensor) } /* Callback to get temperature from HW*/ -static int st_thermal_get_temp(struct thermal_zone_device *th, - unsigned long *temperature) +static int st_thermal_get_temp(struct thermal_zone_device *th, int *temperature) { struct st_thermal_sensor *sensor = th->devdata; struct device *dev = sensor->dev; @@ -159,7 +158,7 @@ static int st_thermal_get_trip_type(struct thermal_zone_device *th, } static int st_thermal_get_trip_temp(struct thermal_zone_device *th, - int trip, unsigned long *temp) + int trip, int *temp) { struct st_thermal_sensor *sensor = th->devdata; struct device *dev = sensor->dev; diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 5a0f12d..2f9f708 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -113,7 +113,7 @@ static void update_passive_instance(struct thermal_zone_device *tz, static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { - long trip_temp; + int trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; struct thermal_instance *instance; @@ -135,7 +135,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trace_thermal_zone_trip(tz, trip, trip_type); } - dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n", + 
dev_dbg(&tz->device, "Trip%d[type=%d,temp=%d]:trend=%d,throttle=%d\n", trip, trip_type, trip_temp, trend, throttle); mutex_lock(&tz->lock); diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c index 9197fc0..74ea576 100644 --- a/drivers/thermal/tegra_soctherm.c +++ b/drivers/thermal/tegra_soctherm.c @@ -293,7 +293,7 @@ static int enable_tsensor(struct tegra_soctherm *tegra, * H denotes an addition of 0.5 Celsius and N denotes negation * of the final value. */ -static long translate_temp(u16 val) +static int translate_temp(u16 val) { long t; @@ -306,7 +306,7 @@ static long translate_temp(u16 val) return t; } -static int tegra_thermctl_get_temp(void *data, long *out_temp) +static int tegra_thermctl_get_temp(void *data, int *out_temp) { struct tegra_thermctl_zone *zone = data; u32 val; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index c470095..387c428 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -426,7 +426,7 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz, static void handle_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { - long trip_temp; + int trip_temp; tz->ops->get_trip_temp(tz, trip, &trip_temp); @@ -474,12 +474,12 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) * * Return: On success returns 0, an error code otherwise */ -int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp) +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) { int ret = -EINVAL; #ifdef CONFIG_THERMAL_EMULATION int count; - unsigned long crit_temp = -1UL; + int crit_temp = INT_MAX; enum thermal_trip_type type; #endif @@ -516,8 +516,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_get_temp); static void update_temperature(struct thermal_zone_device *tz) { - long temp; - int ret; + int temp, ret; ret = thermal_zone_get_temp(tz, &temp); if (ret) { @@ -577,15 +576,14 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_zone_device *tz = to_thermal_zone(dev); - long temperature; - int ret; + int temperature, ret; ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } static ssize_t @@ -689,7 +687,7 @@ trip_point_temp_show(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - long temperature; + int temperature; if (!tz->ops->get_trip_temp) return -EPERM; @@ -702,7 +700,7 @@ trip_point_temp_show(struct device *dev, struct device_attribute *attr, if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } static ssize_t @@ -711,7 +709,7 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - unsigned long temperature; + int temperature; if (!tz->ops->set_trip_hyst) return -EPERM; @@ -719,7 +717,7 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, if (!sscanf(attr->attr.name, "trip_point_%d_hyst", &trip)) return -EINVAL; - if (kstrtoul(buf, 10, &temperature)) + if (kstrtoint(buf, 10, &temperature)) return -EINVAL; /* @@ -738,7 +736,7 @@ trip_point_hyst_show(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - unsigned long temperature; + 
int temperature; if (!tz->ops->get_trip_hyst) return -EPERM; @@ -748,7 +746,7 @@ trip_point_hyst_show(struct device *dev, struct device_attribute *attr, ret = tz->ops->get_trip_hyst(tz, trip, &temperature); - return ret ? ret : sprintf(buf, "%ld\n", temperature); + return ret ? ret : sprintf(buf, "%d\n", temperature); } static ssize_t diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 1967bee..06fd2ed9 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -69,7 +69,7 @@ static DEVICE_ATTR(name, 0444, name_show, NULL); static ssize_t temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) { - long temperature; + int temperature; int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); @@ -83,7 +83,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } static ssize_t @@ -95,14 +95,14 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); struct thermal_zone_device *tz = temp->tz; - long temperature; + int temperature; int ret; ret = tz->ops->get_trip_temp(tz, 0, &temperature); if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } @@ -142,7 +142,7 @@ thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, static bool thermal_zone_crit_temp_valid(struct thermal_zone_device *tz) { - unsigned long temp; + int temp; return tz->ops->get_crit_temp && !tz->ops->get_crit_temp(tz, &temp); } diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index c7c5b37..b213a12 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -76,14 +76,14 @@ static inline int ti_thermal_hotspot_temperature(int t, int s, int c) /* thermal zone ops */ /* Get temperature callback function for thermal zone */ -static inline int __ti_thermal_get_temp(void *devdata, long *temp) +static inline int __ti_thermal_get_temp(void *devdata, int *temp) { struct thermal_zone_device *pcb_tz = NULL; struct ti_thermal_data *data = devdata; struct ti_bandgap *bgp; const struct ti_temp_sensor *s; int ret, tmp, slope, constant; - unsigned long pcb_temp; + int pcb_temp; if (!data) return 0; @@ -119,7 +119,7 @@ static inline int __ti_thermal_get_temp(void *devdata, long *temp) } static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct ti_thermal_data *data = thermal->devdata; @@ -229,7 +229,7 @@ static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal, /* Get trip temperature callback functions for thermal zone */ static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) + int trip, int *temp) { if (!ti_thermal_is_valid_trip(trip)) return -EINVAL; @@ -280,7 +280,7 @@ static int ti_thermal_get_trend(struct thermal_zone_device *thermal, /* Get critical temperature callback functions for thermal zone */ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { /* shutdown zone */ return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp); diff --git a/drivers/thermal/x86_pkg_temp_thermal.c 
b/drivers/thermal/x86_pkg_temp_thermal.c index 50d1d2c..7fc919f 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -164,7 +164,7 @@ err_ret: return err; } -static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) +static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp) { u32 eax, edx; struct phy_dev_entry *phy_dev_entry; @@ -175,7 +175,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *tem if (eax & 0x80000000) { *temp = phy_dev_entry->tj_max - ((eax >> 16) & 0x7f) * 1000; - pr_debug("sys_get_curr_temp %ld\n", *temp); + pr_debug("sys_get_curr_temp %d\n", *temp); return 0; } @@ -183,7 +183,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *tem } static int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, unsigned long *temp) + int trip, int *temp) { u32 eax, edx; struct phy_dev_entry *phy_dev_entry; @@ -214,13 +214,13 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, *temp = phy_dev_entry->tj_max - thres_reg_value * 1000; else *temp = 0; - pr_debug("sys_get_trip_temp %ld\n", *temp); + pr_debug("sys_get_trip_temp %d\n", *temp); return 0; } static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long temp) + int temp) { u32 l, h; struct phy_dev_entry *phy_dev_entry; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 037e9df..17292fe 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,23 +92,19 @@ struct thermal_zone_device_ops { struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); - int (*get_temp) (struct thermal_zone_device *, unsigned long *); + int (*get_temp) (struct thermal_zone_device *, int *); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*get_trip_type) (struct thermal_zone_device *, int, enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_temp) (struct thermal_zone_device *, int, - unsigned long); - int (*get_trip_hyst) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_hyst) (struct thermal_zone_device *, int, - unsigned long); - int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); - int (*set_emul_temp) (struct thermal_zone_device *, unsigned long); + int (*get_trip_temp) (struct thermal_zone_device *, int, int *); + int (*set_trip_temp) (struct thermal_zone_device *, int, int); + int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); + int (*set_trip_hyst) (struct thermal_zone_device *, int, int); + int (*get_crit_temp) (struct thermal_zone_device *, int *); + int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, @@ -332,9 +328,9 @@ struct thermal_genl_event { * temperature. 
*/ struct thermal_zone_of_device_ops { - int (*get_temp)(void *, long *); + int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); - int (*set_emul_temp)(void *, unsigned long); + int (*set_emul_temp)(void *, int); }; /** @@ -406,7 +402,7 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *, const struct thermal_cooling_device_ops *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); -int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp); +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, @@ -457,7 +453,7 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name( const char *name) { return ERR_PTR(-ENODEV); } static inline int thermal_zone_get_temp( - struct thermal_zone_device *tz, unsigned long *temp) + struct thermal_zone_device *tz, int *temp) { return -ENODEV; } static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) { return -ENODEV; } diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h index 12e1321..5afae8f 100644 --- a/include/trace/events/thermal_power_allocator.h +++ b/include/trace/events/thermal_power_allocator.h @@ -11,7 +11,7 @@ TRACE_EVENT(thermal_power_allocator, u32 total_req_power, u32 *granted_power, u32 total_granted_power, size_t num_actors, u32 power_range, u32 max_allocatable_power, - unsigned long current_temp, s32 delta_temp), + int current_temp, s32 delta_temp), TP_ARGS(tz, req_power, total_req_power, granted_power, total_granted_power, num_actors, power_range, max_allocatable_power, current_temp, delta_temp), @@ -24,7 +24,7 @@ TRACE_EVENT(thermal_power_allocator, __field(size_t, num_actors ) __field(u32, power_range ) __field(u32, max_allocatable_power ) - __field(unsigned long, current_temp ) + __field(int, current_temp ) __field(s32, delta_temp ) ), TP_fast_assign( @@ -42,7 +42,7 @@ TRACE_EVENT(thermal_power_allocator, __entry->delta_temp = delta_temp; ), - TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%lu delta_temperature=%d", + TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%d delta_temperature=%d", __entry->tz_id, __print_array(__get_dynamic_array(req_power), __entry->num_actors, 4), -- cgit v0.10.2 From f6be0584930995f88ea3381cbcbcb315c2a184ad Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 6 Jul 2015 09:46:14 +0200 Subject: thermal: trivial: fix typo in comment Signed-off-by: Sascha Hauer Acked-by: Eduardo Valentin Signed-off-by: Zhang Rui diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 387c428..fcd1c17 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -465,7 +465,7 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) } /** - * thermal_zone_get_temp() - returns its the temperature of thermal zone + * thermal_zone_get_temp() - returns the temperature of a thermal zone * @tz: a valid pointer to a struct thermal_zone_device * @temp: a valid pointer to where to store the resulting temperature. 
* -- cgit v0.10.2 From dbdf2532b46256584447cd7e742bfb1b31ac56c4 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 6 Jul 2015 09:46:15 +0200 Subject: thermal: remove unnecessary call to thermal_zone_device_set_polling When the thermal zone has no get_temp callback, then thermal_zone_device_register() calls thermal_zone_device_set_polling() with a polling delay of 0. This only cancels the poll_queue. Since the poll_queue hasn't been scheduled this is a no-op. Remove it. Signed-off-by: Sascha Hauer Acked-by: Eduardo Valentin Signed-off-by: Zhang Rui diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index fcd1c17..305e381 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1873,9 +1873,6 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check); - if (!tz->ops->get_temp) - thermal_zone_device_set_polling(tz, 0); - thermal_zone_device_update(tz); return tz; -- cgit v0.10.2 From 79e5421cf0bd9b3e56f523e95e3511757edb9616 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 6 Jul 2015 09:46:16 +0200 Subject: thermal: Use IS_ENABLED instead of #ifdef Use IS_ENABLED(CONFIG_THERMAL_EMULATION) to make the code more readable and to get rid of the additional #ifdef around the variable definitions in thermal_zone_get_temp(). Signed-off-by: Sascha Hauer Reviewed-by: Lukasz Majewski Signed-off-by: Zhang Rui diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 305e381..9599465 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -477,11 +477,9 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) { int ret = -EINVAL; -#ifdef CONFIG_THERMAL_EMULATION int count; int crit_temp = INT_MAX; enum thermal_trip_type type; -#endif if (!tz || IS_ERR(tz) || !tz->ops->get_temp) goto exit; @@ -489,25 +487,21 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) mutex_lock(&tz->lock); ret = tz->ops->get_temp(tz, temp); -#ifdef CONFIG_THERMAL_EMULATION - if (!tz->emul_temperature) - goto skip_emul; - - for (count = 0; count < tz->trips; count++) { - ret = tz->ops->get_trip_type(tz, count, &type); - if (!ret && type == THERMAL_TRIP_CRITICAL) { - ret = tz->ops->get_trip_temp(tz, count, &crit_temp); - break; - } - } - if (ret) - goto skip_emul; + if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) { + for (count = 0; count < tz->trips; count++) { + ret = tz->ops->get_trip_type(tz, count, &type); + if (!ret && type == THERMAL_TRIP_CRITICAL) { + ret = tz->ops->get_trip_temp(tz, count, + &crit_temp); + break; + } + } - if (*temp < crit_temp) - *temp = tz->emul_temperature; -skip_emul: -#endif + if (!ret && *temp < crit_temp) + *temp = tz->emul_temperature; + } + mutex_unlock(&tz->lock); exit: return ret; @@ -866,7 +860,6 @@ available_policies_show(struct device *dev, struct device_attribute *devattr, return count; } -#ifdef CONFIG_THERMAL_EMULATION static ssize_t emul_temp_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -892,7 +885,6 @@ emul_temp_store(struct device *dev, struct device_attribute *attr, return ret ?
ret : count; } static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store); -#endif/*CONFIG_THERMAL_EMULATION*/ static ssize_t sustainable_power_show(struct device *dev, struct device_attribute *devattr, @@ -1822,11 +1814,12 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, goto unregister; } -#ifdef CONFIG_THERMAL_EMULATION - result = device_create_file(&tz->device, &dev_attr_emul_temp); - if (result) - goto unregister; -#endif + if (IS_ENABLED(CONFIG_THERMAL_EMULATION)) { + result = device_create_file(&tz->device, &dev_attr_emul_temp); + if (result) + goto unregister; + } + /* Create policy attribute */ result = device_create_file(&tz->device, &dev_attr_policy); if (result) -- cgit v0.10.2 From 934c93b8c193b62fc86256ea64aef65e9a7b4e9e Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 6 Jul 2015 09:46:17 +0200 Subject: thermal: Add comment explaining test for critical temperature The code testing if a temperature should be emulated or not is not obvious. Add a comment explaining why this test is done. Signed-off-by: Sascha Hauer Reviewed-by: Mikko Perttunen Signed-off-by: Zhang Rui diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 9599465..6a70778 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -498,6 +498,11 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) } } + /* + * Only allow emulating a temperature when the real temperature + * is below the critical temperature so that the emulation code + * cannot hide critical conditions. + */ if (!ret && *temp < crit_temp) + *temp = tz->emul_temperature; } -- cgit v0.10.2 From d0a12625d2ff2c63321b3cf48c48184748ab577a Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 10 Jun 2015 13:34:24 -0700 Subject: thermal: Add Intel PCH thermal driver This change adds a thermal driver for the Wildcat Point platform controller hub. This driver registers the PCH thermal sensor as a thermal zone and associates critical and hot trips if present. Signed-off-by: Tushar Dave Reviewed-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 118938e..0390044 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -340,6 +340,14 @@ config ACPI_THERMAL_REL tristate depends on ACPI +config INTEL_PCH_THERMAL + tristate "Intel PCH Thermal Reporting Driver" + depends on X86 && PCI + help + Enable this to support thermal reporting on certain intel PCHs. + Thermal reporting device will provide temperature reading, + programmable trip points and other information.
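One detail worth keeping in mind for the driver body below: the WPT_TEMP, WPT_CTT and WPT_PHL registers all encode temperature in 1/2 degree C steps with a -50 degC offset. A minimal decode sketch (illustrative only, not part of the patch; the 0x1ff mask matches the trip registers, while WPT_TEMP itself is masked with WPT_TEMP_TSR):

#include <linux/types.h>

/*
 * Illustrative sketch: mirrors the "raw * 1000 / 2 - 50000" arithmetic
 * the driver below applies to its sensor readings. A raw value of 150
 * decodes to 25000 millidegrees C, i.e. 25 degC.
 */
static inline long wpt_raw_to_mcelsius(u16 raw)
{
	return (long)(raw & 0x1ff) * 1000 / 2 - 50000;
}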
+ menu "Texas Instruments thermal drivers" source "drivers/thermal/ti-soc-thermal/Kconfig" endmenu diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 535dfee..26f1608 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL) += intel_quark_dts_thermal.o obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ +obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o obj-$(CONFIG_ST_THERMAL) += st/ obj-$(CONFIG_TEGRA_SOCTHERM) += tegra_soctherm.o obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c new file mode 100644 index 0000000..1650a62 --- /dev/null +++ b/drivers/thermal/intel_pch_thermal.c @@ -0,0 +1,286 @@ +/* intel_pch_thermal.c - Intel PCH Thermal driver + * + * Copyright (c) 2015, Intel Corporation. + * + * Authors: + * Tushar Dave + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include + +/* Intel PCH thermal Device IDs */ +#define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */ + +/* Wildcat Point-LP PCH Thermal registers */ +#define WPT_TEMP 0x0000 /* Temperature */ +#define WPT_TSC 0x04 /* Thermal Sensor Control */ +#define WPT_TSS 0x06 /* Thermal Sensor Status */ +#define WPT_TSEL 0x08 /* Thermal Sensor Enable and Lock */ +#define WPT_TSREL 0x0A /* Thermal Sensor Report Enable and Lock */ +#define WPT_TSMIC 0x0C /* Thermal Sensor SMI Control */ +#define WPT_CTT 0x0010 /* Catastrophic Trip Point */ +#define WPT_TAHV 0x0014 /* Thermal Alert High Value */ +#define WPT_TALV 0x0018 /* Thermal Alert Low Value */ +#define WPT_TL 0x00000040 /* Throttle Value */ +#define WPT_PHL 0x0060 /* PCH Hot Level */ +#define WPT_PHLC 0x62 /* PHL Control */ +#define WPT_TAS 0x80 /* Thermal Alert Status */ +#define WPT_TSPIEN 0x82 /* PCI Interrupt Event Enables */ +#define WPT_TSGPEN 0x84 /* General Purpose Event Enables */ + +/* Wildcat Point-LP PCH Thermal Register bit definitions */ +#define WPT_TEMP_TSR 0x00ff /* Temp TS Reading */ +#define WPT_TSC_CPDE 0x01 /* Catastrophic Power-Down Enable */ +#define WPT_TSS_TSDSS 0x10 /* Thermal Sensor Dynamic Shutdown Status */ +#define WPT_TSS_GPES 0x08 /* GPE status */ +#define WPT_TSEL_ETS 0x01 /* Enable TS */ +#define WPT_TSEL_PLDB 0x80 /* TSEL Policy Lock-Down Bit */ +#define WPT_TL_TOL 0x000001FF /* T0 Level */ +#define WPT_TL_T1L 0x1ff00000 /* T1 Level */ +#define WPT_TL_TTEN 0x20000000 /* TT Enable */ + +static char driver_name[] = "Intel PCH thermal driver"; + +struct pch_thermal_device { + void __iomem *hw_base; + const struct pch_dev_ops *ops; + struct pci_dev *pdev; + struct thermal_zone_device *tzd; + int crt_trip_id; + unsigned long crt_temp; + int hot_trip_id; + unsigned long hot_temp; +}; + +static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) +{ + u8 tsel; + u16 trip_temp; + + *nr_trips = 0; + + /* Check if BIOS has already enabled thermal sensor */ + if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) + 
goto read_trips; + + tsel = readb(ptd->hw_base + WPT_TSEL); + /* + * When TSEL's Policy Lock-Down bit is 1, TSEL become RO. + * If so, thermal sensor cannot enable. Bail out. + */ + if (tsel & WPT_TSEL_PLDB) { + dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); + return -ENODEV; + } + + writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + if (!(WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))) { + dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); + return -ENODEV; + } + +read_trips: + ptd->crt_trip_id = -1; + trip_temp = readw(ptd->hw_base + WPT_CTT); + trip_temp &= 0x1FF; + if (trip_temp) { + /* Resolution of 1/2 degree C and an offset of -50C */ + ptd->crt_temp = trip_temp * 1000 / 2 - 50000; + ptd->crt_trip_id = 0; + ++(*nr_trips); + } + + ptd->hot_trip_id = -1; + trip_temp = readw(ptd->hw_base + WPT_PHL); + trip_temp &= 0x1FF; + if (trip_temp) { + /* Resolution of 1/2 degree C and an offset of -50C */ + ptd->hot_temp = trip_temp * 1000 / 2 - 50000; + ptd->hot_trip_id = *nr_trips; + ++(*nr_trips); + } + + return 0; +} + +static int pch_wpt_get_temp(struct pch_thermal_device *ptd, + unsigned long *temp) +{ + u8 wpt_temp; + + wpt_temp = WPT_TEMP_TSR & readl(ptd->hw_base + WPT_TEMP); + + /* Resolution of 1/2 degree C and an offset of -50C */ + *temp = (wpt_temp * 1000 / 2 - 50000); + + return 0; +} + +struct pch_dev_ops { + int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); + int (*get_temp)(struct pch_thermal_device *ptd, unsigned long *temp); +}; + + +/* dev ops for Wildcat Point */ +static struct pch_dev_ops pch_dev_ops_wpt = { + .hw_init = pch_wpt_init, + .get_temp = pch_wpt_get_temp, +}; + +static int pch_thermal_get_temp(struct thermal_zone_device *tzd, + unsigned long *temp) +{ + struct pch_thermal_device *ptd = tzd->devdata; + + return ptd->ops->get_temp(ptd, temp); +} + +static int pch_get_trip_type(struct thermal_zone_device *tzd, int trip, + enum thermal_trip_type *type) +{ + struct pch_thermal_device *ptd = tzd->devdata; + + if (ptd->crt_trip_id == trip) + *type = THERMAL_TRIP_CRITICAL; + else if (ptd->hot_trip_id == trip) + *type = THERMAL_TRIP_HOT; + else + return -EINVAL; + + return 0; +} + +static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, + unsigned long *temp) +{ + struct pch_thermal_device *ptd = tzd->devdata; + + if (ptd->crt_trip_id == trip) + *temp = ptd->crt_temp; + else if (ptd->hot_trip_id == trip) + *temp = ptd->hot_temp; + else + return -EINVAL; + + return 0; +} + +static struct thermal_zone_device_ops tzd_ops = { + .get_temp = pch_thermal_get_temp, + .get_trip_type = pch_get_trip_type, + .get_trip_temp = pch_get_trip_temp, +}; + + +static int intel_pch_thermal_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct pch_thermal_device *ptd; + int err; + int nr_trips; + char *dev_name; + + ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL); + if (!ptd) + return -ENOMEM; + + switch (pdev->device) { + case PCH_THERMAL_DID_WPT: + ptd->ops = &pch_dev_ops_wpt; + dev_name = "pch_wildcat_point"; + break; + default: + dev_err(&pdev->dev, "unknown pch thermal device\n"); + return -ENODEV; + } + + pci_set_drvdata(pdev, ptd); + ptd->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device\n"); + return err; + } + + err = pci_request_regions(pdev, driver_name); + if (err) { + dev_err(&pdev->dev, "failed to request pci region\n"); + goto error_disable; + } + + ptd->hw_base = pci_ioremap_bar(pdev, 0); + if (!ptd->hw_base) { + err = -ENOMEM; + 
dev_err(&pdev->dev, "failed to map mem base\n"); + goto error_release; + } + + err = ptd->ops->hw_init(ptd, &nr_trips); + if (err) + goto error_cleanup; + + ptd->tzd = thermal_zone_device_register(dev_name, nr_trips, 0, ptd, + &tzd_ops, NULL, 0, 0); + if (IS_ERR(ptd->tzd)) { + dev_err(&pdev->dev, "Failed to register thermal zone %s\n", + dev_name); + err = PTR_ERR(ptd->tzd); + goto error_cleanup; + } + + return 0; + +error_cleanup: + iounmap(ptd->hw_base); +error_release: + pci_release_regions(pdev); +error_disable: + pci_disable_device(pdev); + dev_err(&pdev->dev, "pci device failed to probe\n"); + return err; +} + +static void intel_pch_thermal_remove(struct pci_dev *pdev) +{ + struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + + thermal_zone_device_unregister(ptd->tzd); + iounmap(ptd->hw_base); + pci_set_drvdata(pdev, NULL); + pci_release_region(pdev, 0); + pci_disable_device(pdev); +} + +static struct pci_device_id intel_pch_thermal_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); + +static struct pci_driver intel_pch_thermal_driver = { + .name = "intel_pch_thermal", + .id_table = intel_pch_thermal_id, + .probe = intel_pch_thermal_probe, + .remove = intel_pch_thermal_remove, +}; + +module_pci_driver(intel_pch_thermal_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel PCH Thermal driver"); -- cgit v0.10.2 From 0f6e2825ce916eed882996bb6e9148c13ecebefd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:43 -0300 Subject: [media] vb2: Push mmap_sem down to memops Currently vb2 core acquires mmap_sem just around call to __qbuf_userptr(). However since commit f035eb4e976ef5 (videobuf2: fix lockdep warning) it isn't necessary to acquire it so early as we no longer have to drop queue mutex before acquiring mmap_sem. So push acquisition of mmap_sem down into .get_userptr memop so that the semaphore is acquired for a shorter time and it is clearer what it is needed for. Note that we also need mmap_sem in .put_userptr memop since that ends up calling vb2_put_vma() which calls vma->vm_ops->close() which should be called with mmap_sem held. However we didn't hold mmap_sem in some code paths anyway (e.g. when called via vb2_ioctl_reqbufs() -> __vb2_queue_free() -> vb2_dma_sg_put_userptr()) and getting mmap_sem in put_userptr() introduces a lock inversion with queue->mmap_lock in the above mentioned call path. Luckily this whole locking mess will get resolved once we convert videobuf2 core to the new mm helper which avoids the need for mmap_sem in .put_userptr memop altogether. 
Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index b866a6b..243b28d 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1681,9 +1681,7 @@ static int __buf_prepare(struct vb2_buffer *vb, const struct v4l2_buffer *b) ret = __qbuf_mmap(vb, b); break; case V4L2_MEMORY_USERPTR: - down_read(¤t->mm->mmap_sem); ret = __qbuf_userptr(vb, b); - up_read(¤t->mm->mmap_sem); break; case V4L2_MEMORY_DMABUF: ret = __qbuf_dmabuf(vb, b); diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index 94c1e64..c548ce4 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -616,6 +616,7 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, goto fail_buf; } + down_read(¤t->mm->mmap_sem); /* current->mm->mmap_sem is taken by videobuf2 core */ vma = find_vma(current->mm, vaddr); if (!vma) { @@ -642,6 +643,7 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, if (ret) { unsigned long pfn; if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) { + up_read(¤t->mm->mmap_sem); buf->dma_addr = vb2_dc_pfn_to_dma(buf->dev, pfn); buf->size = size; kfree(pages); @@ -651,6 +653,7 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, pr_err("failed to get user pages\n"); goto fail_vma; } + up_read(¤t->mm->mmap_sem); sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); if (!sgt) { @@ -713,10 +716,12 @@ fail_get_user_pages: while (n_pages) put_page(pages[--n_pages]); + down_read(¤t->mm->mmap_sem); fail_vma: vb2_put_vma(buf->vma); fail_pages: + up_read(¤t->mm->mmap_sem); kfree(pages); /* kfree is NULL-proof */ fail_buf: diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c index 7289b81..d2cf113 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c @@ -264,6 +264,7 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr, if (!buf->pages) goto userptr_fail_alloc_pages; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, vaddr); if (!vma) { dprintk(1, "no vma for address %lu\n", vaddr); @@ -302,6 +303,7 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr, 1, /* force */ buf->pages, NULL); + up_read(¤t->mm->mmap_sem); if (num_pages_from_user != buf->num_pages) goto userptr_fail_get_user_pages; @@ -331,8 +333,10 @@ userptr_fail_get_user_pages: if (!vma_is_io(buf->vma)) while (--num_pages_from_user >= 0) put_page(buf->pages[num_pages_from_user]); + down_read(¤t->mm->mmap_sem); vb2_put_vma(buf->vma); userptr_fail_find_vma: + up_read(¤t->mm->mmap_sem); kfree(buf->pages); userptr_fail_alloc_pages: kfree(buf); diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c index 2fe4c27..63bef95 100644 --- a/drivers/media/v4l2-core/videobuf2-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c @@ -89,7 +89,7 @@ static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr, offset = vaddr & ~PAGE_MASK; buf->size = size; - + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, vaddr); if (vma && (vma->vm_flags & VM_PFNMAP) && (vma->vm_pgoff)) { if (vb2_get_contig_userptr(vaddr, size, &vma, &physp)) @@ -121,6 +121,7 @@ static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned 
long vaddr, if (!buf->vaddr) goto fail_get_user_pages; } + up_read(¤t->mm->mmap_sem); buf->vaddr += offset; return buf; @@ -133,6 +134,7 @@ fail_get_user_pages: kfree(buf->pages); fail_pages_array_alloc: + up_read(¤t->mm->mmap_sem); kfree(buf); return NULL; -- cgit v0.10.2 From 8025e5ddf9c1cac0e632dad49a63abf7848b78cb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:44 -0300 Subject: [media] mm: Provide new get_vaddr_frames() helper Provide new function get_vaddr_frames(). This function maps virtual addresses from given start and fills given array with page frame numbers of the corresponding pages. If given start belongs to a normal vma, the function grabs reference to each of the pages to pin them in memory. If start belongs to VM_IO | VM_PFNMAP vma, we don't touch page structures. Caller must make sure pfns aren't reused for anything else while he is using them. This function is created for various drivers to simplify handling of their buffers. Signed-off-by: Jan Kara Acked-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Andrew Morton Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e872f9..79ad29a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -20,6 +20,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1198,6 +1199,49 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); + +/* Container for pinned pfns / pages */ +struct frame_vector { + unsigned int nr_allocated; /* Number of frames we have space for */ + unsigned int nr_frames; /* Number of frames stored in ptrs array */ + bool got_ref; /* Did we pin pages by getting page ref? */ + bool is_pfns; /* Does array contain pages or pfns? */ + void *ptrs[0]; /* Array of pinned pfns / pages. Use + * pfns_vector_pages() or pfns_vector_pfns() + * for access */ +}; + +struct frame_vector *frame_vector_create(unsigned int nr_frames); +void frame_vector_destroy(struct frame_vector *vec); +int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, + bool write, bool force, struct frame_vector *vec); +void put_vaddr_frames(struct frame_vector *vec); +int frame_vector_to_pages(struct frame_vector *vec); +void frame_vector_to_pfns(struct frame_vector *vec); + +static inline unsigned int frame_vector_count(struct frame_vector *vec) +{ + return vec->nr_frames; +} + +static inline struct page **frame_vector_pages(struct frame_vector *vec) +{ + if (vec->is_pfns) { + int err = frame_vector_to_pages(vec); + + if (err) + return ERR_PTR(err); + } + return (struct page **)(vec->ptrs); +} + +static inline unsigned long *frame_vector_pfns(struct frame_vector *vec) +{ + if (!vec->is_pfns) + frame_vector_to_pfns(vec); + return (unsigned long *)(vec->ptrs); +} + struct kvec; int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); diff --git a/mm/Kconfig b/mm/Kconfig index e79de2b..7f146dd 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -654,3 +654,6 @@ config DEFERRED_STRUCT_PAGE_INIT when kswapd starts. This has a potential performance impact on processes running early in the lifetime of the systemm until kswapd finishes the initialisation. 
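Taken together, the calls declared above compose as follows. A minimal sketch of a caller, with a hypothetical demo_pin_user_range() and most error handling trimmed:

#include <linux/mm.h>

static int demo_pin_user_range(unsigned long start, unsigned int nr_frames)
{
	struct frame_vector *vec;
	int ret;

	vec = frame_vector_create(nr_frames);
	if (!vec)
		return -ENOMEM;

	/* write = true, force = false */
	ret = get_vaddr_frames(start, nr_frames, true, false, vec);
	if (ret < 0) {
		frame_vector_destroy(vec);
		return ret;
	}

	/*
	 * ret may be smaller than nr_frames when the range spans vmas of
	 * different types, so callers that need a complete mapping must
	 * check it (the vb2 helper added later in this series treats a
	 * short count as -EFAULT).
	 */

	/* ... consume frame_vector_pages(vec) or frame_vector_pfns(vec) ... */

	put_vaddr_frames(vec);		/* drops page refs, if any were taken */
	frame_vector_destroy(vec);
	return 0;
}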
+ +config FRAME_VECTOR + bool diff --git a/mm/Makefile b/mm/Makefile index 98c4eae..be5d5c8 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -78,3 +78,4 @@ obj-$(CONFIG_CMA) += cma.o obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o +obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o diff --git a/mm/frame_vector.c b/mm/frame_vector.c new file mode 100644 index 0000000..cdabcb9 --- /dev/null +++ b/mm/frame_vector.c @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * get_vaddr_frames() - map virtual addresses to pfns + * @start: starting user address + * @nr_frames: number of pages / pfns from start to map + * @write: whether pages will be written to by the caller + * @force: whether to force write access even if user mapping is + * readonly. See description of the same argument of + get_user_pages(). + * @vec: structure which receives pages / pfns of the addresses mapped. + * It should have space for at least nr_frames entries. + * + * This function maps virtual addresses from @start and fills @vec structure + * with page frame numbers or page pointers to corresponding pages (choice + * depends on the type of the vma underlying the virtual address). If @start + * belongs to a normal vma, the function grabs reference to each of the pages + * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't + * touch page structures and the caller must make sure pfns aren't reused for + * anything else while he is using them. + * + * The function returns number of pages mapped which may be less than + * @nr_frames. In particular we stop mapping if there are more vmas of + * different type underlying the specified range of virtual addresses. + * When the function isn't able to map a single page, it returns error. + * + * This function takes care of grabbing mmap_sem as necessary. + */ +int get_vaddr_frames(unsigned long start, unsigned int nr_frames, + bool write, bool force, struct frame_vector *vec) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int ret = 0; + int err; + int locked; + + if (nr_frames == 0) + return 0; + + if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) + nr_frames = vec->nr_allocated; + + down_read(&mm->mmap_sem); + locked = 1; + vma = find_vma_intersection(mm, start, start + 1); + if (!vma) { + ret = -EFAULT; + goto out; + } + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { + vec->got_ref = true; + vec->is_pfns = false; + ret = get_user_pages_locked(current, mm, start, nr_frames, + write, force, (struct page **)(vec->ptrs), &locked); + goto out; + } + + vec->got_ref = false; + vec->is_pfns = true; + do { + unsigned long *nums = frame_vector_pfns(vec); + + while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { + err = follow_pfn(vma, start, &nums[ret]); + if (err) { + if (ret == 0) + ret = err; + goto out; + } + start += PAGE_SIZE; + ret++; + } + /* + * We stop if we have enough pages or if VMA doesn't completely + * cover the tail page. 
+ */ + if (ret >= nr_frames || start < vma->vm_end) + break; + vma = find_vma_intersection(mm, start, start + 1); + } while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP)); +out: + if (locked) + up_read(&mm->mmap_sem); + if (!ret) + ret = -EFAULT; + if (ret > 0) + vec->nr_frames = ret; + return ret; +} +EXPORT_SYMBOL(get_vaddr_frames); + +/** + * put_vaddr_frames() - drop references to pages if get_vaddr_frames() acquired + * them + * @vec: frame vector to put + * + * Drop references to pages if get_vaddr_frames() acquired them. We also + * invalidate the frame vector so that it is prepared for the next call into + * get_vaddr_frames(). + */ +void put_vaddr_frames(struct frame_vector *vec) +{ + int i; + struct page **pages; + + if (!vec->got_ref) + goto out; + pages = frame_vector_pages(vec); + /* + * frame_vector_pages() might needed to do a conversion when + * get_vaddr_frames() got pages but vec was later converted to pfns. + * But it shouldn't really fail to convert pfns back... + */ + if (WARN_ON(IS_ERR(pages))) + goto out; + for (i = 0; i < vec->nr_frames; i++) + put_page(pages[i]); + vec->got_ref = false; +out: + vec->nr_frames = 0; +} +EXPORT_SYMBOL(put_vaddr_frames); + +/** + * frame_vector_to_pages - convert frame vector to contain page pointers + * @vec: frame vector to convert + * + * Convert @vec to contain array of page pointers. If the conversion is + * successful, return 0. Otherwise return an error. Note that we do not grab + * page references for the page structures. + */ +int frame_vector_to_pages(struct frame_vector *vec) +{ + int i; + unsigned long *nums; + struct page **pages; + + if (!vec->is_pfns) + return 0; + nums = frame_vector_pfns(vec); + for (i = 0; i < vec->nr_frames; i++) + if (!pfn_valid(nums[i])) + return -EINVAL; + pages = (struct page **)nums; + for (i = 0; i < vec->nr_frames; i++) + pages[i] = pfn_to_page(nums[i]); + vec->is_pfns = false; + return 0; +} +EXPORT_SYMBOL(frame_vector_to_pages); + +/** + * frame_vector_to_pfns - convert frame vector to contain pfns + * @vec: frame vector to convert + * + * Convert @vec to contain array of pfns. + */ +void frame_vector_to_pfns(struct frame_vector *vec) +{ + int i; + unsigned long *nums; + struct page **pages; + + if (vec->is_pfns) + return; + pages = (struct page **)(vec->ptrs); + nums = (unsigned long *)pages; + for (i = 0; i < vec->nr_frames; i++) + nums[i] = page_to_pfn(pages[i]); + vec->is_pfns = true; +} +EXPORT_SYMBOL(frame_vector_to_pfns); + +/** + * frame_vector_create() - allocate & initialize structure for pinned pfns + * @nr_frames: number of pfns slots we should reserve + * + * Allocate and initialize struct pinned_pfns to be able to hold @nr_pfns + * pfns. + */ +struct frame_vector *frame_vector_create(unsigned int nr_frames) +{ + struct frame_vector *vec; + int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames; + + if (WARN_ON_ONCE(nr_frames == 0)) + return NULL; + /* + * This is absurdly high. It's here just to avoid strange effects when + * arithmetics overflows. + */ + if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2)) + return NULL; + /* + * Avoid higher order allocations, use vmalloc instead. It should + * be rare anyway. 
+ */ + if (size <= PAGE_SIZE) + vec = kmalloc(size, GFP_KERNEL); + else + vec = vmalloc(size); + if (!vec) + return NULL; + vec->nr_allocated = nr_frames; + vec->nr_frames = 0; + return vec; +} +EXPORT_SYMBOL(frame_vector_create); + +/** + * frame_vector_destroy() - free memory allocated to carry frame vector + * @vec: Frame vector to free + * + * Free structure allocated by frame_vector_create() to carry frames. + */ +void frame_vector_destroy(struct frame_vector *vec) +{ + /* Make sure put_vaddr_frames() got called properly... */ + VM_BUG_ON(vec->nr_frames > 0); + kvfree(vec); +} +EXPORT_SYMBOL(frame_vector_destroy); -- cgit v0.10.2 From 8a677b6eddfc3127ea36a710838ecd20502b1cb9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:45 -0300 Subject: [media] media: omap_vout: Convert omap_vout_uservirt_to_phys() to use get_vaddr_frames() Convert omap_vout_uservirt_to_phys() to use get_vaddr_frames() instead of hand-made mapping of virtual address to physical address. Also, the function leaked a page reference from get_user_pages(), so fix that by properly releasing the reference when omap_vout_buffer_release() is called. Signed-off-by: Jan Kara [hans.verkuil@cisco.com: remove unused variable] Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/omap/Kconfig b/drivers/media/platform/omap/Kconfig index dc2aaab..217d613 100644 --- a/drivers/media/platform/omap/Kconfig +++ b/drivers/media/platform/omap/Kconfig @@ -10,6 +10,7 @@ config VIDEO_OMAP2_VOUT select OMAP2_DSS if HAS_IOMEM && ARCH_OMAP2PLUS select OMAP2_VRFB if ARCH_OMAP2 || ARCH_OMAP3 select VIDEO_OMAP2_VOUT_VRFB if VIDEO_OMAP2_VOUT && OMAP2_VRFB + select FRAME_VECTOR default n ---help--- V4L2 Display driver support for OMAP2/3 based boards. diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index f09c5f1..7feb639 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -195,46 +195,34 @@ static int omap_vout_try_format(struct v4l2_pix_format *pix) } /* - * omap_vout_uservirt_to_phys: This inline function is used to convert user - * space virtual address to physical address. + * omap_vout_get_userptr: Convert user space virtual address to physical + * address.
*/ -static unsigned long omap_vout_uservirt_to_phys(unsigned long virtp) +static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp, + u32 *physp) { - unsigned long physp = 0; - struct vm_area_struct *vma; - struct mm_struct *mm = current->mm; + struct frame_vector *vec; + int ret; /* For kernel direct-mapped memory, take the easy way */ - if (virtp >= PAGE_OFFSET) - return virt_to_phys((void *) virtp); - - down_read(¤t->mm->mmap_sem); - vma = find_vma(mm, virtp); - if (vma && (vma->vm_flags & VM_IO) && vma->vm_pgoff) { - /* this will catch, kernel-allocated, mmaped-to-usermode - addresses */ - physp = (vma->vm_pgoff << PAGE_SHIFT) + (virtp - vma->vm_start); - up_read(¤t->mm->mmap_sem); - } else { - /* otherwise, use get_user_pages() for general userland pages */ - int res, nr_pages = 1; - struct page *pages; + if (virtp >= PAGE_OFFSET) { + *physp = virt_to_phys((void *)virtp); + return 0; + } - res = get_user_pages(current, current->mm, virtp, nr_pages, 1, - 0, &pages, NULL); - up_read(¤t->mm->mmap_sem); + vec = frame_vector_create(1); + if (!vec) + return -ENOMEM; - if (res == nr_pages) { - physp = __pa(page_address(&pages[0]) + - (virtp & ~PAGE_MASK)); - } else { - printk(KERN_WARNING VOUT_NAME - "get_user_pages failed\n"); - return 0; - } + ret = get_vaddr_frames(virtp, 1, true, false, vec); + if (ret != 1) { + frame_vector_destroy(vec); + return -EINVAL; } + *physp = __pfn_to_phys(frame_vector_pfns(vec)[0]); + vb->priv = vec; - return physp; + return 0; } /* @@ -784,11 +772,15 @@ static int omap_vout_buffer_prepare(struct videobuf_queue *q, * address of the buffer */ if (V4L2_MEMORY_USERPTR == vb->memory) { + int ret; + if (0 == vb->baddr) return -EINVAL; /* Physical address */ - vout->queued_buf_addr[vb->i] = (u8 *) - omap_vout_uservirt_to_phys(vb->baddr); + ret = omap_vout_get_userptr(vb, vb->baddr, + (u32 *)&vout->queued_buf_addr[vb->i]); + if (ret < 0) + return ret; } else { unsigned long addr, dma_addr; unsigned long size; @@ -834,12 +826,13 @@ static void omap_vout_buffer_queue(struct videobuf_queue *q, static void omap_vout_buffer_release(struct videobuf_queue *q, struct videobuf_buffer *vb) { - struct omap_vout_device *vout = q->priv_data; - vb->state = VIDEOBUF_NEEDS_INIT; + if (vb->memory == V4L2_MEMORY_USERPTR && vb->priv) { + struct frame_vector *vec = vb->priv; - if (V4L2_MEMORY_MMAP != vout->memory) - return; + put_vaddr_frames(vec); + frame_vector_destroy(vec); + } } /* -- cgit v0.10.2 From 21fb0cb7ec65a40b9f5f7cda59eba0eb2ae76473 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:46 -0300 Subject: [media] vb2: Provide helpers for mapping virtual addresses Provide simple helper functions to map virtual address range into an array of pfns / pages. 
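The intended calling pattern for the helpers added below, sketched minimally (demo_map_userptr() is illustrative, not part of the patch):

#include <linux/err.h>
#include <media/videobuf2-memops.h>

static int demo_map_userptr(unsigned long vaddr, unsigned long size,
			    bool write)
{
	struct frame_vector *vec;

	vec = vb2_create_framevec(vaddr, size, write);
	if (IS_ERR(vec))
		/* -ENOMEM, or -EFAULT if the range could not be fully mapped */
		return PTR_ERR(vec);

	/* ... build the memop's page/pfn bookkeeping from the vector ... */

	vb2_destroy_framevec(vec);	/* put_vaddr_frames() plus free */
	return 0;
}

Note the all-or-nothing design: unlike raw get_vaddr_frames(), vb2_create_framevec() rejects partial mappings and returns ERR_PTR(-EFAULT), which is what the vb2 memops converted in the following patches rely on.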
Tested-by: Marek Szyprowski Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index b4b0229..82876a6 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -84,6 +84,7 @@ config VIDEOBUF2_CORE config VIDEOBUF2_MEMOPS tristate + select FRAME_VECTOR config VIDEOBUF2_DMA_CONTIG tristate diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 81c1ad8..0ec186d 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -137,6 +137,64 @@ int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size, EXPORT_SYMBOL_GPL(vb2_get_contig_userptr); /** + * vb2_create_framevec() - map virtual addresses to pfns + * @start: Virtual user address where we start mapping + * @length: Length of a range to map + * @write: Should we map for writing into the area + * + * This function allocates and fills in a vector with pfns corresponding to + * the virtual address range passed in the arguments. If the pfns have + * corresponding pages, page references are also grabbed to pin the pages in + * memory. The function returns a pointer to the vector on success and an + * error pointer in case of failure. The returned vector needs to be freed + * via vb2_destroy_framevec(). + */ +struct frame_vector *vb2_create_framevec(unsigned long start, + unsigned long length, + bool write) +{ + int ret; + unsigned long first, last; + unsigned long nr; + struct frame_vector *vec; + + first = start >> PAGE_SHIFT; + last = (start + length - 1) >> PAGE_SHIFT; + nr = last - first + 1; + vec = frame_vector_create(nr); + if (!vec) + return ERR_PTR(-ENOMEM); + ret = get_vaddr_frames(start, nr, write, 1, vec); + if (ret < 0) + goto out_destroy; + /* We accept only complete set of PFNs */ + if (ret != nr) { + ret = -EFAULT; + goto out_release; + } + return vec; +out_release: + put_vaddr_frames(vec); +out_destroy: + frame_vector_destroy(vec); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(vb2_create_framevec); + +/** + * vb2_destroy_framevec() - release vector of mapped pfns + * @vec: vector of pfns / pages to release + * + * This releases references to all pages in the vector @vec (if corresponding + * pfns are backed by pages) and frees the passed vector.
+ */ +void vb2_destroy_framevec(struct frame_vector *vec) +{ + put_vaddr_frames(vec); + frame_vector_destroy(vec); +} +EXPORT_SYMBOL(vb2_destroy_framevec); + +/** * vb2_common_vm_open() - increase refcount of the vma * @vma: virtual memory region for the mapping * diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h index f05444c..2f0564f 100644 --- a/include/media/videobuf2-memops.h +++ b/include/media/videobuf2-memops.h @@ -15,6 +15,7 @@ #define _MEDIA_VIDEOBUF2_MEMOPS_H #include +#include /** * vb2_vmarea_handler - common vma refcount tracking handler @@ -36,5 +37,9 @@ int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size, struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma); void vb2_put_vma(struct vm_area_struct *vma); +struct frame_vector *vb2_create_framevec(unsigned long start, + unsigned long length, + bool write); +void vb2_destroy_framevec(struct frame_vector *vec); #endif -- cgit v0.10.2 From 3336c24f25ec932aab128cf7285ca90a4dbcb816 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:47 -0300 Subject: [media] media: vb2: Convert vb2_dma_sg_get_userptr() to use frame vector Tested-by: Marek Szyprowski Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c index d2cf113..be7bd65 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c @@ -38,6 +38,7 @@ struct vb2_dma_sg_buf { struct device *dev; void *vaddr; struct page **pages; + struct frame_vector *vec; int offset; enum dma_data_direction dma_dir; struct sg_table sg_table; @@ -51,7 +52,6 @@ struct vb2_dma_sg_buf { unsigned int num_pages; atomic_t refcount; struct vb2_vmarea_handler handler; - struct vm_area_struct *vma; struct dma_buf_attachment *db_attach; }; @@ -225,25 +225,17 @@ static void vb2_dma_sg_finish(void *buf_priv) dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir); } -static inline int vma_is_io(struct vm_area_struct *vma) -{ - return !!(vma->vm_flags & (VM_IO | VM_PFNMAP)); -} - static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr, unsigned long size, enum dma_data_direction dma_dir) { struct vb2_dma_sg_conf *conf = alloc_ctx; struct vb2_dma_sg_buf *buf; - unsigned long first, last; - int num_pages_from_user; - struct vm_area_struct *vma; struct sg_table *sgt; DEFINE_DMA_ATTRS(attrs); + struct frame_vector *vec; dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); - buf = kzalloc(sizeof *buf, GFP_KERNEL); if (!buf) return NULL; @@ -254,63 +246,19 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr, buf->offset = vaddr & ~PAGE_MASK; buf->size = size; buf->dma_sgt = &buf->sg_table; + vec = vb2_create_framevec(vaddr, size, buf->dma_dir == DMA_FROM_DEVICE); + if (IS_ERR(vec)) + goto userptr_fail_pfnvec; + buf->vec = vec; - first = (vaddr & PAGE_MASK) >> PAGE_SHIFT; - last = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT; - buf->num_pages = last - first + 1; - - buf->pages = kzalloc(buf->num_pages * sizeof(struct page *), - GFP_KERNEL); - if (!buf->pages) - goto userptr_fail_alloc_pages; - - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, vaddr); - if (!vma) { - dprintk(1, "no vma for address %lu\n", vaddr); - goto userptr_fail_find_vma; - } - - if (vma->vm_end < vaddr + size) { - dprintk(1, "vma at %lu is too small for %lu bytes\n", - vaddr, size); - goto userptr_fail_find_vma; - } - - buf->vma = 
vb2_get_vma(vma); - if (!buf->vma) { - dprintk(1, "failed to copy vma\n"); - goto userptr_fail_find_vma; - } - - if (vma_is_io(buf->vma)) { - for (num_pages_from_user = 0; - num_pages_from_user < buf->num_pages; - ++num_pages_from_user, vaddr += PAGE_SIZE) { - unsigned long pfn; - - if (follow_pfn(vma, vaddr, &pfn)) { - dprintk(1, "no page for address %lu\n", vaddr); - break; - } - buf->pages[num_pages_from_user] = pfn_to_page(pfn); - } - } else - num_pages_from_user = get_user_pages(current, current->mm, - vaddr & PAGE_MASK, - buf->num_pages, - buf->dma_dir == DMA_FROM_DEVICE, - 1, /* force */ - buf->pages, - NULL); - up_read(&current->mm->mmap_sem); - - if (num_pages_from_user != buf->num_pages) - goto userptr_fail_get_user_pages; + buf->pages = frame_vector_pages(vec); + if (IS_ERR(buf->pages)) + goto userptr_fail_sgtable; + buf->num_pages = frame_vector_count(vec); if (sg_alloc_table_from_pages(buf->dma_sgt, buf->pages, buf->num_pages, buf->offset, size, 0)) - goto userptr_fail_alloc_table_from_pages; + goto userptr_fail_sgtable; sgt = &buf->sg_table; /* @@ -326,19 +274,9 @@ static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr, userptr_fail_map: sg_free_table(&buf->sg_table); -userptr_fail_alloc_table_from_pages: -userptr_fail_get_user_pages: - dprintk(1, "get_user_pages requested/got: %d/%d]\n", - buf->num_pages, num_pages_from_user); - if (!vma_is_io(buf->vma)) - while (--num_pages_from_user >= 0) - put_page(buf->pages[num_pages_from_user]); - down_read(&current->mm->mmap_sem); - vb2_put_vma(buf->vma); -userptr_fail_find_vma: - up_read(&current->mm->mmap_sem); - kfree(buf->pages); -userptr_fail_alloc_pages: +userptr_fail_sgtable: + vb2_destroy_framevec(vec); +userptr_fail_pfnvec: kfree(buf); return NULL; } @@ -366,11 +304,8 @@ static void vb2_dma_sg_put_userptr(void *buf_priv) while (--i >= 0) { if (buf->dma_dir == DMA_FROM_DEVICE) set_page_dirty_lock(buf->pages[i]); - if (!vma_is_io(buf->vma)) - put_page(buf->pages[i]); } - kfree(buf->pages); - vb2_put_vma(buf->vma); + vb2_destroy_framevec(buf->vec); kfree(buf); } -- cgit v0.10.2 From 5a9e4dec393a2c5a01be6adc63065059b367d532 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:48 -0300 Subject: [media] media: vb2: Convert vb2_vmalloc_get_userptr() to use frame vector Convert vb2_vmalloc_get_userptr() to use the frame vector infrastructure. While doing that, there's no need to allocate a page array and some code can be simplified.
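A brief aside on the two states a pinned frame vector can be in, sketched as a hypothetical helper; frame_vector_pages() returns an error pointer when the vector holds raw pfns from a VM_PFNMAP mapping, which is what the conversion below keys off:

static bool vec_is_page_backed(struct frame_vector *vec)
{
	/* True for ordinary page-backed memory, false for raw pfns */
	return !IS_ERR(frame_vector_pages(vec));
}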
Tested-by: Marek Szyprowski Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c index 63bef95..ecb8f0c 100644 --- a/drivers/media/v4l2-core/videobuf2-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c @@ -23,11 +23,9 @@ struct vb2_vmalloc_buf { void *vaddr; - struct page **pages; - struct vm_area_struct *vma; + struct frame_vector *vec; enum dma_data_direction dma_dir; unsigned long size; - unsigned int n_pages; atomic_t refcount; struct vb2_vmarea_handler handler; struct dma_buf *dbuf; @@ -76,10 +74,8 @@ static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr, enum dma_data_direction dma_dir) { struct vb2_vmalloc_buf *buf; - unsigned long first, last; - int n_pages, offset; - struct vm_area_struct *vma; - dma_addr_t physp; + struct frame_vector *vec; + int n_pages, offset, i; buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) @@ -88,53 +84,36 @@ static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr, buf->dma_dir = dma_dir; offset = vaddr & ~PAGE_MASK; buf->size = size; - - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, vaddr); - if (vma && (vma->vm_flags & VM_PFNMAP) && (vma->vm_pgoff)) { - if (vb2_get_contig_userptr(vaddr, size, &vma, &physp)) - goto fail_pages_array_alloc; - buf->vma = vma; - buf->vaddr = (__force void *)ioremap_nocache(physp, size); - if (!buf->vaddr) - goto fail_pages_array_alloc; + vec = vb2_create_framevec(vaddr, size, dma_dir == DMA_FROM_DEVICE); + if (IS_ERR(vec)) + goto fail_pfnvec_create; + buf->vec = vec; + n_pages = frame_vector_count(vec); + if (frame_vector_to_pages(vec) < 0) { + unsigned long *nums = frame_vector_pfns(vec); + + /* + * We cannot get page pointers for these pfns. Check memory is + * physically contiguous and use direct mapping.
+ */ + for (i = 1; i < n_pages; i++) + if (nums[i-1] + 1 != nums[i]) + goto fail_map; + buf->vaddr = (__force void *) + ioremap_nocache(nums[0] << PAGE_SHIFT, size); } else { - first = vaddr >> PAGE_SHIFT; - last = (vaddr + size - 1) >> PAGE_SHIFT; - buf->n_pages = last - first + 1; - buf->pages = kzalloc(buf->n_pages * sizeof(struct page *), - GFP_KERNEL); - if (!buf->pages) - goto fail_pages_array_alloc; - - /* current->mm->mmap_sem is taken by videobuf2 core */ - n_pages = get_user_pages(current, current->mm, - vaddr & PAGE_MASK, buf->n_pages, - dma_dir == DMA_FROM_DEVICE, - 1, /* force */ - buf->pages, NULL); - if (n_pages != buf->n_pages) - goto fail_get_user_pages; - - buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, + buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1, PAGE_KERNEL); - if (!buf->vaddr) - goto fail_get_user_pages; } - up_read(&current->mm->mmap_sem); + if (!buf->vaddr) + goto fail_map; buf->vaddr += offset; return buf; -fail_get_user_pages: - pr_debug("get_user_pages requested/got: %d/%d]\n", n_pages, - buf->n_pages); - while (--n_pages >= 0) - put_page(buf->pages[n_pages]); - kfree(buf->pages); - -fail_pages_array_alloc: - up_read(&current->mm->mmap_sem); +fail_map: + vb2_destroy_framevec(vec); +fail_pfnvec_create: kfree(buf); return NULL; @@ -145,20 +124,21 @@ static void vb2_vmalloc_put_userptr(void *buf_priv) struct vb2_vmalloc_buf *buf = buf_priv; unsigned long vaddr = (unsigned long)buf->vaddr & PAGE_MASK; unsigned int i; + struct page **pages; + unsigned int n_pages; - if (buf->pages) { + if (!buf->vec->is_pfns) { + n_pages = frame_vector_count(buf->vec); + pages = frame_vector_pages(buf->vec); if (vaddr) - vm_unmap_ram((void *)vaddr, buf->n_pages); - for (i = 0; i < buf->n_pages; ++i) { - if (buf->dma_dir == DMA_FROM_DEVICE) - set_page_dirty_lock(buf->pages[i]); - put_page(buf->pages[i]); - } - kfree(buf->pages); + vm_unmap_ram((void *)vaddr, n_pages); + if (buf->dma_dir == DMA_FROM_DEVICE) + for (i = 0; i < n_pages; i++) + set_page_dirty_lock(pages[i]); } else { - vb2_put_vma(buf->vma); iounmap((__force void __iomem *)buf->vaddr); } + vb2_destroy_framevec(buf->vec); kfree(buf); } -- cgit v0.10.2 From fb639eb39154312af8bf08c58cc0142179e0c224 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:49 -0300 Subject: [media] media: vb2: Convert vb2_dc_get_userptr() to use frame vector Convert vb2_dc_get_userptr() to use the frame vector infrastructure. While doing that, there's no need to allocate a page array and some code can be simplified.
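The dma-contig conversion below relies on the same fallback idea; condensed into a hypothetical helper, the physical-contiguity check over raw pfns looks like this:

static bool pfns_are_contiguous(struct frame_vector *vec)
{
	unsigned long *nums = frame_vector_pfns(vec);
	unsigned int i;

	/* Each pfn must directly follow the previous one */
	for (i = 1; i < frame_vector_count(vec); i++)
		if (nums[i - 1] + 1 != nums[i])
			return false;
	return true;
}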
Tested-by: Marek Szyprowski Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index c548ce4..2397ceb 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -32,15 +32,13 @@ struct vb2_dc_buf { dma_addr_t dma_addr; enum dma_data_direction dma_dir; struct sg_table *dma_sgt; + struct frame_vector *vec; /* MMAP related */ struct vb2_vmarea_handler handler; atomic_t refcount; struct sg_table *sgt_base; - /* USERPTR related */ - struct vm_area_struct *vma; - /* DMABUF related */ struct dma_buf_attachment *db_attach; }; @@ -49,24 +47,6 @@ struct vb2_dc_buf { /* scatterlist table functions */ /*********************************************/ - -static void vb2_dc_sgt_foreach_page(struct sg_table *sgt, - void (*cb)(struct page *pg)) -{ - struct scatterlist *s; - unsigned int i; - - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) { - struct page *page = sg_page(s); - unsigned int n_pages = PAGE_ALIGN(s->offset + s->length) - >> PAGE_SHIFT; - unsigned int j; - - for (j = 0; j < n_pages; ++j, ++page) - cb(page); - } -} - static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt) { struct scatterlist *s; @@ -429,92 +409,12 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags) /* callbacks for USERPTR buffers */ /*********************************************/ -static inline int vma_is_io(struct vm_area_struct *vma) -{ - return !!(vma->vm_flags & (VM_IO | VM_PFNMAP)); -} - -static int vb2_dc_get_user_pfn(unsigned long start, int n_pages, - struct vm_area_struct *vma, unsigned long *res) -{ - unsigned long pfn, start_pfn, prev_pfn; - unsigned int i; - int ret; - - if (!vma_is_io(vma)) - return -EFAULT; - - ret = follow_pfn(vma, start, &pfn); - if (ret) - return ret; - - start_pfn = pfn; - start += PAGE_SIZE; - - for (i = 1; i < n_pages; ++i, start += PAGE_SIZE) { - prev_pfn = pfn; - ret = follow_pfn(vma, start, &pfn); - - if (ret) { - pr_err("no page for address %lu\n", start); - return ret; - } - if (pfn != prev_pfn + 1) - return -EINVAL; - } - - *res = start_pfn; - return 0; -} - -static int vb2_dc_get_user_pages(unsigned long start, struct page **pages, - int n_pages, struct vm_area_struct *vma, - enum dma_data_direction dma_dir) -{ - if (vma_is_io(vma)) { - unsigned int i; - - for (i = 0; i < n_pages; ++i, start += PAGE_SIZE) { - unsigned long pfn; - int ret = follow_pfn(vma, start, &pfn); - - if (!pfn_valid(pfn)) - return -EINVAL; - - if (ret) { - pr_err("no page for address %lu\n", start); - return ret; - } - pages[i] = pfn_to_page(pfn); - } - } else { - int n; - - n = get_user_pages(current, current->mm, start & PAGE_MASK, - n_pages, dma_dir == DMA_FROM_DEVICE, 1, pages, NULL); - /* negative error means that no page was pinned */ - n = max(n, 0); - if (n != n_pages) { - pr_err("got only %d of %d user pages\n", n, n_pages); - while (n) - put_page(pages[--n]); - return -EFAULT; - } - } - - return 0; -} - -static void vb2_dc_put_dirty_page(struct page *page) -{ - set_page_dirty_lock(page); - put_page(page); -} - static void vb2_dc_put_userptr(void *buf_priv) { struct vb2_dc_buf *buf = buf_priv; struct sg_table *sgt = buf->dma_sgt; + int i; + struct page **pages; if (sgt) { DEFINE_DMA_ATTRS(attrs); @@ -526,13 +426,15 @@ static void vb2_dc_put_userptr(void *buf_priv) */ dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir, &attrs); - if 
(!vma_is_io(buf->vma)) - vb2_dc_sgt_foreach_page(sgt, vb2_dc_put_dirty_page); - + pages = frame_vector_pages(buf->vec); + /* sgt should exist only if vector contains pages... */ + BUG_ON(IS_ERR(pages)); + for (i = 0; i < frame_vector_count(buf->vec); i++) + set_page_dirty_lock(pages[i]); sg_free_table(sgt); kfree(sgt); } - vb2_put_vma(buf->vma); + vb2_destroy_framevec(buf->vec); kfree(buf); } @@ -572,13 +474,10 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, { struct vb2_dc_conf *conf = alloc_ctx; struct vb2_dc_buf *buf; - unsigned long start; - unsigned long end; + struct frame_vector *vec; unsigned long offset; - struct page **pages; - int n_pages; + int n_pages, i; int ret = 0; - struct vm_area_struct *vma; struct sg_table *sgt; unsigned long contig_size; unsigned long dma_align = dma_get_cache_alignment(); @@ -604,75 +503,43 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, buf->dev = conf->dev; buf->dma_dir = dma_dir; - start = vaddr & PAGE_MASK; offset = vaddr & ~PAGE_MASK; - end = PAGE_ALIGN(vaddr + size); - n_pages = (end - start) >> PAGE_SHIFT; - - pages = kmalloc(n_pages * sizeof(pages[0]), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - pr_err("failed to allocate pages table\n"); + vec = vb2_create_framevec(vaddr, size, dma_dir == DMA_FROM_DEVICE); + if (IS_ERR(vec)) { + ret = PTR_ERR(vec); goto fail_buf; } + buf->vec = vec; + n_pages = frame_vector_count(vec); + ret = frame_vector_to_pages(vec); + if (ret < 0) { + unsigned long *nums = frame_vector_pfns(vec); - down_read(&current->mm->mmap_sem); - /* current->mm->mmap_sem is taken by videobuf2 core */ - vma = find_vma(current->mm, vaddr); - if (!vma) { - pr_err("no vma for address %lu\n", vaddr); - ret = -EFAULT; - goto fail_pages; - } - - if (vma->vm_end < vaddr + size) { - pr_err("vma at %lu is too small for %lu bytes\n", vaddr, size); - ret = -EFAULT; - goto fail_pages; - } - - buf->vma = vb2_get_vma(vma); - if (!buf->vma) { - pr_err("failed to copy vma\n"); - ret = -ENOMEM; - goto fail_pages; - } - - /* extract page list from userspace mapping */ - ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, dma_dir); - if (ret) { - unsigned long pfn; - if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) { - up_read(&current->mm->mmap_sem); - buf->dma_addr = vb2_dc_pfn_to_dma(buf->dev, pfn); - buf->size = size; - kfree(pages); - return buf; - } - - pr_err("failed to get user pages\n"); - goto fail_vma; + /* + * Failed to convert to pages... Check the memory is physically + * contiguous and use direct mapping + */ + for (i = 1; i < n_pages; i++) + if (nums[i-1] + 1 != nums[i]) + goto fail_pfnvec; + buf->dma_addr = vb2_dc_pfn_to_dma(buf->dev, nums[0]); + goto out; } - up_read(&current->mm->mmap_sem); sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); if (!sgt) { pr_err("failed to allocate sg table\n"); ret = -ENOMEM; - goto fail_get_user_pages; + goto fail_pfnvec; } - ret = sg_alloc_table_from_pages(sgt, pages, n_pages, + ret = sg_alloc_table_from_pages(sgt, frame_vector_pages(vec), n_pages, offset, size, GFP_KERNEL); if (ret) { pr_err("failed to initialize sg table\n"); goto fail_sgt; } - /* pages are no longer needed */ - kfree(pages); - pages = NULL; - /* * No need to sync to the device, this will happen later when the * prepare() memop is called. 
@@ -694,8 +561,9 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, } buf->dma_addr = sg_dma_address(sgt->sgl); - buf->size = size; buf->dma_sgt = sgt; +out: + buf->size = size; return buf; @@ -704,25 +572,13 @@ fail_map_sg: buf->dma_dir, &attrs); fail_sgt_init: - if (!vma_is_io(buf->vma)) - vb2_dc_sgt_foreach_page(sgt, put_page); sg_free_table(sgt); fail_sgt: kfree(sgt); -fail_get_user_pages: - if (pages && !vma_is_io(buf->vma)) - while (n_pages) - put_page(pages[--n_pages]); - - down_read(&current->mm->mmap_sem); -fail_vma: - vb2_put_vma(buf->vma); - -fail_pages: - up_read(&current->mm->mmap_sem); - kfree(pages); /* kfree is NULL-proof */ +fail_pfnvec: + vb2_destroy_framevec(vec); fail_buf: kfree(buf); -- cgit v0.10.2 From 6690c8c78c745239bb1f22b23f3889a0a14c249b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:50 -0300 Subject: [media] media: vb2: Remove unused functions Conversion to the use of pinned pfns made some functions unused. Remove them. Also there's no need to lock mmap_sem in __buf_prepare() anymore. Tested-by: Marek Szyprowski Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 0ec186d..48c6a49 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -23,120 +23,6 @@ #include /** - * vb2_get_vma() - acquire and lock the virtual memory area - * @vma: given virtual memory area - * - * This function attempts to acquire an area mapped in the userspace for - * the duration of a hardware operation. The area is "locked" by performing - * the same set of operation that are done when process calls fork() and - * memory areas are duplicated. - * - * Returns a copy of a virtual memory region on success or NULL. - */ -struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma) -{ - struct vm_area_struct *vma_copy; - - vma_copy = kmalloc(sizeof(*vma_copy), GFP_KERNEL); - if (vma_copy == NULL) - return NULL; - - if (vma->vm_ops && vma->vm_ops->open) - vma->vm_ops->open(vma); - - if (vma->vm_file) - get_file(vma->vm_file); - - memcpy(vma_copy, vma, sizeof(*vma)); - - vma_copy->vm_mm = NULL; - vma_copy->vm_next = NULL; - vma_copy->vm_prev = NULL; - - return vma_copy; -} -EXPORT_SYMBOL_GPL(vb2_get_vma); - -/** - * vb2_put_userptr() - release a userspace virtual memory area - * @vma: virtual memory region associated with the area to be released - * - * This function releases the previously acquired memory area after a hardware - * operation. - */ -void vb2_put_vma(struct vm_area_struct *vma) -{ - if (!vma) - return; - - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); - - if (vma->vm_file) - fput(vma->vm_file); - - kfree(vma); -} -EXPORT_SYMBOL_GPL(vb2_put_vma); - -/** - * vb2_get_contig_userptr() - lock physically contiguous userspace mapped memory - * @vaddr: starting virtual address of the area to be verified - * @size: size of the area - * @res_paddr: will return physical address for the given vaddr - * @res_vma: will return locked copy of struct vm_area for the given area - * - * This function will go through memory area of size @size mapped at @vaddr and - * verify that the underlying physical pages are contiguous. If they are - * contiguous the virtual memory area is locked and a @res_vma is filled with - * the copy and @res_pa set to the physical address of the buffer. - * - * Returns 0 on success. 
- */ -int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size, - struct vm_area_struct **res_vma, dma_addr_t *res_pa) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long offset, start, end; - unsigned long this_pfn, prev_pfn; - dma_addr_t pa = 0; - - start = vaddr; - offset = start & ~PAGE_MASK; - end = start + size; - - vma = find_vma(mm, start); - - if (vma == NULL || vma->vm_end < end) - return -EFAULT; - - for (prev_pfn = 0; start < end; start += PAGE_SIZE) { - int ret = follow_pfn(vma, start, &this_pfn); - if (ret) - return ret; - - if (prev_pfn == 0) - pa = this_pfn << PAGE_SHIFT; - else if (this_pfn != prev_pfn + 1) - return -EFAULT; - - prev_pfn = this_pfn; - } - - /* - * Memory is contigous, lock vma and return to the caller - */ - *res_vma = vb2_get_vma(vma); - if (*res_vma == NULL) - return -ENOMEM; - - *res_pa = pa + offset; - return 0; -} -EXPORT_SYMBOL_GPL(vb2_get_contig_userptr); - -/** * vb2_create_framevec() - map virtual addresses to pfns * @start: Virtual user address where we start mapping * @length: Length of a range to map diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h index 2f0564f..830b523 100644 --- a/include/media/videobuf2-memops.h +++ b/include/media/videobuf2-memops.h @@ -31,12 +31,6 @@ struct vb2_vmarea_handler { extern const struct vm_operations_struct vb2_common_vm_ops; -int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size, - struct vm_area_struct **res_vma, dma_addr_t *res_pa); - -struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma); -void vb2_put_vma(struct vm_area_struct *vma); - struct frame_vector *vb2_create_framevec(unsigned long start, + unsigned long length, bool write); void vb2_destroy_framevec(struct frame_vector *vec); -- cgit v0.10.2 From 63540f01917c0d8b03b9813a0d6539469b163139 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 20 Jul 2015 05:03:35 -0300 Subject: [media] drm/exynos: Convert g2d_userptr_get_dma_addr() to use get_vaddr_frames() Convert g2d_userptr_get_dma_addr() to pin pages using get_vaddr_frames(). This removes the knowledge about vmas and mmap_sem locking from the exynos driver. It also fixes a problem where the function was mapping a user-provided address without holding mmap_sem. Acked-by: Inki Dae Signed-off-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 43003c4..b364562 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -77,6 +77,7 @@ config DRM_EXYNOS_VIDI config DRM_EXYNOS_G2D bool "Exynos DRM G2D" depends on DRM_EXYNOS && !VIDEO_SAMSUNG_S5P_G2D + select FRAME_VECTOR help Choose this option if you want to use Exynos G2D for DRM. 
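For reference, the pin/unpin lifecycle that the g2d diff below adopts, condensed into a hypothetical helper; the error handling mirrors the driver's err_put_framevec/err_destroy_framevec paths:

static struct frame_vector *pin_user_range(unsigned long start,
					   unsigned int npages)
{
	struct frame_vector *vec;
	int ret;

	vec = frame_vector_create(npages);
	if (!vec)
		return ERR_PTR(-ENOMEM);

	/* write and force are both true, matching the g2d conversion */
	ret = get_vaddr_frames(start, npages, true, true, vec);
	if (ret != npages) {
		/* A partial pin must be dropped before the vector is freed */
		if (ret > 0)
			put_vaddr_frames(vec);
		frame_vector_destroy(vec);
		return ERR_PTR(ret < 0 ? ret : -EFAULT);
	}
	return vec;
}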
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 81a2508..7584834 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -190,10 +190,8 @@ struct g2d_cmdlist_userptr { dma_addr_t dma_addr; unsigned long userptr; unsigned long size; - struct page **pages; - unsigned int npages; + struct frame_vector *vec; struct sg_table *sgt; - struct vm_area_struct *vma; atomic_t refcount; bool in_pool; bool out_of_list; @@ -363,6 +361,7 @@ static void g2d_userptr_put_dma_addr(struct drm_device *drm_dev, { struct g2d_cmdlist_userptr *g2d_userptr = (struct g2d_cmdlist_userptr *)obj; + struct page **pages; if (!obj) return; @@ -382,19 +381,21 @@ out: exynos_gem_unmap_sgt_from_dma(drm_dev, g2d_userptr->sgt, DMA_BIDIRECTIONAL); - exynos_gem_put_pages_to_userptr(g2d_userptr->pages, - g2d_userptr->npages, - g2d_userptr->vma); + pages = frame_vector_pages(g2d_userptr->vec); + if (!IS_ERR(pages)) { + int i; - exynos_gem_put_vma(g2d_userptr->vma); + for (i = 0; i < frame_vector_count(g2d_userptr->vec); i++) + set_page_dirty_lock(pages[i]); + } + put_vaddr_frames(g2d_userptr->vec); + frame_vector_destroy(g2d_userptr->vec); if (!g2d_userptr->out_of_list) list_del_init(&g2d_userptr->list); sg_free_table(g2d_userptr->sgt); kfree(g2d_userptr->sgt); - - drm_free_large(g2d_userptr->pages); kfree(g2d_userptr); } @@ -408,9 +409,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, struct exynos_drm_g2d_private *g2d_priv = file_priv->g2d_priv; struct g2d_cmdlist_userptr *g2d_userptr; struct g2d_data *g2d; - struct page **pages; struct sg_table *sgt; - struct vm_area_struct *vma; unsigned long start, end; unsigned int npages, offset; int ret; @@ -456,65 +455,40 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, return ERR_PTR(-ENOMEM); atomic_set(&g2d_userptr->refcount, 1); + g2d_userptr->size = size; start = userptr & PAGE_MASK; offset = userptr & ~PAGE_MASK; end = PAGE_ALIGN(userptr + size); npages = (end - start) >> PAGE_SHIFT; - g2d_userptr->npages = npages; - - pages = drm_calloc_large(npages, sizeof(struct page *)); - if (!pages) { - DRM_ERROR("failed to allocate pages.\n"); + g2d_userptr->vec = frame_vector_create(npages); + if (!g2d_userptr->vec) { ret = -ENOMEM; goto err_free; } - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, userptr); - if (!vma) { - up_read(&current->mm->mmap_sem); - DRM_ERROR("failed to get vm region.\n"); + ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec); + if (ret != npages) { + DRM_ERROR("failed to get user pages from userptr.\n"); + if (ret < 0) + goto err_destroy_framevec; ret = -EFAULT; - goto err_free_pages; + goto err_put_framevec; } - - if (vma->vm_end < userptr + size) { - up_read(&current->mm->mmap_sem); - DRM_ERROR("vma is too small.\n"); + if (frame_vector_to_pages(g2d_userptr->vec) < 0) { ret = -EFAULT; - goto err_free_pages; - } - - g2d_userptr->vma = exynos_gem_get_vma(vma); - if (!g2d_userptr->vma) { - up_read(&current->mm->mmap_sem); - DRM_ERROR("failed to copy vma.\n"); - ret = -ENOMEM; - goto err_free_pages; - } - - g2d_userptr->size = size; - - ret = exynos_gem_get_pages_from_userptr(start & PAGE_MASK, - npages, pages, vma); - if (ret < 0) { - up_read(&current->mm->mmap_sem); - DRM_ERROR("failed to get user pages from userptr.\n"); - goto err_put_vma; + goto err_put_framevec; } - up_read(&current->mm->mmap_sem); - g2d_userptr->pages = pages; - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); if (!sgt) { ret = -ENOMEM; - goto 
err_free_userptr; + goto err_put_framevec; } - ret = sg_alloc_table_from_pages(sgt, pages, npages, offset, - size, GFP_KERNEL); + ret = sg_alloc_table_from_pages(sgt, + frame_vector_pages(g2d_userptr->vec), + npages, offset, size, GFP_KERNEL); if (ret < 0) { DRM_ERROR("failed to get sgt from pages.\n"); goto err_free_sgt; @@ -549,16 +523,11 @@ err_sg_free_table: err_free_sgt: kfree(sgt); -err_free_userptr: - exynos_gem_put_pages_to_userptr(g2d_userptr->pages, - g2d_userptr->npages, - g2d_userptr->vma); - -err_put_vma: - exynos_gem_put_vma(g2d_userptr->vma); +err_put_framevec: + put_vaddr_frames(g2d_userptr->vec); -err_free_pages: - drm_free_large(pages); +err_destroy_framevec: + frame_vector_destroy(g2d_userptr->vec); err_free: kfree(g2d_userptr); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 0d5b969..47068ae 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -378,103 +378,6 @@ int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data, return 0; } -struct vm_area_struct *exynos_gem_get_vma(struct vm_area_struct *vma) -{ - struct vm_area_struct *vma_copy; - - vma_copy = kmalloc(sizeof(*vma_copy), GFP_KERNEL); - if (!vma_copy) - return NULL; - - if (vma->vm_ops && vma->vm_ops->open) - vma->vm_ops->open(vma); - - if (vma->vm_file) - get_file(vma->vm_file); - - memcpy(vma_copy, vma, sizeof(*vma)); - - vma_copy->vm_mm = NULL; - vma_copy->vm_next = NULL; - vma_copy->vm_prev = NULL; - - return vma_copy; -} - -void exynos_gem_put_vma(struct vm_area_struct *vma) -{ - if (!vma) - return; - - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); - - if (vma->vm_file) - fput(vma->vm_file); - - kfree(vma); -} - -int exynos_gem_get_pages_from_userptr(unsigned long start, - unsigned int npages, - struct page **pages, - struct vm_area_struct *vma) -{ - int get_npages; - - /* the memory region mmaped with VM_PFNMAP. */ - if (vma_is_io(vma)) { - unsigned int i; - - for (i = 0; i < npages; ++i, start += PAGE_SIZE) { - unsigned long pfn; - int ret = follow_pfn(vma, start, &pfn); - if (ret) - return ret; - - pages[i] = pfn_to_page(pfn); - } - - if (i != npages) { - DRM_ERROR("failed to get user_pages.\n"); - return -EINVAL; - } - - return 0; - } - - get_npages = get_user_pages(current, current->mm, start, - npages, 1, 1, pages, NULL); - get_npages = max(get_npages, 0); - if (get_npages != npages) { - DRM_ERROR("failed to get user_pages.\n"); - while (get_npages) - put_page(pages[--get_npages]); - return -EFAULT; - } - - return 0; -} - -void exynos_gem_put_pages_to_userptr(struct page **pages, - unsigned int npages, - struct vm_area_struct *vma) -{ - if (!vma_is_io(vma)) { - unsigned int i; - - for (i = 0; i < npages; i++) { - set_page_dirty_lock(pages[i]); - - /* - * undo the reference we took when populating - * the table. - */ - put_page(pages[i]); - } - } -} - int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev, struct sg_table *sgt, enum dma_data_direction dir) -- cgit v0.10.2 From 488ca7d72d974e3c00ae73ed9f947590680bdf00 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 21 Aug 2015 14:56:46 -0700 Subject: x86/cpufeatures: Enable cpuid for Intel SHA extensions Add Intel CPUID for Intel Secure Hash Algorithm Extensions. This feature provides new instructions for accelerated computation of SHA-1 and SHA-256. This allows the feature to be shown in the /proc/cpuinfo for cpus that support it. 
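For a quick check outside the kernel, here is a minimal userspace sketch (not part of the patch) that tests the same CPUID bit described below:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=07H, ECX=0):EBX bit 29 is the SHA extensions flag */
	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) &&
	    (ebx & (1u << 29)))
		printf("SHA extensions supported\n");
	else
		printf("SHA extensions not supported\n");
	return 0;
}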
Refer to the SHA extensions programming guide in chapter 8.2 of the Intel Architecture Instruction Set Extensions Programming Reference for the definition of this feature's CPUID bit: CPUID.(EAX=07H, ECX=0):EBX.SHA [bit 29] = 1 https://software.intel.com/sites/default/files/managed/07/b7/319433-023.pdf Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Cc: Borislav Petkov Cc: Dave Hansen Cc: Herbert Xu Link: http://lkml.kernel.org/r/1440194206.3940.6.camel@schen9-mobl2 Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3d6606f..a94f83d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -239,6 +239,7 @@ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -- cgit v0.10.2 From 76c28f1fcfeb42b47f798fe498351ee1d60086ae Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 24 Aug 2015 10:26:03 -0700 Subject: target/iscsi: Fix np_ip bracket issue by removing np_ip Revert commit 1997e6259, which causes double brackets on IPv6 inaddr_any addresses. Since we have np_sockaddr, if we need a textual representation we can use "%pISc". Change the iscsit_add_network_portal() and iscsit_add_np() signatures to remove the *ip_str parameter. Fix and extend some comments earlier in the function. Tested to work for :: and ::1 via iscsiadm; previously :: failed, see https://bugzilla.redhat.com/show_bug.cgi?id=1249107 .
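The %pIS printk extension the message refers to can be exercised as in this minimal sketch; the function name is hypothetical:

static void print_portal(struct __kernel_sockaddr_storage *ss)
{
	/* %pISc prints the address in canonical form, %pISpc adds the port */
	pr_debug("portal: %pISc\n", ss);
	pr_debug("portal: %pISpc\n", ss);
}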
CC: stable@vger.kernel.org Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e55f49c..d75eeb5 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -341,7 +341,6 @@ static struct iscsi_np *iscsit_get_np( struct iscsi_np *iscsit_add_np( struct __kernel_sockaddr_storage *sockaddr, - char *ip_str, int network_transport) { struct sockaddr_in *sock_in; @@ -370,11 +369,9 @@ struct iscsi_np *iscsit_add_np( np->np_flags |= NPF_IP_NETWORK; if (sockaddr->ss_family == AF_INET6) { sock_in6 = (struct sockaddr_in6 *)sockaddr; - snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str); np->np_port = ntohs(sock_in6->sin6_port); } else { sock_in = (struct sockaddr_in *)sockaddr; - sprintf(np->np_ip, "%s", ip_str); np->np_port = ntohs(sock_in->sin_port); } @@ -411,8 +408,8 @@ struct iscsi_np *iscsit_add_np( list_add_tail(&np->np_list, &g_np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n", - np->np_ip, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Added Network Portal: %pISc:%hu on %s\n", + &np->np_sockaddr, np->np_port, np->np_transport->name); return np; } @@ -481,8 +478,8 @@ int iscsit_del_np(struct iscsi_np *np) list_del(&np->np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n", - np->np_ip, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Removed Network Portal: %pISc:%hu on %s\n", + &np->np_sockaddr, np->np_port, np->np_transport->name); iscsit_put_transport(np->np_transport); kfree(np); @@ -3463,7 +3460,6 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); - char *fmt_str; if (np->np_network_transport != network_transport) continue; @@ -3491,15 +3487,18 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } } - if (np->np_sockaddr.ss_family == AF_INET6) - fmt_str = "TargetAddress=[%s]:%hu,%hu"; - else - fmt_str = "TargetAddress=%s:%hu,%hu"; - - len = sprintf(buf, fmt_str, - inaddr_any ? 
conn->local_ip : np->np_ip, - np->np_port, - tpg->tpgt); + if (inaddr_any) { + len = sprintf(buf, "TargetAddress=" + "%s:%hu,%hu", + conn->local_ip, + np->np_port, + tpg->tpgt); + } else { + len = sprintf(buf, "TargetAddress=" + "%pISpc,%hu", + &np->np_sockaddr, + tpg->tpgt); + } len += 1; if ((len + payload_len) > buffer_len) { diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 7d0f9c0..d294f03 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -13,7 +13,7 @@ extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, struct iscsi_np *, int); extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, - char *, int); + int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, struct iscsi_portal_group *, bool); extern int iscsit_del_np(struct iscsi_np *); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 48f708b..ad6a889 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -99,7 +99,7 @@ static ssize_t lio_target_np_store_sctp( * Use existing np->np_sockaddr for SCTP network portal reference */ tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - np->np_ip, tpg_np, ISCSI_SCTP_TCP); + tpg_np, ISCSI_SCTP_TCP); if (!tpg_np_sctp || IS_ERR(tpg_np_sctp)) goto out; } else { @@ -177,7 +177,7 @@ static ssize_t lio_target_np_store_iser( } tpg_np_iser = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - np->np_ip, tpg_np, ISCSI_INFINIBAND); + tpg_np, ISCSI_INFINIBAND); if (IS_ERR(tpg_np_iser)) { rc = PTR_ERR(tpg_np_iser); goto out; @@ -248,8 +248,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EINVAL); } str++; /* Skip over leading "[" */ - *str2 = '\0'; /* Terminate the IPv6 address */ - str2++; /* Skip over the "]" */ + *str2 = '\0'; /* Terminate the unbracketed IPv6 address */ + str2++; /* Skip over the \0 */ port_str = strstr(str2, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -316,7 +316,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( * sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/ * */ - tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL, + tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, NULL, ISCSI_TCP); if (IS_ERR(tpg_np)) { iscsit_put_tpg(tpg); @@ -344,8 +344,8 @@ static void lio_target_call_delnpfromtpg( se_tpg = &tpg->tpg_se_tpg; pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu" - " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), - tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port); + " PORTAL: %pISc:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, &tpg_np->tpg_np->np_sockaddr, tpg_np->tpg_np->np_port); ret = iscsit_tpg_del_network_portal(tpg, tpg_np); if (ret < 0) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bd192f8..88e0b97 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -823,8 +823,8 @@ static void iscsi_handle_login_thread_timeout(unsigned long data) struct iscsi_np *np = (struct iscsi_np *) data; spin_lock_bh(&np->np_thread_lock); - pr_err("iSCSI Login timeout on Network Portal %s:%hu\n", - np->np_ip, np->np_port); + pr_err("iSCSI Login timeout on Network Portal %pISc:%hu\n", 
+ &np->np_sockaddr, np->np_port); if (np->np_login_timer_flags & ISCSI_TF_STOP) { spin_unlock_bh(&np->np_thread_lock); @@ -1302,8 +1302,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) spin_lock_bh(&np->np_thread_lock); if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { spin_unlock_bh(&np->np_thread_lock); - pr_err("iSCSI Network Portal on %s:%hu currently not" - " active.\n", np->np_ip, np->np_port); + pr_err("iSCSI Network Portal on %pISc:%hu currently not" + " active.\n", &np->np_sockaddr, np->np_port); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); goto new_sess_out; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8262a85..31007cb 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -461,7 +461,6 @@ static bool iscsit_tpg_check_network_portal( struct iscsi_tpg_np *iscsit_tpg_add_network_portal( struct iscsi_portal_group *tpg, struct __kernel_sockaddr_storage *sockaddr, - char *ip_str, struct iscsi_tpg_np *tpg_np_parent, int network_transport) { @@ -471,8 +470,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( if (!tpg_np_parent) { if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr, network_transport)) { - pr_err("Network Portal: %s already exists on a" - " different TPG on %s\n", ip_str, + pr_err("Network Portal: %pISc already exists on a" + " different TPG on %s\n", sockaddr, tpg->tpg_tiqn->tiqn); return ERR_PTR(-EEXIST); } @@ -485,7 +484,7 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( return ERR_PTR(-ENOMEM); } - np = iscsit_add_np(sockaddr, ip_str, network_transport); + np = iscsit_add_np(sockaddr, network_transport); if (IS_ERR(np)) { kfree(tpg_np); return ERR_CAST(np); @@ -515,8 +514,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( spin_unlock(&tpg_np_parent->tpg_np_parent_lock); } - pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Added Network Portal: %pISc:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, np->np_transport->name); return tpg_np; @@ -529,8 +528,8 @@ static int iscsit_tpg_release_np( { iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); - pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Removed Network Portal: %pISc:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, np->np_transport->name); tpg_np->tpg_np = NULL; diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index a2790fd..1c0b1d6 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -22,7 +22,7 @@ extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *); extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int); extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *, - struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *, + struct __kernel_sockaddr_storage *, struct iscsi_tpg_np *, int); extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *, struct iscsi_tpg_np *); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index d4616ef..1051d0c 100644 --- 
a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -778,7 +778,6 @@ struct iscsi_np { enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; - unsigned char np_ip[IPV6_ADDRESS_SPACE]; u16 np_port; spinlock_t np_thread_lock; struct completion np_restart_comp; -- cgit v0.10.2 From 69d755747d31c07a416064f251c2f408938fb67a Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 24 Aug 2015 10:26:04 -0700 Subject: target/iscsi: Keep local_ip as the actual sockaddr This is a more natural format that lets us format it with the appropriate printk specifier as needed. This also lets us handle v4-mapped ipv6 addresses a little more nicely, by storing the addr as an actual v4 sockaddr in conn->local_sockaddr. Finally, we no longer need to maintain variables for port, since this is contained in sockaddr. Remove iscsi_np.np_port and iscsi_conn.local_port. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 7717009..9e7094c 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -3218,9 +3218,7 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, conn->login_port = ntohs(sock_in6->sin6_port); sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.src_addr; - snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c", - &sock_in6->sin6_addr.in6_u); - conn->local_port = ntohs(sock_in6->sin6_port); + memcpy(&conn->local_sockaddr , &sock_in6, sizeof(sock_in6)); } else { sock_in = (struct sockaddr_in *)&cm_route->addr.dst_addr; sprintf(conn->login_ip, "%pI4", @@ -3228,9 +3226,7 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, conn->login_port = ntohs(sock_in->sin_port); sock_in = (struct sockaddr_in *)&cm_route->addr.src_addr; - sprintf(conn->local_ip, "%pI4", - &sock_in->sin_addr.s_addr); - conn->local_port = ntohs(sock_in->sin_port); + memcpy(&conn->local_sockaddr , &sock_in, sizeof(sock_in)); } } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d75eeb5..f752235 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -276,7 +276,7 @@ bool iscsit_check_np_match( struct sockaddr_in *sock_in, *sock_in_e; struct sockaddr_in6 *sock_in6, *sock_in6_e; bool ip_match = false; - u16 port; + u16 port, port_e; if (sockaddr->ss_family == AF_INET6) { sock_in6 = (struct sockaddr_in6 *)sockaddr; @@ -288,6 +288,7 @@ bool iscsit_check_np_match( ip_match = true; port = ntohs(sock_in6->sin6_port); + port_e = ntohs(sock_in6_e->sin6_port); } else { sock_in = (struct sockaddr_in *)sockaddr; sock_in_e = (struct sockaddr_in *)&np->np_sockaddr; @@ -296,9 +297,10 @@ bool iscsit_check_np_match( ip_match = true; port = ntohs(sock_in->sin_port); + port_e = ntohs(sock_in_e->sin_port); } - if (ip_match && (np->np_port == port) && + if (ip_match && (port_e == port) && (np->np_network_transport == network_transport)) return true; @@ -343,8 +345,6 @@ struct iscsi_np *iscsit_add_np( struct __kernel_sockaddr_storage *sockaddr, int network_transport) { - struct sockaddr_in *sock_in; - struct sockaddr_in6 *sock_in6; struct iscsi_np *np; int ret; @@ -367,14 +367,6 @@ struct iscsi_np *iscsit_add_np( } np->np_flags |= NPF_IP_NETWORK; - if (sockaddr->ss_family == AF_INET6) { - sock_in6 = (struct sockaddr_in6 *)sockaddr; - np->np_port = ntohs(sock_in6->sin6_port); - } else { - sock_in = (struct sockaddr_in 
*)sockaddr; - np->np_port = ntohs(sock_in->sin_port); - } - np->np_network_transport = network_transport; spin_lock_init(&np->np_thread_lock); init_completion(&np->np_restart_comp); @@ -408,8 +400,8 @@ struct iscsi_np *iscsit_add_np( list_add_tail(&np->np_list, &g_np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Added Network Portal: %pISc:%hu on %s\n", - &np->np_sockaddr, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Added Network Portal: %pISpc on %s\n", + &np->np_sockaddr, np->np_transport->name); return np; } @@ -478,8 +470,8 @@ int iscsit_del_np(struct iscsi_np *np) list_del(&np->np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Removed Network Portal: %pISc:%hu on %s\n", - &np->np_sockaddr, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Removed Network Portal: %pISpc on %s\n", + &np->np_sockaddr, np->np_transport->name); iscsit_put_transport(np->np_transport); kfree(np); @@ -3460,6 +3452,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); + struct __kernel_sockaddr_storage *sockaddr; if (np->np_network_transport != network_transport) continue; @@ -3487,18 +3480,15 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } } - if (inaddr_any) { - len = sprintf(buf, "TargetAddress=" - "%s:%hu,%hu", - conn->local_ip, - np->np_port, - tpg->tpgt); - } else { - len = sprintf(buf, "TargetAddress=" - "%pISpc,%hu", - &np->np_sockaddr, - tpg->tpgt); - } + if (inaddr_any) + sockaddr = &conn->local_sockaddr; + else + sockaddr = &np->np_sockaddr; + + len = sprintf(buf, "TargetAddress=" + "%pISpc,%hu", + sockaddr, + tpg->tpgt); len += 1; if ((len + payload_len) > buffer_len) { diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index ad6a889..8d69c41 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -344,8 +344,8 @@ static void lio_target_call_delnpfromtpg( se_tpg = &tpg->tpg_se_tpg; pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu" - " PORTAL: %pISc:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), - tpg->tpgt, &tpg_np->tpg_np->np_sockaddr, tpg_np->tpg_np->np_port); + " PORTAL: %pISpc\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, &tpg_np->tpg_np->np_sockaddr); ret = iscsit_tpg_del_network_portal(tpg, tpg_np); if (ret < 0) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 88e0b97..007299a 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -729,8 +729,8 @@ int iscsi_post_login_handler( } pr_debug("iSCSI Login successful on CID: %hu from %s to" - " %s:%hu,%hu\n", conn->cid, conn->login_ip, - conn->local_ip, conn->local_port, tpg->tpgt); + " %pISpc,%hu\n", conn->cid, conn->login_ip, + &conn->local_sockaddr, tpg->tpgt); list_add_tail(&conn->conn_list, &sess->sess_conn_list); atomic_inc(&sess->nconn); @@ -774,8 +774,8 @@ int iscsi_post_login_handler( pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n"); sess->session_state = TARG_SESS_STATE_LOGGED_IN; - pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n", - conn->cid, conn->login_ip, conn->local_ip, conn->local_port, + pr_debug("iSCSI Login successful on CID: %hu from %s to %pISpc,%hu\n", + conn->cid, conn->login_ip, &conn->local_sockaddr, tpg->tpgt); spin_lock_bh(&sess->conn_lock); @@ -823,8 +823,8 @@ static 
void iscsi_handle_login_thread_timeout(unsigned long data) struct iscsi_np *np = (struct iscsi_np *) data; spin_lock_bh(&np->np_thread_lock); - pr_err("iSCSI Login timeout on Network Portal %pISc:%hu\n", - &np->np_sockaddr, np->np_port); + pr_err("iSCSI Login timeout on Network Portal %pISpc\n", + &np->np_sockaddr); if (np->np_login_timer_flags & ISCSI_TF_STOP) { spin_unlock_bh(&np->np_thread_lock); @@ -1027,13 +1027,15 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 0); if (!rc) { - if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) - snprintf(conn->local_ip, sizeof(conn->local_ip), "[%pI6c]", - &sock_in6.sin6_addr.in6_u); - else - snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI4", - &sock_in6.sin6_addr.s6_addr32[3]); - conn->local_port = ntohs(sock_in6.sin6_port); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) { + memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6)); + } else { + /* Pretend to be an ipv4 socket */ + sock_in.sin_family = AF_INET; + sock_in.sin_port = sock_in6.sin6_port; + memcpy(&sock_in.sin_addr, &sock_in6.sin6_addr.s6_addr32[3], 4); + memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in)); + } } } else { memset(&sock_in, 0, sizeof(struct sockaddr_in)); @@ -1048,11 +1050,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in, &err, 0); - if (!rc) { - sprintf(conn->local_ip, "%pI4", - &sock_in.sin_addr.s_addr); - conn->local_port = ntohs(sock_in.sin_port); - } + if (!rc) + memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in)); } return 0; @@ -1302,8 +1301,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) spin_lock_bh(&np->np_thread_lock); if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { spin_unlock_bh(&np->np_thread_lock); - pr_err("iSCSI Network Portal on %pISc:%hu currently not" - " active.\n", &np->np_sockaddr, np->np_port); + pr_err("iSCSI Network Portal on %pISpc currently not" + " active.\n", &np->np_sockaddr); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); goto new_sess_out; @@ -1313,8 +1312,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) conn->network_transport = np->np_network_transport; pr_debug("Received iSCSI login request from %s on %s Network" - " Portal %s:%hu\n", conn->login_ip, np->np_transport->name, - conn->local_ip, conn->local_port); + " Portal %pISpc\n", conn->login_ip, np->np_transport->name, + &conn->local_sockaddr); pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n"); conn->conn_state = TARG_CONN_STATE_IN_LOGIN; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 31007cb..d61ae51 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -514,8 +514,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( spin_unlock(&tpg_np_parent->tpg_np_parent_lock); } - pr_debug("CORE[%s] - Added Network Portal: %pISc:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Added Network Portal: %pISpc,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, tpg->tpgt, np->np_transport->name); return tpg_np; @@ -528,8 +528,8 @@ static int iscsit_tpg_release_np( { iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); - pr_debug("CORE[%s] - Removed Network Portal: %pISc:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, + 
pr_debug("CORE[%s] - Removed Network Portal: %pISpc,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, tpg->tpgt, np->np_transport->name); tpg_np->tpg_np = NULL; diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 1051d0c..b943411 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -519,7 +519,6 @@ struct iscsi_conn { u16 cid; /* Remote TCP Port */ u16 login_port; - u16 local_port; int net_size; int login_family; u32 auth_id; @@ -531,7 +530,7 @@ struct iscsi_conn { u32 stat_sn; #define IPV6_ADDRESS_SPACE 48 unsigned char login_ip[IPV6_ADDRESS_SPACE]; - unsigned char local_ip[IPV6_ADDRESS_SPACE]; + struct __kernel_sockaddr_storage local_sockaddr; int conn_usage_count; int conn_waiting_on_uc; atomic_t check_immediate_queue; @@ -778,7 +777,6 @@ struct iscsi_np { enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; - u16 np_port; spinlock_t np_thread_lock; struct completion np_restart_comp; struct socket *np_socket; -- cgit v0.10.2 From dc58f760e2e1f8f2265b581d35f211415c4fee0c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 24 Aug 2015 10:26:05 -0700 Subject: target/iscsi: Replace conn->login_ip with login_sockaddr Very similar to how it went with local_sockaddr. It was embedded in iscsi_login_stats so some changes there, and we needed to copy in a sockaddr_storage comparison function. Hopefully the kernel will get a standard one soon, our implementation makes the 3rd. isert_set_conn_info() became much smaller. IPV6_ADDRESS_SPACE define goes away, had to modify a call to in6_pton(), can just use -1 since we are sure string is null-terminated. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9e7094c..aa08606 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -3206,28 +3206,11 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, { struct rdma_cm_id *cm_id = isert_conn->cm_id; struct rdma_route *cm_route = &cm_id->route; - struct sockaddr_in *sock_in; - struct sockaddr_in6 *sock_in6; conn->login_family = np->np_sockaddr.ss_family; - if (np->np_sockaddr.ss_family == AF_INET6) { - sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.dst_addr; - snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", - &sock_in6->sin6_addr.in6_u); - conn->login_port = ntohs(sock_in6->sin6_port); - - sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.src_addr; - memcpy(&conn->local_sockaddr , &sock_in6, sizeof(sock_in6)); - } else { - sock_in = (struct sockaddr_in *)&cm_route->addr.dst_addr; - sprintf(conn->login_ip, "%pI4", - &sock_in->sin_addr.s_addr); - conn->login_port = ntohs(sock_in->sin_port); - - sock_in = (struct sockaddr_in *)&cm_route->addr.src_addr; - memcpy(&conn->local_sockaddr , &sock_in, sizeof(sock_in)); - } + conn->login_sockaddr = cm_route->addr.dst_addr; + conn->local_sockaddr = cm_route->addr.src_addr; } static int diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 8d69c41..5afa629 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -267,7 +267,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( sock_in6 = (struct sockaddr_in6 *)&sockaddr; sock_in6->sin6_family = AF_INET6; sock_in6->sin6_port = htons((unsigned short)port); - ret = in6_pton(str, 
IPV6_ADDRESS_SPACE, + ret = in6_pton(str, -1, (void *)&sock_in6->sin6_addr.in6_u, -1, &end); if (ret <= 0) { pr_err("in6_pton returned: %d\n", ret); @@ -753,7 +753,7 @@ static ssize_t lio_target_nacl_show_info( break; } - rb += sprintf(page+rb, " Address %s %s", conn->login_ip, + rb += sprintf(page+rb, " Address %pISc %s", &conn->login_sockaddr, (conn->network_transport == ISCSI_TCP) ? "TCP" : "SCTP"); rb += sprintf(page+rb, " StatSN: 0x%08x\n", diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 007299a..b7ef6fa 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -728,8 +728,8 @@ int iscsi_post_login_handler( stop_timer = 1; } - pr_debug("iSCSI Login successful on CID: %hu from %s to" - " %pISpc,%hu\n", conn->cid, conn->login_ip, + pr_debug("iSCSI Login successful on CID: %hu from %pISpc to" + " %pISpc,%hu\n", conn->cid, &conn->login_sockaddr, &conn->local_sockaddr, tpg->tpgt); list_add_tail(&conn->conn_list, &sess->sess_conn_list); @@ -774,8 +774,8 @@ int iscsi_post_login_handler( pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n"); sess->session_state = TARG_SESS_STATE_LOGGED_IN; - pr_debug("iSCSI Login successful on CID: %hu from %s to %pISpc,%hu\n", - conn->cid, conn->login_ip, &conn->local_sockaddr, + pr_debug("iSCSI Login successful on CID: %hu from %pISpc to %pISpc,%hu\n", + conn->cid, &conn->login_sockaddr, &conn->local_sockaddr, tpg->tpgt); spin_lock_bh(&sess->conn_lock); @@ -1015,13 +1015,15 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 1); if (!rc) { - if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) - snprintf(conn->login_ip, sizeof(conn->login_ip), "[%pI6c]", - &sock_in6.sin6_addr.in6_u); - else - snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI4", - &sock_in6.sin6_addr.s6_addr32[3]); - conn->login_port = ntohs(sock_in6.sin6_port); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) { + memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6)); + } else { + /* Pretend to be an ipv4 socket */ + sock_in.sin_family = AF_INET; + sock_in.sin_port = sock_in6.sin6_port; + memcpy(&sock_in.sin_addr, &sock_in6.sin6_addr.s6_addr32[3], 4); + memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in)); + } } rc = conn->sock->ops->getname(conn->sock, @@ -1042,11 +1044,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in, &err, 1); - if (!rc) { - sprintf(conn->login_ip, "%pI4", - &sock_in.sin_addr.s_addr); - conn->login_port = ntohs(sock_in.sin_port); - } + if (!rc) + memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in)); rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in, &err, 0); @@ -1311,8 +1310,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) conn->network_transport = np->np_network_transport; - pr_debug("Received iSCSI login request from %s on %s Network" - " Portal %pISpc\n", conn->login_ip, np->np_transport->name, + pr_debug("Received iSCSI login request from %pISpc on %s Network" + " Portal %pISpc\n", &conn->login_sockaddr, np->np_transport->name, &conn->local_sockaddr); pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n"); diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c index 5e1349a..9dd94ff 100644 --- a/drivers/target/iscsi/iscsi_target_stat.c +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -430,7 +430,7 @@ 
static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr(
 	int ret;
 
 	spin_lock(&lstat->lock);
-	ret = snprintf(page, PAGE_SIZE, "%s\n", lstat->last_intr_fail_ip_addr);
+	ret = snprintf(page, PAGE_SIZE, "%pISc\n", &lstat->last_intr_fail_sockaddr);
 	spin_unlock(&lstat->lock);
 
 	return ret;

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7df4fac..428b0d9 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1372,6 +1372,33 @@ int tx_data(
 	return iscsit_do_tx_data(conn, &c);
 }
 
+static bool sockaddr_equal(struct sockaddr_storage *x, struct sockaddr_storage *y)
+{
+	switch (x->ss_family) {
+	case AF_INET: {
+		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
+		struct sockaddr_in *siny = (struct sockaddr_in *)y;
+		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
+			return false;
+		if (sinx->sin_port != siny->sin_port)
+			return false;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
+		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
+		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
+			return false;
+		if (sinx->sin6_port != siny->sin6_port)
+			return false;
+		break;
+	}
+	default:
+		return false;
+	}
+	return true;
+}
+
 void iscsit_collect_login_stats(
 	struct iscsi_conn *conn,
 	u8 status_class,
@@ -1388,7 +1415,7 @@ void iscsit_collect_login_stats(
 	ls = &tiqn->login_stats;
 
 	spin_lock(&ls->lock);
-	if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) &&
+	if (sockaddr_equal(&conn->login_sockaddr, &ls->last_intr_fail_sockaddr) &&
 	    ((get_jiffies_64() - ls->last_fail_time) < 10)) {
 		/* We already have the failure info for this login */
 		spin_unlock(&ls->lock);
@@ -1428,8 +1455,7 @@ void iscsit_collect_login_stats(
 		ls->last_intr_fail_ip_family = conn->login_family;
 
-		snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE,
-			"%s", conn->login_ip);
+		ls->last_intr_fail_sockaddr = conn->login_sockaddr;
 		ls->last_fail_time = get_jiffies_64();
 	}

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index b943411..f3eb998 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -528,8 +528,7 @@ struct iscsi_conn {
 	u32			exp_statsn;
 	/* Per connection status sequence number */
 	u32			stat_sn;
-#define IPV6_ADDRESS_SPACE	48
-	unsigned char		login_ip[IPV6_ADDRESS_SPACE];
+	struct __kernel_sockaddr_storage login_sockaddr;
 	struct __kernel_sockaddr_storage local_sockaddr;
 	int			conn_usage_count;
 	int			conn_waiting_on_uc;
diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h
index 3ff76b4..f2a583c 100644
--- a/include/target/iscsi/iscsi_target_stat.h
+++ b/include/target/iscsi/iscsi_target_stat.h
@@ -50,7 +50,7 @@ struct iscsi_login_stats {
 	u64		last_fail_time;		/* time stamp (jiffies) */
 	u32		last_fail_type;
 	int		last_intr_fail_ip_family;
-	unsigned char	last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+	struct __kernel_sockaddr_storage last_intr_fail_sockaddr;
 	char		last_intr_fail_name[224];
 } ____cacheline_aligned;

-- cgit v0.10.2


From 13a3cf08fa1e4b3a252f24202d47a556242aea03 Mon Sep 17 00:00:00 2001
From: Andy Grover
Date: Mon, 24 Aug 2015 10:26:06 -0700
Subject: target/iscsi: Replace __kernel_sockaddr_storage with sockaddr_storage

It appears to be what the rest of the kernel does, so let's do it too.
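(Background, not part of the commit message: in-kernel, the two spellings name the same type, via an alias in include/linux/socket.h along the lines of

	#define sockaddr_storage __kernel_sockaddr_storage

so the conversion is layout-neutral. A build-time check such as the following, purely hypothetical and for illustration, would compile away to nothing:

	BUILD_BUG_ON(sizeof(struct sockaddr_storage) !=
		     sizeof(struct __kernel_sockaddr_storage));
)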
Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index aa08606..20a0a46 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -3102,7 +3102,7 @@ out: static int isert_setup_np(struct iscsi_np *np, - struct __kernel_sockaddr_storage *ksockaddr) + struct sockaddr_storage *ksockaddr) { struct isert_np *isert_np; struct rdma_cm_id *isert_lid; @@ -3124,7 +3124,7 @@ isert_setup_np(struct iscsi_np *np, * in iscsi_target_configfs.c code.. */ memcpy(&np->np_sockaddr, ksockaddr, - sizeof(struct __kernel_sockaddr_storage)); + sizeof(struct sockaddr_storage)); isert_lid = isert_setup_id(isert_np); if (IS_ERR(isert_lid)) { diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index f752235..a9257a0 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -269,7 +269,7 @@ int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg, } bool iscsit_check_np_match( - struct __kernel_sockaddr_storage *sockaddr, + struct sockaddr_storage *sockaddr, struct iscsi_np *np, int network_transport) { @@ -311,7 +311,7 @@ bool iscsit_check_np_match( * Called with mutex np_lock held */ static struct iscsi_np *iscsit_get_np( - struct __kernel_sockaddr_storage *sockaddr, + struct sockaddr_storage *sockaddr, int network_transport) { struct iscsi_np *np; @@ -342,7 +342,7 @@ static struct iscsi_np *iscsit_get_np( } struct iscsi_np *iscsit_add_np( - struct __kernel_sockaddr_storage *sockaddr, + struct sockaddr_storage *sockaddr, int network_transport) { struct iscsi_np *np; @@ -3452,7 +3452,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); - struct __kernel_sockaddr_storage *sockaddr; + struct sockaddr_storage *sockaddr; if (np->np_network_transport != network_transport) continue; diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index d294f03..4cf2c0f 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -10,9 +10,9 @@ extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *); extern void iscsit_login_kref_put(struct kref *); extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, struct iscsi_tpg_np *); -extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, +extern bool iscsit_check_np_match(struct sockaddr_storage *, struct iscsi_np *, int); -extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, +extern struct iscsi_np *iscsit_add_np(struct sockaddr_storage *, int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, struct iscsi_portal_group *, bool); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 5afa629..c7461d7 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -220,7 +220,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( struct iscsi_portal_group *tpg; struct iscsi_tpg_np *tpg_np; char *str, *str2, *ip_str, *port_str; - struct __kernel_sockaddr_storage sockaddr; + struct sockaddr_storage sockaddr; struct sockaddr_in *sock_in; struct sockaddr_in6 *sock_in6; unsigned long port; @@ -235,7 +235,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( memset(buf, 0, 
MAX_PORTAL_LEN + 1); snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); - memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); + memset(&sockaddr, 0, sizeof(struct sockaddr_storage)); str = strstr(buf, "["); if (str) { diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index b7ef6fa..fc7b796 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -877,7 +877,7 @@ static void iscsi_stop_login_thread_timer(struct iscsi_np *np) int iscsit_setup_np( struct iscsi_np *np, - struct __kernel_sockaddr_storage *sockaddr) + struct sockaddr_storage *sockaddr) { struct socket *sock = NULL; int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len; @@ -916,7 +916,7 @@ int iscsit_setup_np( * in iscsi_target_configfs.c code.. */ memcpy(&np->np_sockaddr, sockaddr, - sizeof(struct __kernel_sockaddr_storage)); + sizeof(struct sockaddr_storage)); if (sockaddr->ss_family == AF_INET6) len = sizeof(struct sockaddr_in6); @@ -975,7 +975,7 @@ fail: int iscsi_target_setup_login_socket( struct iscsi_np *np, - struct __kernel_sockaddr_storage *sockaddr) + struct sockaddr_storage *sockaddr) { struct iscsit_transport *t; int rc; diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 1c73580..35aeffe 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -5,9 +5,9 @@ extern int iscsi_login_setup_crypto(struct iscsi_conn *); extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *); extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32); extern int iscsit_setup_np(struct iscsi_np *, - struct __kernel_sockaddr_storage *); + struct sockaddr_storage *); extern int iscsi_target_setup_login_socket(struct iscsi_np *, - struct __kernel_sockaddr_storage *); + struct sockaddr_storage *); extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index d61ae51..23c95cd 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -431,7 +431,7 @@ struct iscsi_tpg_np *iscsit_tpg_locate_child_np( static bool iscsit_tpg_check_network_portal( struct iscsi_tiqn *tiqn, - struct __kernel_sockaddr_storage *sockaddr, + struct sockaddr_storage *sockaddr, int network_transport) { struct iscsi_portal_group *tpg; @@ -460,7 +460,7 @@ static bool iscsit_tpg_check_network_portal( struct iscsi_tpg_np *iscsit_tpg_add_network_portal( struct iscsi_portal_group *tpg, - struct __kernel_sockaddr_storage *sockaddr, + struct sockaddr_storage *sockaddr, struct iscsi_tpg_np *tpg_np_parent, int network_transport) { diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 1c0b1d6..9db32bd 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -22,7 +22,7 @@ extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *); extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int); extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *, - struct __kernel_sockaddr_storage *, struct iscsi_tpg_np *, + struct 
sockaddr_storage *, struct iscsi_tpg_np *, int); extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *, struct iscsi_tpg_np *); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index f3eb998..84abe73 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -528,8 +528,8 @@ struct iscsi_conn { u32 exp_statsn; /* Per connection status sequence number */ u32 stat_sn; - struct __kernel_sockaddr_storage login_sockaddr; - struct __kernel_sockaddr_storage local_sockaddr; + struct sockaddr_storage login_sockaddr; + struct sockaddr_storage local_sockaddr; int conn_usage_count; int conn_waiting_on_uc; atomic_t check_immediate_queue; @@ -779,7 +779,7 @@ struct iscsi_np { spinlock_t np_thread_lock; struct completion np_restart_comp; struct socket *np_socket; - struct __kernel_sockaddr_storage np_sockaddr; + struct sockaddr_storage np_sockaddr; struct task_struct *np_thread; struct timer_list np_login_timer; void *np_context; diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h index f2a583c..e615bb4 100644 --- a/include/target/iscsi/iscsi_target_stat.h +++ b/include/target/iscsi/iscsi_target_stat.h @@ -50,7 +50,7 @@ struct iscsi_login_stats { u64 last_fail_time; /* time stamp (jiffies) */ u32 last_fail_type; int last_intr_fail_ip_family; - struct __kernel_sockaddr_storage last_intr_fail_sockaddr; + struct sockaddr_storage last_intr_fail_sockaddr; char last_intr_fail_name[224]; } ____cacheline_aligned; diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index e6bb166..90e37fa 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -9,7 +9,7 @@ struct iscsit_transport { int priv_size; struct module *owner; struct list_head t_node; - int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); + int (*iscsit_setup_np)(struct iscsi_np *, struct sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); void (*iscsit_wait_conn)(struct iscsi_conn *); -- cgit v0.10.2 From fb77bb5376a55f4e6c8d9243249e82831a276ee5 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:47 +0530 Subject: mpt3sas: Added Combined Reply Queue feature to extend up-to 96 MSIX vector support In this patch, increased the number of MSIX vector support for SAS3 C0 HBAs to up-to 96. Following are changes that are done in this patch 1. This feature is enabled only for SAS3 C0 and higher revision cards and also only when reply post free queue count is greater than 8. 2. To support this feature 12 SupplementalReplyPostHostIndex system interfaces are used. MSI-X index numbered from 0 to 7 use the first SupplementalReplyPostHostIndex system interface to update its corresponding ReplyPostHostIndex values, MSI-X index numbered from 8 to 15 will use the second SupplementalReplyPostHostIndex system interface and so on. These 12 SuppementalReplyPostHostIndex system interfaces address are saved in the array replyPostRegisterIndex[]. 3. As each SupplementalReplyPostHostIndex register supports 8 MSI-X vectors. So MSIxIndex field in these register must contain a value between 0 and 7. 4. 
After processing the reply descriptors from a reply post free queues then update the new reply post host index value in ReplyPostHostIndex field and (msix_index mod 8) value in MSIxIndex field of SupplementalReplyPostHostIndex register. The Address of this SupplementalReplyPostHostIndex register is retrived from (msix_index/8)th entry of replyPostRegisterIndex[] array. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 43f87e9..f27c672 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -83,10 +83,10 @@ static int msix_disable = -1; module_param(msix_disable, int, 0); MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)"); -static int max_msix_vectors = 8; +static int max_msix_vectors = -1; module_param(max_msix_vectors, int, 0); MODULE_PARM_DESC(max_msix_vectors, - " max msix vectors - (default=8)"); + " max msix vectors"); static int mpt3sas_fwfault_debug; MODULE_PARM_DESC(mpt3sas_fwfault_debug, @@ -1009,8 +1009,30 @@ _base_interrupt(int irq, void *bus_id) } wmb(); - writel(reply_q->reply_post_host_index | (msix_index << - MPI2_RPHI_MSIX_INDEX_SHIFT), &ioc->chip->ReplyPostHostIndex); + + /* Update Reply Post Host Index. + * For those HBA's which support combined reply queue feature + * 1. Get the correct Supplemental Reply Post Host Index Register. + * i.e. (msix_index / 8)th entry from Supplemental Reply Post Host + * Index Register address bank i.e replyPostRegisterIndex[], + * 2. Then update this register with new reply host index value + * in ReplyPostIndex field and the MSIxIndex field with + * msix_index value reduced to a value between 0 and 7, + * using a modulo 8 operation. Since each Supplemental Reply Post + * Host Index Register supports 8 MSI-X vectors. + * + * For other HBA's just update the Reply Post Host Index register with + * new reply host index value in ReplyPostIndex Field and msix_index + * value in MSIxIndex field. + */ + if (ioc->msix96_vector) + writel(reply_q->reply_post_host_index | ((msix_index & 7) << + MPI2_RPHI_MSIX_INDEX_SHIFT), + ioc->replyPostRegisterIndex[msix_index/8]); + else + writel(reply_q->reply_post_host_index | (msix_index << + MPI2_RPHI_MSIX_INDEX_SHIFT), + &ioc->chip->ReplyPostHostIndex); atomic_dec(&reply_q->busy); return IRQ_HANDLED; } @@ -1560,8 +1582,6 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc) pci_read_config_word(ioc->pdev, base + 2, &message_control); ioc->msix_vector_count = (message_control & 0x3FF) + 1; - if (ioc->msix_vector_count > 8) - ioc->msix_vector_count = 8; dinitprintk(ioc, pr_info(MPT3SAS_FMT "msix is supported, vector_count(%d)\n", ioc->name, ioc->msix_vector_count)); @@ -1882,6 +1902,36 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) if (r) goto out_fail; + /* Use the Combined reply queue feature only for SAS3 C0 & higher + * revision HBAs and also only when reply queue count is greater than 8 + */ + if (ioc->msix96_vector && ioc->reply_queue_count > 8) { + /* Determine the Supplemental Reply Post Host Index Registers + * Addresse. Supplemental Reply Post Host Index Registers + * starts at offset MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET and + * each register is at offset bytes of + * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET from previous one. 
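A quick sketch of the arithmetic this comment describes (illustrative values only, not part of the patch; msix_index is the vector number handled in _base_interrupt() above):

	unsigned int reg  = msix_index / 8;	/* e.g. vector 42 -> replyPostRegisterIndex[5] */
	unsigned int slot = msix_index & 7;	/* modulo 8: the MSIxIndex field gets 2 */

Twelve such registers at eight vectors each is what yields the 96 MSI-X vector ceiling.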
+ */ + ioc->replyPostRegisterIndex = kcalloc( + MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT, + sizeof(resource_size_t *), GFP_KERNEL); + if (!ioc->replyPostRegisterIndex) { + dfailprintk(ioc, printk(MPT3SAS_FMT + "allocation for reply Post Register Index failed!!!\n", + ioc->name)); + r = -ENOMEM; + goto out_fail; + } + + for (i = 0; i < MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT; i++) { + ioc->replyPostRegisterIndex[i] = (resource_size_t *) + ((u8 *)&ioc->chip->Doorbell + + MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET + + (i * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET)); + } + } else + ioc->msix96_vector = 0; + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) pr_info(MPT3SAS_FMT "%s: IRQ %d\n", reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" : @@ -1903,6 +1953,8 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) pci_release_selected_regions(ioc->pdev, ioc->bars); pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); + if (ioc->msix96_vector) + kfree(ioc->replyPostRegisterIndex); return r; } @@ -4524,8 +4576,15 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc, int sleep_flag) /* initialize reply post host index */ list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { - writel(reply_q->msix_index << MPI2_RPHI_MSIX_INDEX_SHIFT, - &ioc->chip->ReplyPostHostIndex); + if (ioc->msix96_vector) + writel((reply_q->msix_index & 7)<< + MPI2_RPHI_MSIX_INDEX_SHIFT, + ioc->replyPostRegisterIndex[reply_q->msix_index/8]); + else + writel(reply_q->msix_index << + MPI2_RPHI_MSIX_INDEX_SHIFT, + &ioc->chip->ReplyPostHostIndex); + if (!_base_is_controller_msix_enabled(ioc)) goto skip_init_reply_post_host_index; } @@ -4579,6 +4638,9 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc) _base_free_irq(ioc); _base_disable_msix(ioc); + if (ioc->msix96_vector) + kfree(ioc->replyPostRegisterIndex); + if (ioc->chip_phys && ioc->chip) iounmap(ioc->chip); ioc->chip_phys = 0; @@ -4602,6 +4664,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) { int r, i; int cpu_id, last_cpu_id = 0; + u8 revision; dinitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name, __func__)); @@ -4621,6 +4684,20 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) goto out_free_resources; } + /* Check whether the controller revision is C0 or above. + * only C0 and above revision controllers support 96 MSI-X vectors. + */ + revision = ioc->pdev->revision; + + if ((ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3004 || + ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3008 || + ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_1 || + ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_2 || + ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_5 || + ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_6) && + (revision >= 0x02)) + ioc->msix96_vector = 1; + ioc->rdpq_array_enable_assigned = 0; ioc->dma_mask = 0; r = mpt3sas_base_map_resources(ioc); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index afa8816..a7386ee 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -158,6 +158,13 @@ #define MPT3_DIAG_BUFFER_IS_RELEASED (0x02) #define MPT3_DIAG_BUFFER_IS_DIAG_RESET (0x04) +/* + * Combined Reply Queue constants, + * There are twelve Supplemental Reply Post Host Index Registers + * and each register is at offset 0x10 bytes from the previous one. 
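Spelled out as a sketch (this mirrors the address setup in mpt3sas_base_map_resources() above; reg_addr and i are illustrative names, not new driver API):

	/* address of supplemental register i, 0 <= i < 12 */
	reg_addr = (u8 *)&ioc->chip->Doorbell
		 + MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET
		 + i * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET;	/* stride 0x10 */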
+ */ +#define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT 12 +#define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET (0x10) /* OEM Identifiers */ #define MFG10_OEM_ID_INVALID (0x00000000) @@ -728,7 +735,8 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); * is assigned only ones * @reply_queue_count: number of reply queue's * @reply_queue_list: link list contaning the reply queue info - * @reply_post_host_index: head index in the pool where FW completes IO + * @msix96_vector: 96 MSI-X vector support + * @replyPostRegisterIndex: index of next position in Reply Desc Post Queue * @delayed_tr_list: target reset link list * @delayed_tr_volume_list: volume target reset link list * @@temp_sensors_count: flag to carry the number of temperature sensors @@ -937,6 +945,10 @@ struct MPT3SAS_ADAPTER { u8 reply_queue_count; struct list_head reply_queue_list; + u8 msix96_vector; + /* reply post register index */ + resource_size_t **replyPostRegisterIndex; + struct list_head delayed_tr_list; struct list_head delayed_tr_volume_list; u8 temp_sensors_count; -- cgit v0.10.2 From 4dc8c8087f0304ff8d06f862520406b11aac4a66 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:48 +0530 Subject: mpt3sas: Get IOC_FACTS information using handshake protocol only after HBA card gets into READY or Operational state. Driver initialization fails if driver tries to send IOC facts request message when the IOC is in reset or in a fault state. This patch will make sure that 1.Driver to send IOC facts request message only if HBA is in operational or ready state. 2.If IOC is in fault state, a diagnostic reset would be issued. 3.If IOC is in reset state then driver will wait for 10 seconds to exit out of reset state. If the HBA continues to be in reset state, then the HBA wouldn't be claimed by the driver. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Tomas Henzl Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index f27c672..9a6ac1e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3191,6 +3191,9 @@ _base_wait_on_iocstate(struct MPT3SAS_ADAPTER *ioc, u32 ioc_state, int timeout, * Notes: MPI2_HIS_IOC2SYS_DB_STATUS - set to one when IOC writes to doorbell. */ static int +_base_diag_reset(struct MPT3SAS_ADAPTER *ioc, int sleep_flag); + +static int _base_wait_for_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout, int sleep_flag) { @@ -3733,6 +3736,64 @@ _base_get_port_facts(struct MPT3SAS_ADAPTER *ioc, int port, int sleep_flag) } /** + * _base_wait_for_iocstate - Wait until the card is in READY or OPERATIONAL + * @ioc: per adapter object + * @timeout: + * @sleep_flag: CAN_SLEEP or NO_SLEEP + * + * Returns 0 for success, non-zero for failure. 
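One structural note before the helper's body below: the bare _base_diag_reset() prototype added earlier in this hunk exists because the new helper must call it, while its definition only appears later in the file. A minimal, hypothetical illustration of the forward-declaration pattern (not mpt3sas code):

	static int helper(int x);		/* forward declaration */

	static int caller(void)
	{
		return helper(3);		/* legal: prototype is in scope */
	}

	static int helper(int x)
	{
		return x * 2;			/* definition comes later in the file */
	}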
+ */
+static int
+_base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout,
+	int sleep_flag)
+{
+	u32 ioc_state;
+	int rc;
+
+	dinitprintk(ioc, printk(MPT3SAS_FMT "%s\n", ioc->name,
+	    __func__));
+
+	if (ioc->pci_error_recovery) {
+		dfailprintk(ioc, printk(MPT3SAS_FMT
+		    "%s: host in pci error recovery\n", ioc->name, __func__));
+		return -EFAULT;
+	}
+
+	ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
+	dhsprintk(ioc, printk(MPT3SAS_FMT "%s: ioc_state(0x%08x)\n",
+	    ioc->name, __func__, ioc_state));
+
+	if (((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_READY) ||
+	    (ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_OPERATIONAL)
+		return 0;
+
+	if (ioc_state & MPI2_DOORBELL_USED) {
+		dhsprintk(ioc, printk(MPT3SAS_FMT
+		    "unexpected doorbell active!\n", ioc->name));
+		goto issue_diag_reset;
+	}
+
+	if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
+		mpt3sas_base_fault_info(ioc, ioc_state &
+		    MPI2_DOORBELL_DATA_MASK);
+		goto issue_diag_reset;
+	}
+
+	ioc_state = _base_wait_on_iocstate(ioc, MPI2_IOC_STATE_READY,
+	    timeout, sleep_flag);
+	if (ioc_state) {
+		dfailprintk(ioc, printk(MPT3SAS_FMT
+		    "%s: failed going to ready state (ioc_state=0x%x)\n",
+		    ioc->name, __func__, ioc_state));
+		return -EFAULT;
+	}
+
+ issue_diag_reset:
+	rc = _base_diag_reset(ioc, sleep_flag);
+	return rc;
+}
+
+/**
 * _base_get_ioc_facts - obtain ioc facts reply and save in ioc
 * @ioc: per adapter object
 * @sleep_flag: CAN_SLEEP or NO_SLEEP
@@ -3750,6 +3811,13 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc, int sleep_flag)
 	dinitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name,
 	    __func__));
 
+	r = _base_wait_for_iocstate(ioc, 10, sleep_flag);
+	if (r) {
+		dfailprintk(ioc, printk(MPT3SAS_FMT
+		    "%s: failed getting to correct state\n",
+		    ioc->name, __func__));
+		return r;
+	}
 	mpi_reply_sz = sizeof(Mpi2IOCFactsReply_t);
 	mpi_request_sz = sizeof(Mpi2IOCFactsRequest_t);
 	memset(&mpi_request, 0, mpi_request_sz);

-- cgit v0.10.2


From e4bc7f5c21a18cab9acd30940df0ee791fcd7b9e Mon Sep 17 00:00:00 2001
From: Sreekanth Reddy
Date: Tue, 30 Jun 2015 12:24:49 +0530
Subject: mpt3sas: Don't block the drive when drive addition is under the control of SML

During hot-plugging of a disk (having a flaky link), the disk addition
stops and no further disk addition or removal happens on that
controller. This is because, when the driver receives a
DELAY_NOT_RESPONDING event for a disk while it is undergoing addition
at the SCSI Transport layer, the driver blocks I/O to that disk,
resulting in a deadlock: the disk addition work can't be completed at
the SCSI Transport Layer, since the layer can't send any I/Os (such as
Inquiry, Report LUNs etc.) to the disk while I/O to it is blocked.
Also, any subsequent device removal (TARGET_NOT_RESPONDING) or link
update (RC_PHY_CHANGED) event can't be processed, as those events are
queued behind the disk addition event.

Description of change: don't block the drive while its addition is
under the control of the SML, so that the SML isn't blocked from
issuing the device discovery commands (such as Inquiry, Report LUNs
etc.).

Signed-off-by: Sreekanth Reddy
Reviewed-by: Martin K.
Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index a7386ee..01d92db 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -301,7 +301,8 @@ struct _internal_cmd { * @responding: used in _scsih_sas_device_mark_responding * @fast_path: fast path feature enable bit * @pfa_led_on: flag for PFA LED status - * + * @pend_sas_rphy_add: flag to check if device is in sas_rphy_add() + * addition routine. */ struct _sas_device { struct list_head list; @@ -322,6 +323,7 @@ struct _sas_device { u8 responding; u8 fast_path; u8 pfa_led_on; + u8 pend_sas_rphy_add; }; /** diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 5a97e32..d457dba 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2644,6 +2644,11 @@ _scsih_block_io_device(struct MPT3SAS_ADAPTER *ioc, u16 handle) { struct MPT3SAS_DEVICE *sas_device_priv_data; struct scsi_device *sdev; + struct _sas_device *sas_device; + + sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + if (!sas_device) + return; shost_for_each_device(sdev, ioc->shost) { sas_device_priv_data = sdev->hostdata; @@ -2653,6 +2658,8 @@ _scsih_block_io_device(struct MPT3SAS_ADAPTER *ioc, u16 handle) continue; if (sas_device_priv_data->block) continue; + if (sas_device->pend_sas_rphy_add) + continue; sas_device_priv_data->block = 1; scsi_internal_device_block(sdev); sdev_printk(KERN_INFO, sdev, diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index efb98af..7a7aa68 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -649,6 +649,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, unsigned long flags; struct _sas_node *sas_node; struct sas_rphy *rphy; + struct _sas_device *sas_device = NULL; int i; struct sas_port *port; @@ -731,10 +732,27 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, mpt3sas_port->remote_identify.device_type); rphy->identify = mpt3sas_port->remote_identify; + + if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) { + sas_device = mpt3sas_scsih_sas_device_find_by_sas_address(ioc, + mpt3sas_port->remote_identify.sas_address); + if (!sas_device) { + dfailprintk(ioc, printk(MPT3SAS_FMT + "failure at %s:%d/%s()!\n", + ioc->name, __FILE__, __LINE__, __func__)); + goto out_fail; + } + sas_device->pend_sas_rphy_add = 1; + } + if ((sas_rphy_add(rphy))) { pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); } + + if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) + sas_device->pend_sas_rphy_add = 0; + if ((ioc->logging_level & MPT_DEBUG_TRANSPORT)) dev_printk(KERN_INFO, &rphy->dev, "add: handle(0x%04x), sas_addr(0x%016llx)\n", -- cgit v0.10.2 From 580d4e3153f0d7a9a9235b675b0b7b13e2185a8b Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:50 +0530 Subject: mpt3sas: Remove redundancy code while freeing the controller resources. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 9a6ac1e..7e8ede2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1813,6 +1813,36 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) } /** + * mpt3sas_base_unmap_resources - free controller resources + * @ioc: per adapter object + */ +void +mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc) +{ + struct pci_dev *pdev = ioc->pdev; + + dexitprintk(ioc, printk(MPT3SAS_FMT "%s\n", + ioc->name, __func__)); + + _base_free_irq(ioc); + _base_disable_msix(ioc); + + if (ioc->msix96_vector) + kfree(ioc->replyPostRegisterIndex); + + if (ioc->chip_phys) { + iounmap(ioc->chip); + ioc->chip_phys = 0; + } + + if (pci_is_enabled(pdev)) { + pci_release_selected_regions(ioc->pdev, ioc->bars); + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + } +} + +/** * mpt3sas_base_map_resources - map in controller resources (io/irq/memap) * @ioc: per adapter object * @@ -1947,14 +1977,7 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) return 0; out_fail: - if (ioc->chip_phys) - iounmap(ioc->chip); - ioc->chip_phys = 0; - pci_release_selected_regions(ioc->pdev, ioc->bars); - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); - if (ioc->msix96_vector) - kfree(ioc->replyPostRegisterIndex); + mpt3sas_base_unmap_resources(ioc); return r; } @@ -4691,8 +4714,6 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc, int sleep_flag) void mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc) { - struct pci_dev *pdev = ioc->pdev; - dexitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name, __func__)); @@ -4703,21 +4724,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc) ioc->shost_recovery = 0; } - _base_free_irq(ioc); - _base_disable_msix(ioc); - - if (ioc->msix96_vector) - kfree(ioc->replyPostRegisterIndex); - - if (ioc->chip_phys && ioc->chip) - iounmap(ioc->chip); - ioc->chip_phys = 0; - - if (pci_is_enabled(pdev)) { - pci_release_selected_regions(ioc->pdev, ioc->bars); - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); - } + mpt3sas_base_unmap_resources(ioc); return; } -- cgit v0.10.2 From a94bea343c2f747c62ef66b52d2430e6aeb75434 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:51 +0530 Subject: mpt3sas: MPI 2.5 Rev I (2.5.4) specifications. Update MPI 2.5 Release: MPI 2.5 Rev I (2.5.4) specification and 2.00.33 header files Below is the change set from the MPI specification for I Rev 1) Added Base Enclosure Level bit to the Flags field of Manufacturing Page 7. 2) Updated description of the MaxTargetPortConnectTime field of SAS IO Unit Page 1. 3) Added EnclosureLevel and ConnectorName fields to SAS Device Page 0. Also, added EnclosureLevel and ConnectorName Valid bit to the Flags field. 4) Added EnclosureLevel field to SAS Enclosure Page 0. Also, added EnclosureLevel Valid bit to the Flags field. 5) Added value for BIOS image to HashImageType. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h index c34c115..d730c5c 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2.h @@ -8,7 +8,7 @@ * scatter/gather formats. 
* Creation Date: June 21, 2006 * - * mpi2.h Version: 02.00.31 + * mpi2.h Version: 02.00.33 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -88,6 +88,8 @@ * Added MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET. * 04-09-13 02.00.30 Bumped MPI2_HEADER_VERSION_UNIT. * 04-17-13 02.00.31 Bumped MPI2_HEADER_VERSION_UNIT. + * 08-19-13 02.00.32 Bumped MPI2_HEADER_VERSION_UNIT. + * 12-05-13 02.00.33 Bumped MPI2_HEADER_VERSION_UNIT. * -------------------------------------------------------------------------- */ @@ -121,7 +123,7 @@ #define MPI2_VERSION_02_05 (0x0205) /*Unit and Dev versioning for this MPI header set */ -#define MPI2_HEADER_VERSION_UNIT (0x1F) +#define MPI2_HEADER_VERSION_UNIT (0x21) #define MPI2_HEADER_VERSION_DEV (0x00) #define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI2_HEADER_VERSION_UNIT_SHIFT (8) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h index e261a31..62dfbf6 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h @@ -6,7 +6,7 @@ * Title: MPI Configuration messages and pages * Creation Date: November 10, 2006 * - * mpi2_cnfg.h Version: 02.00.26 + * mpi2_cnfg.h Version: 02.00.27 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -165,6 +165,16 @@ * match the specification. * 08-19-13 02.00.26 Added reserved words to MPI2_CONFIG_PAGE_IO_UNIT_7 for * future use. + * 12-05-13 02.00.27 Added MPI2_MANPAGE7_FLAG_BASE_ENCLOSURE_LEVEL for + * MPI2_CONFIG_PAGE_MAN_7. + * Added EnclosureLevel and ConnectorName fields to + * MPI2_CONFIG_PAGE_SAS_DEV_0. + * Added MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID for + * MPI2_CONFIG_PAGE_SAS_DEV_0. + * Added EnclosureLevel field to + * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0. + * Added MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID for + * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0. 
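The new page-0 fields are only meaningful when the matching valid bit is set in Flags, so a consumer is expected to gate on it. A sketch (the driver patch later in this series does essentially this; the le16_to_cpu() on Flags is written out explicitly here, and enclosure_level/connector_name are illustrative locals):

	if (le16_to_cpu(sas_device_pg0.Flags) &
	    MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) {
		enclosure_level = sas_device_pg0.EnclosureLevel;
		memcpy(connector_name, sas_device_pg0.ConnectorName, 4);
	} else {
		enclosure_level = 0;
		connector_name[0] = '\0';
	}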
* -------------------------------------------------------------------------- */ @@ -724,6 +734,7 @@ typedef struct _MPI2_CONFIG_PAGE_MAN_7 { #define MPI2_MANUFACTURING7_PAGEVERSION (0x01) /*defines for the Flags field */ +#define MPI2_MANPAGE7_FLAG_BASE_ENCLOSURE_LEVEL (0x00000008) #define MPI2_MANPAGE7_FLAG_EVENTREPLAY_SLOT_ORDER (0x00000002) #define MPI2_MANPAGE7_FLAG_USE_SLOT_INFO (0x00000001) @@ -2633,9 +2644,9 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_DEV_0 { U8 ControlGroup; /*0x2E */ U8 - Reserved1; /*0x2F */ + EnclosureLevel; /*0x2F */ U32 - Reserved2; /*0x30 */ + ConnectorName[4]; /*0x30 */ U32 Reserved3; /*0x34 */ } MPI2_CONFIG_PAGE_SAS_DEV_0, @@ -2643,7 +2654,7 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_DEV_0 { Mpi2SasDevicePage0_t, *pMpi2SasDevicePage0_t; -#define MPI2_SASDEVICE0_PAGEVERSION (0x08) +#define MPI2_SASDEVICE0_PAGEVERSION (0x09) /*values for SAS Device Page 0 AccessStatus field */ #define MPI2_SAS_DEVICE0_ASTATUS_NO_ERRORS (0x00) @@ -2683,6 +2694,7 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_DEV_0 { #define MPI2_SAS_DEVICE0_FLAGS_SATA_NCQ_SUPPORTED (0x0020) #define MPI2_SAS_DEVICE0_FLAGS_SATA_FUA_SUPPORTED (0x0010) #define MPI2_SAS_DEVICE0_FLAGS_PORT_SELECTOR_ATTACH (0x0008) +#define MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID (0x0002) #define MPI2_SAS_DEVICE0_FLAGS_DEVICE_PRESENT (0x0001) @@ -3019,8 +3031,10 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 { NumSlots; /*0x18 */ U16 StartSlot; /*0x1A */ - U16 + U8 Reserved2; /*0x1C */ + U8 + EnclosureLevel; /*0x1D */ U16 SEPDevHandle; /*0x1E */ U32 @@ -3031,9 +3045,10 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 { *PTR_MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0, Mpi2SasEnclosurePage0_t, *pMpi2SasEnclosurePage0_t; -#define MPI2_SASENCLOSURE0_PAGEVERSION (0x03) +#define MPI2_SASENCLOSURE0_PAGEVERSION (0x04) /*values for SAS Enclosure Page 0 Flags field */ +#define MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID (0x0010) #define MPI2_SAS_ENCLS0_FLAGS_MNG_MASK (0x000F) #define MPI2_SAS_ENCLS0_FLAGS_MNG_UNKNOWN (0x0000) #define MPI2_SAS_ENCLS0_FLAGS_MNG_IOC_SES (0x0001) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h index 4908309..d7598cc 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h @@ -6,7 +6,7 @@ * Title: MPI IOC, Port, Event, FW Download, and FW Upload messages * Creation Date: October 11, 2006 * - * mpi2_ioc.h Version: 02.00.23 + * mpi2_ioc.h Version: 02.00.24 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -132,6 +132,7 @@ * Added MPI2_IOCFACTS_CAPABILITY_RDPQ_ARRAY_CAPABLE. * Added MPI2_FW_DOWNLOAD_ITYPE_PUBLIC_KEY. * Added Encrypted Hash Extended Image. + * 12-05-13 02.00.24 Added MPI25_HASH_IMAGE_TYPE_BIOS. 
* -------------------------------------------------------------------------- */ @@ -1598,6 +1599,7 @@ Mpi25EncryptedHashEntry_t, *pMpi25EncryptedHashEntry_t; /* values for HashImageType */ #define MPI25_HASH_IMAGE_TYPE_UNUSED (0x00) #define MPI25_HASH_IMAGE_TYPE_FIRMWARE (0x01) +#define MPI25_HASH_IMAGE_TYPE_BIOS (0x02) /* values for HashAlgorithm */ #define MPI25_HASH_ALGORITHM_UNUSED (0x00) -- cgit v0.10.2 From e6d45e3e7e6582fa206ef84631639ce70d50e5c5 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:52 +0530 Subject: mpt3sas: Provides the physical location of sas drives This Patch will provide more details of the devices such as slot number, enclosure logical id, enclosure level & connector name in the following scenarios, - When end device is added in the topology, - When the end device is removed from the setup, - When the SCSI mid layer issues TASK ABORT/ DEVICE RESET/ TARGET RESET during error handling, - When any command to the device fails with Sense key Hardware error or Medium error or Unit Attention, - When firmware returns device error or device not ready status for the end device, - When a Predicted fault is detected on an end device. This information can be used by the user to identify the location of the desired drive in the topology. Driver will get these information by reading the sas device page0. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 01d92db..da82a97 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -324,6 +324,8 @@ struct _sas_device { u8 fast_path; u8 pfa_led_on; u8 pend_sas_rphy_add; + u8 enclosure_level; + u8 connector_name[4]; }; /** diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index d457dba..ec3b3d3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -585,6 +585,22 @@ _scsih_sas_device_remove(struct MPT3SAS_ADAPTER *ioc, if (!sas_device) return; + pr_info(MPT3SAS_FMT + "removing handle(0x%04x), sas_addr(0x%016llx)\n", + ioc->name, sas_device->handle, + (unsigned long long) sas_device->sas_address); + + if (sas_device->enclosure_handle != 0) + pr_info(MPT3SAS_FMT + "removing enclosure logical id(0x%016llx), slot(%d)\n", + ioc->name, (unsigned long long) + sas_device->enclosure_logical_id, sas_device->slot); + + if (sas_device->connector_name[0] != '\0') + pr_info(MPT3SAS_FMT + "removing enclosure level(0x%04x), connector name( %s)\n", + ioc->name, sas_device->enclosure_level, + sas_device->connector_name); spin_lock_irqsave(&ioc->sas_device_lock, flags); list_del(&sas_device->list); @@ -663,6 +679,18 @@ _scsih_sas_device_add(struct MPT3SAS_ADAPTER *ioc, ioc->name, __func__, sas_device->handle, (unsigned long long)sas_device->sas_address)); + if (sas_device->enclosure_handle != 0) + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: enclosure logical id(0x%016llx), slot( %d)\n", + ioc->name, __func__, (unsigned long long) + sas_device->enclosure_logical_id, sas_device->slot)); + + if (sas_device->connector_name[0] != '\0') + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: enclosure level(0x%04x), connector name( %s)\n", + ioc->name, __func__, + sas_device->enclosure_level, sas_device->connector_name)); + spin_lock_irqsave(&ioc->sas_device_lock, flags); list_add_tail(&sas_device->list, &ioc->sas_device_list); 
spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -704,6 +732,18 @@ _scsih_sas_device_init_add(struct MPT3SAS_ADAPTER *ioc, __func__, sas_device->handle, (unsigned long long)sas_device->sas_address)); + if (sas_device->enclosure_handle != 0) + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: enclosure logical id(0x%016llx), slot( %d)\n", + ioc->name, __func__, (unsigned long long) + sas_device->enclosure_logical_id, sas_device->slot)); + + if (sas_device->connector_name[0] != '\0') + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: enclosure level(0x%04x), connector name( %s)\n", + ioc->name, __func__, sas_device->enclosure_level, + sas_device->connector_name)); + spin_lock_irqsave(&ioc->sas_device_lock, flags); list_add_tail(&sas_device->list, &ioc->sas_device_init_list); _scsih_determine_boot_device(ioc, sas_device, 0); @@ -1772,10 +1812,16 @@ _scsih_slave_configure(struct scsi_device *sdev) "sas_addr(0x%016llx), phy(%d), device_name(0x%016llx)\n", ds, handle, (unsigned long long)sas_device->sas_address, sas_device->phy, (unsigned long long)sas_device->device_name); - sdev_printk(KERN_INFO, sdev, - "%s: enclosure_logical_id(0x%016llx), slot(%d)\n", - ds, (unsigned long long) - sas_device->enclosure_logical_id, sas_device->slot); + if (sas_device->enclosure_handle != 0) + sdev_printk(KERN_INFO, sdev, + "%s: enclosure_logical_id(0x%016llx), slot(%d)\n", + ds, (unsigned long long) + sas_device->enclosure_logical_id, sas_device->slot); + if (sas_device->connector_name[0] != '\0') + sdev_printk(KERN_INFO, sdev, + "%s: enclosure level(0x%04x), connector name( %s)\n", + ds, sas_device->enclosure_level, + sas_device->connector_name); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -2189,10 +2235,17 @@ _scsih_tm_display_info(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd) sas_device->handle, (unsigned long long)sas_device->sas_address, sas_device->phy); - starget_printk(KERN_INFO, starget, - "enclosure_logical_id(0x%016llx), slot(%d)\n", - (unsigned long long)sas_device->enclosure_logical_id, - sas_device->slot); + if (sas_device->enclosure_handle != 0) + starget_printk(KERN_INFO, starget, + "enclosure_logical_id(0x%016llx), slot(%d)\n", + (unsigned long long) + sas_device->enclosure_logical_id, + sas_device->slot); + if (sas_device->connector_name) + starget_printk(KERN_INFO, starget, + "enclosure level(0x%04x),connector name(%s)\n", + sas_device->enclosure_level, + sas_device->connector_name); } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -2813,6 +2866,18 @@ _scsih_tm_tr_send(struct MPT3SAS_ADAPTER *ioc, u16 handle) "setting delete flag: handle(0x%04x), sas_addr(0x%016llx)\n", ioc->name, handle, (unsigned long long)sas_address)); + if (sas_device->enclosure_handle != 0) + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "setting delete flag:enclosure logical id(0x%016llx)," + " slot(%d)\n", ioc->name, (unsigned long long) + sas_device->enclosure_logical_id, + sas_device->slot)); + if (sas_device->connector_name) + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "setting delete flag: enclosure level(0x%04x)," + " connector name( %s)\n", ioc->name, + sas_device->enclosure_level, + sas_device->connector_name)); _scsih_ublock_io_device(ioc, sas_address); sas_target_priv_data->handle = MPT3SAS_INVALID_DEVICE_HANDLE; } @@ -3828,10 +3893,19 @@ _scsih_scsi_ioc_info(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, "\tsas_address(0x%016llx), phy(%d)\n", ioc->name, (unsigned long long) sas_device->sas_address, sas_device->phy); - pr_warn(MPT3SAS_FMT - "\tenclosure_logical_id(0x%016llx), 
slot(%d)\n", - ioc->name, (unsigned long long) - sas_device->enclosure_logical_id, sas_device->slot); + if (sas_device->enclosure_handle != 0) + pr_warn(MPT3SAS_FMT + "\tenclosure_logical_id(0x%016llx)," + "slot(%d)\n", ioc->name, + (unsigned long long) + sas_device->enclosure_logical_id, + sas_device->slot); + if (sas_device->connector_name[0]) + pr_warn(MPT3SAS_FMT + "\tenclosure level(0x%04x)," + " connector name( %s)\n", ioc->name, + sas_device->enclosure_level, + sas_device->connector_name); } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -4006,7 +4080,16 @@ _scsih_smart_predicted_fault(struct MPT3SAS_ADAPTER *ioc, u16 handle) spin_unlock_irqrestore(&ioc->sas_device_lock, flags); return; } - starget_printk(KERN_WARNING, starget, "predicted fault\n"); + if (sas_device->enclosure_handle != 0) + starget_printk(KERN_INFO, starget, "predicted fault, " + "enclosure logical id(0x%016llx), slot(%d)\n", + (unsigned long long)sas_device->enclosure_logical_id, + sas_device->slot); + if (sas_device->connector_name[0] != '\0') + starget_printk(KERN_WARNING, starget, "predicted fault, " + "enclosure level(0x%04x), connector name( %s)\n", + sas_device->enclosure_level, + sas_device->connector_name); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); if (ioc->pdev->subsystem_vendor == PCI_VENDOR_ID_IBM) @@ -4126,8 +4209,15 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) _scsih_smart_predicted_fault(ioc, le16_to_cpu(mpi_reply->DevHandle)); mpt3sas_trigger_scsi(ioc, data.skey, data.asc, data.ascq); - } +#ifdef CONFIG_SCSI_MPT3SAS_LOGGING + if (!(ioc->logging_level & MPT_DEBUG_REPLY) && + ((scmd->sense_buffer[2] == UNIT_ATTENTION) || + (scmd->sense_buffer[2] == MEDIUM_ERROR) || + (scmd->sense_buffer[2] == HARDWARE_ERROR))) + _scsih_scsi_ioc_info(ioc, scmd, mpi_reply, smid); +#endif + } switch (ioc_status) { case MPI2_IOCSTATUS_BUSY: case MPI2_IOCSTATUS_INSUFFICIENT_RESOURCES: @@ -4795,6 +4885,16 @@ _scsih_check_device(struct MPT3SAS_ADAPTER *ioc, sas_device->handle, handle); sas_target_priv_data->handle = handle; sas_device->handle = handle; + if (sas_device_pg0.Flags & + MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) { + sas_device->enclosure_level = + le16_to_cpu(sas_device_pg0.EnclosureLevel); + memcpy(&sas_device->connector_name[0], + &sas_device_pg0.ConnectorName[0], 4); + } else { + sas_device->enclosure_level = 0; + sas_device->connector_name[0] = '\0'; + } } /* check if device is present */ @@ -4901,14 +5001,24 @@ _scsih_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle, u8 phy_num, ioc->name, __FILE__, __LINE__, __func__); sas_device->enclosure_handle = le16_to_cpu(sas_device_pg0.EnclosureHandle); - sas_device->slot = - le16_to_cpu(sas_device_pg0.Slot); + if (sas_device->enclosure_handle != 0) + sas_device->slot = + le16_to_cpu(sas_device_pg0.Slot); sas_device->device_info = device_info; sas_device->sas_address = sas_address; sas_device->phy = sas_device_pg0.PhyNum; sas_device->fast_path = (le16_to_cpu(sas_device_pg0.Flags) & MPI25_SAS_DEVICE0_FLAGS_FAST_PATH_CAPABLE) ? 
1 : 0; + if (sas_device_pg0.Flags & MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) { + sas_device->enclosure_level = + le16_to_cpu(sas_device_pg0.EnclosureLevel); + memcpy(&sas_device->connector_name[0], + &sas_device_pg0.ConnectorName[0], 4); + } else { + sas_device->enclosure_level = 0; + sas_device->connector_name[0] = '\0'; + } /* get enclosure_logical_id */ if (sas_device->enclosure_handle && !(mpt3sas_config_get_enclosure_pg0( ioc, &mpi_reply, &enclosure_pg0, MPI2_SAS_ENCLOS_PGAD_FORM_HANDLE, @@ -4950,6 +5060,18 @@ _scsih_remove_device(struct MPT3SAS_ADAPTER *ioc, ioc->name, __func__, sas_device->handle, (unsigned long long) sas_device->sas_address)); + if (sas_device->enclosure_handle != 0) + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: enter: enclosure logical id(0x%016llx), slot(%d)\n", + ioc->name, __func__, + (unsigned long long)sas_device->enclosure_logical_id, + sas_device->slot)); + if (sas_device->connector_name[0] != '\0') + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: enter: enclosure level(0x%04x), connector name( %s)\n", + ioc->name, __func__, + sas_device->enclosure_level, + sas_device->connector_name)); if (sas_device->starget && sas_device->starget->hostdata) { sas_target_priv_data = sas_device->starget->hostdata; @@ -4966,12 +5088,34 @@ _scsih_remove_device(struct MPT3SAS_ADAPTER *ioc, "removing handle(0x%04x), sas_addr(0x%016llx)\n", ioc->name, sas_device->handle, (unsigned long long) sas_device->sas_address); + if (sas_device->enclosure_handle != 0) + pr_info(MPT3SAS_FMT + "removing : enclosure logical id(0x%016llx), slot(%d)\n", + ioc->name, + (unsigned long long)sas_device->enclosure_logical_id, + sas_device->slot); + if (sas_device->connector_name[0] != '\0') + pr_info(MPT3SAS_FMT + "removing enclosure level(0x%04x), connector name( %s)\n", + ioc->name, sas_device->enclosure_level, + sas_device->connector_name); dewtprintk(ioc, pr_info(MPT3SAS_FMT "%s: exit: handle(0x%04x), sas_addr(0x%016llx)\n", ioc->name, __func__, - sas_device->handle, (unsigned long long) - sas_device->sas_address)); + sas_device->handle, (unsigned long long) + sas_device->sas_address)); + if (sas_device->enclosure_handle != 0) + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: exit: enclosure logical id(0x%016llx), slot(%d)\n", + ioc->name, __func__, + (unsigned long long)sas_device->enclosure_logical_id, + sas_device->slot)); + if (sas_device->connector_name[0] != '\0') + dewtprintk(ioc, pr_info(MPT3SAS_FMT + "%s: exit: enclosure level(0x%04x), connector name(%s)\n", + ioc->name, __func__, sas_device->enclosure_level, + sas_device->connector_name)); kfree(sas_device); } @@ -6364,9 +6508,7 @@ _scsih_prep_device_scan(struct MPT3SAS_ADAPTER *ioc) /** * _scsih_mark_responding_sas_device - mark a sas_devices as responding * @ioc: per adapter object - * @sas_address: sas address - * @slot: enclosure slot id - * @handle: device handle + * @sas_device_pg0: SAS Device page 0 * * After host reset, find out whether devices are still responding. * Used in _scsih_remove_unresponsive_sas_devices. @@ -6374,8 +6516,8 @@ _scsih_prep_device_scan(struct MPT3SAS_ADAPTER *ioc) * Return nothing. 
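Condensed, the matching rule implemented below is (pg0 is an illustrative shorthand for the already byte-swapped Mpi2SasDevicePage0_t, as prepared by _scsih_search_responding_sas_devices(); this is a sketch, not a replacement for the function):

	if (sas_device->sas_address == pg0->SASAddress &&
	    sas_device->slot == pg0->Slot) {
		sas_device->responding = 1;
		if (sas_device->handle != pg0->DevHandle)
			sas_device->handle = pg0->DevHandle;	/* handle may move across reset */
	}

The (SASAddress, Slot) pair identifies the device across a host reset even when the firmware hands out a different DevHandle.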
*/ static void -_scsih_mark_responding_sas_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, - u16 slot, u16 handle) +_scsih_mark_responding_sas_device(struct MPT3SAS_ADAPTER *ioc, +Mpi2SasDevicePage0_t *sas_device_pg0) { struct MPT3SAS_TARGET *sas_target_priv_data = NULL; struct scsi_target *starget; @@ -6384,8 +6526,8 @@ _scsih_mark_responding_sas_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, spin_lock_irqsave(&ioc->sas_device_lock, flags); list_for_each_entry(sas_device, &ioc->sas_device_list, list) { - if (sas_device->sas_address == sas_address && - sas_device->slot == slot) { + if ((sas_device->sas_address == sas_device_pg0->SASAddress) && + (sas_device->slot == sas_device_pg0->Slot)) { sas_device->responding = 1; starget = sas_device->starget; if (starget && starget->hostdata) { @@ -6394,22 +6536,40 @@ _scsih_mark_responding_sas_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, sas_target_priv_data->deleted = 0; } else sas_target_priv_data = NULL; - if (starget) + if (starget) { starget_printk(KERN_INFO, starget, - "handle(0x%04x), sas_addr(0x%016llx), " - "enclosure logical id(0x%016llx), " - "slot(%d)\n", handle, - (unsigned long long)sas_device->sas_address, + "handle(0x%04x), sas_addr(0x%016llx)\n", + sas_device_pg0->DevHandle, (unsigned long long) - sas_device->enclosure_logical_id, - sas_device->slot); - if (sas_device->handle == handle) + sas_device->sas_address); + + if (sas_device->enclosure_handle != 0) + starget_printk(KERN_INFO, starget, + "enclosure logical id(0x%016llx)," + " slot(%d)\n", + (unsigned long long) + sas_device->enclosure_logical_id, + sas_device->slot); + } + if (sas_device_pg0->Flags & + MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) { + sas_device->enclosure_level = + le16_to_cpu(sas_device_pg0->EnclosureLevel); + memcpy(&sas_device->connector_name[0], + &sas_device_pg0->ConnectorName[0], 4); + } else { + sas_device->enclosure_level = 0; + sas_device->connector_name[0] = '\0'; + } + + if (sas_device->handle == sas_device_pg0->DevHandle) goto out; pr_info("\thandle changed from(0x%04x)!!!\n", sas_device->handle); - sas_device->handle = handle; + sas_device->handle = sas_device_pg0->DevHandle; if (sas_target_priv_data) - sas_target_priv_data->handle = handle; + sas_target_priv_data->handle = + sas_device_pg0->DevHandle; goto out; } } @@ -6448,13 +6608,15 @@ _scsih_search_responding_sas_devices(struct MPT3SAS_ADAPTER *ioc) MPI2_IOCSTATUS_MASK; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; - handle = le16_to_cpu(sas_device_pg0.DevHandle); + handle = sas_device_pg0.DevHandle = + le16_to_cpu(sas_device_pg0.DevHandle); device_info = le32_to_cpu(sas_device_pg0.DeviceInfo); if (!(_scsih_is_end_device(device_info))) continue; - _scsih_mark_responding_sas_device(ioc, - le64_to_cpu(sas_device_pg0.SASAddress), - le16_to_cpu(sas_device_pg0.Slot), handle); + sas_device_pg0.SASAddress = + le64_to_cpu(sas_device_pg0.SASAddress); + sas_device_pg0.Slot = le16_to_cpu(sas_device_pg0.Slot); + _scsih_mark_responding_sas_device(ioc, &sas_device_pg0); } out: -- cgit v0.10.2 From f9d81cfc23151eb6e9e498663c9784c351646d0e Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:53 +0530 Subject: mpt3sas: Bump mpt3sas Driver version to v5.100.00.00 Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index da82a97..d635612 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -71,8 +71,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "04.100.00.00" -#define MPT3SAS_MAJOR_VERSION 4 +#define MPT3SAS_DRIVER_VERSION "05.100.00.00" +#define MPT3SAS_MAJOR_VERSION 5 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 -- cgit v0.10.2 From 2e26c3853206ba166c3434e5f2ca4c82078ad84e Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:54 +0530 Subject: mpt3sas: Update MPI2 strings to MPI2.5 Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 7e8ede2..0af5744 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3926,7 +3926,7 @@ _base_send_ioc_init(struct MPT3SAS_ADAPTER *ioc, int sleep_flag) mpi_request.WhoInit = MPI2_WHOINIT_HOST_DRIVER; mpi_request.VF_ID = 0; /* TODO */ mpi_request.VP_ID = 0; - mpi_request.MsgVersion = cpu_to_le16(MPI2_VERSION); + mpi_request.MsgVersion = cpu_to_le16(MPI25_VERSION); mpi_request.HeaderVersion = cpu_to_le16(MPI2_HEADER_VERSION); if (_base_is_controller_msix_enabled(ioc)) @@ -4795,7 +4795,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->build_sg_scmd = &_base_build_sg_scmd_ieee; ioc->build_sg = &_base_build_sg_ieee; ioc->build_zero_len_sge = &_base_build_zero_len_sge_ieee; - ioc->mpi25 = 1; ioc->sge_size_ieee = sizeof(Mpi2IeeeSgeSimple64_t); /* diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index d635612..c0c774f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -826,7 +826,6 @@ struct MPT3SAS_ADAPTER { MPT_BUILD_SG_SCMD build_sg_scmd; MPT_BUILD_SG build_sg; MPT_BUILD_ZERO_LEN_SGE build_zero_len_sge; - u8 mpi25; u16 sge_size_ieee; /* function ptr for MPI sg elements only */ -- cgit v0.10.2 From 35c319b47884e49d9d0a84779097916ccb173947 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:55 +0530 Subject: mpt3sas: MPI 2.5 Rev J (2.5.5) specification and 2.00.34 header files Following is the change set, 1. Added more defines for the BiosOptions field of MPI2_CONFIG_PAGE_BIOS_1. 2. Added MPI2_TOOLBOX_CLEAN_BIT26_PRODUCT_SPECIFIC definition. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h index d730c5c..c2d127c 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2.h @@ -8,7 +8,7 @@ * scatter/gather formats. * Creation Date: June 21, 2006 * - * mpi2.h Version: 02.00.33 + * mpi2.h Version: 02.00.34 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -90,6 +90,7 @@ * 04-17-13 02.00.31 Bumped MPI2_HEADER_VERSION_UNIT. * 08-19-13 02.00.32 Bumped MPI2_HEADER_VERSION_UNIT. * 12-05-13 02.00.33 Bumped MPI2_HEADER_VERSION_UNIT. 
+ * 01-08-14 02.00.34 Bumped MPI2_HEADER_VERSION_UNIT * -------------------------------------------------------------------------- */ @@ -123,7 +124,7 @@ #define MPI2_VERSION_02_05 (0x0205) /*Unit and Dev versioning for this MPI header set */ -#define MPI2_HEADER_VERSION_UNIT (0x21) +#define MPI2_HEADER_VERSION_UNIT (0x22) #define MPI2_HEADER_VERSION_DEV (0x00) #define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI2_HEADER_VERSION_UNIT_SHIFT (8) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h index 62dfbf6..cf2b6bf 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h @@ -6,7 +6,7 @@ * Title: MPI Configuration messages and pages * Creation Date: November 10, 2006 * - * mpi2_cnfg.h Version: 02.00.27 + * mpi2_cnfg.h Version: 02.00.28 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -175,6 +175,8 @@ * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0. * Added MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID for * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0. + * 01-08-14 02.00.28 Added more defines for the BiosOptions field of + * MPI2_CONFIG_PAGE_BIOS_1. * -------------------------------------------------------------------------- */ @@ -1334,9 +1336,17 @@ typedef struct _MPI2_CONFIG_PAGE_BIOS_1 { *PTR_MPI2_CONFIG_PAGE_BIOS_1, Mpi2BiosPage1_t, *pMpi2BiosPage1_t; -#define MPI2_BIOSPAGE1_PAGEVERSION (0x05) +#define MPI2_BIOSPAGE1_PAGEVERSION (0x06) /*values for BIOS Page 1 BiosOptions field */ +#define MPI2_BIOSPAGE1_OPTIONS_X86_DISABLE_BIOS (0x00000400) + +#define MPI2_BIOSPAGE1_OPTIONS_MASK_REGISTRATION_UEFI_BSD (0x00000300) +#define MPI2_BIOSPAGE1_OPTIONS_USE_BIT0_REGISTRATION_UEFI_BSD (0x00000000) +#define MPI2_BIOSPAGE1_OPTIONS_FULL_REGISTRATION_UEFI_BSD (0x00000100) +#define MPI2_BIOSPAGE1_OPTIONS_ADAPTER_REGISTRATION_UEFI_BSD (0x00000200) +#define MPI2_BIOSPAGE1_OPTIONS_DISABLE_REGISTRATION_UEFI_BSD (0x00000300) + #define MPI2_BIOSPAGE1_OPTIONS_MASK_OEM_ID (0x000000F0) #define MPI2_BIOSPAGE1_OPTIONS_LSI_OEM_ID (0x00000000) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h index 904910d..1629e5b 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h @@ -6,7 +6,7 @@ * Title: MPI diagnostic tool structures and definitions * Creation Date: March 26, 2007 * - * mpi2_tool.h Version: 02.00.11 + * mpi2_tool.h Version: 02.00.12 * * Version History * --------------- @@ -33,6 +33,7 @@ * 07-26-12 02.00.10 Modified MPI2_TOOLBOX_DIAGNOSTIC_CLI_REQUEST so that * it uses MPI Chain SGE as well as MPI Simple SGE. * 08-19-13 02.00.11 Added MPI2_TOOLBOX_TEXT_DISPLAY_TOOL and related info. + * 01-08-14 02.00.12 Added MPI2_TOOLBOX_CLEAN_BIT26_PRODUCT_SPECIFIC. 
* -------------------------------------------------------------------------- */ @@ -100,6 +101,7 @@ typedef struct _MPI2_TOOLBOX_CLEAN_REQUEST { #define MPI2_TOOLBOX_CLEAN_OTHER_PERSIST_PAGES (0x20000000) #define MPI2_TOOLBOX_CLEAN_FW_CURRENT (0x10000000) #define MPI2_TOOLBOX_CLEAN_FW_BACKUP (0x08000000) +#define MPI2_TOOLBOX_CLEAN_BIT26_PRODUCT_SPECIFIC (0x04000000) #define MPI2_TOOLBOX_CLEAN_MEGARAID (0x02000000) #define MPI2_TOOLBOX_CLEAN_INITIALIZATION (0x01000000) #define MPI2_TOOLBOX_CLEAN_FLASH (0x00000004) -- cgit v0.10.2 From fb84dfc44718ef4099a827d147f738e428828d02 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:56 +0530 Subject: mpt3sas: Add branding string support for OEM's HBA Added the following Dell branding to the mpt3sas driver.

"VendorID"  "DeviceID"  "SubsystemVendor ID"  "SubsystemDevice ID"  Dell Branding String
 0x1000      0x0097      0x1028                0x1F46                DELL 12Gbps HBA

Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0af5744..3318e43 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2367,6 +2367,41 @@ _base_display_intel_branding(struct MPT3SAS_ADAPTER *ioc) /** + * _base_display_dell_branding - Display branding string + * @ioc: per adapter object + * + * Return nothing. + */ +static void +_base_display_dell_branding(struct MPT3SAS_ADAPTER *ioc) +{ + if (ioc->pdev->subsystem_vendor != PCI_VENDOR_ID_DELL) + return; + + switch (ioc->pdev->device) { + case MPI25_MFGPAGE_DEVID_SAS3008: + switch (ioc->pdev->subsystem_device) { + case MPT3SAS_DELL_12G_HBA_SSDID: + pr_info(MPT3SAS_FMT "%s\n", ioc->name, + MPT3SAS_DELL_12G_HBA_BRANDING); + break; + default: + pr_info(MPT3SAS_FMT + "Dell 12Gbps HBA: Subsystem ID: 0x%X\n", ioc->name, + ioc->pdev->subsystem_device); + break; + } + break; + default: + pr_info(MPT3SAS_FMT + "Dell 12Gbps HBA: Subsystem ID: 0x%X\n", ioc->name, + ioc->pdev->subsystem_device); + break; + } +} + + +/** * _base_display_ioc_capabilities - Disply IOC's capabilities. * @ioc: per adapter object * @@ -2396,6 +2431,7 @@ _base_display_ioc_capabilities(struct MPT3SAS_ADAPTER *ioc) bios_version & 0x000000FF); _base_display_intel_branding(ioc); + _base_display_dell_branding(ioc); pr_info(MPT3SAS_FMT "Protocol=(", ioc->name); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index c0c774f..a5a34f3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -152,6 +152,17 @@ #define MPT3SAS_INTEL_RS3UC080_SSDID 0x3524 /* + * Dell HBA branding + */ +#define MPT3SAS_DELL_12G_HBA_BRANDING \ + "Dell 12Gbps HBA" + +/* + * Dell HBA SSDIDs + */ +#define MPT3SAS_DELL_12G_HBA_SSDID 0x1F46 + +/* * status bits for ioc->diag_buffer_status */ #define MPT3_DIAG_BUFFER_IS_REGISTERED (0x01) -- cgit v0.10.2 From 38e4141ecb0e59c93d85ec5948f98d1f1e331a88 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:57 +0530 Subject: mpt3sas: Add branding string support for OEM custom HBA Add the following OEM's branding to the mpt3sas driver.

"VendorID"  "DeviceID"  "SubsystemVendor ID"  "SubsystemDevice ID"  Cisco Branding String
 0x1000      0x97        SVID = 0x1137         0x014C                Cisco 9300-8E 12G SAS HBA

Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 3318e43..3c8561c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2400,6 +2400,39 @@ _base_display_dell_branding(struct MPT3SAS_ADAPTER *ioc) } } +/** + * _base_display_cisco_branding - Display branding string + * @ioc: per adapter object + * + * Return nothing. + */ +static void +_base_display_cisco_branding(struct MPT3SAS_ADAPTER *ioc) +{ + if (ioc->pdev->subsystem_vendor != PCI_VENDOR_ID_CISCO) + return; + + switch (ioc->pdev->device) { + case MPI25_MFGPAGE_DEVID_SAS3008: + switch (ioc->pdev->subsystem_device) { + case MPT3SAS_CISCO_12G_HBA_SSDID: + pr_info(MPT3SAS_FMT "%s\n", ioc->name, + MPT3SAS_CISCO_12G_HBA_BRANDING); + break; + default: + pr_info(MPT3SAS_FMT + "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n", + ioc->name, ioc->pdev->subsystem_device); + break; + } + break; + default: + pr_info(MPT3SAS_FMT + "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n", + ioc->name, ioc->pdev->subsystem_device); + break; + } +} /** * _base_display_ioc_capabilities - Disply IOC's capabilities. @@ -2432,6 +2465,7 @@ _base_display_ioc_capabilities(struct MPT3SAS_ADAPTER *ioc) _base_display_intel_branding(ioc); _base_display_dell_branding(ioc); + _base_display_cisco_branding(ioc); pr_info(MPT3SAS_FMT "Protocol=(", ioc->name); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index a5a34f3..736682c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -163,6 +163,17 @@ #define MPT3SAS_DELL_12G_HBA_SSDID 0x1F46 /* + * Cisco HBA branding + */ +#define MPT3SAS_CISCO_12G_HBA_BRANDING \ + "Cisco 9300-8E 12G SAS HBA" + +/* + * Cisco HBA SSSDIDs + */ + #define MPT3SAS_CISCO_12G_HBA_SSDID 0x14C + +/* * status bits for ioc->diag_buffer_status */ #define MPT3_DIAG_BUFFER_IS_REGISTERED (0x01) -- cgit v0.10.2 From 2b89669ae4addfa68a58e0fc16afdd24739720d8 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:58 +0530 Subject: mpt3sas: Bump mpt3sas driver version to v6.100.00.00 Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 736682c..4196fbb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -71,8 +71,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "05.100.00.00" -#define MPT3SAS_MAJOR_VERSION 5 +#define MPT3SAS_DRIVER_VERSION "06.100.00.00" +#define MPT3SAS_MAJOR_VERSION 6 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 -- cgit v0.10.2 From a6f84009b9d7ca97c400ca929f9d58f43b0ece9d Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:59 +0530 Subject: mpt3sas: MPI 2.5 Rev K (2.5.6) specifications Below are the new changes to MPI 2.5 Rev K(2.5.6) specification and 2.00.35 header files 1) Added a minimum size requirement for target mode command buffers. 2) Added MinMSIxIndex and MaxMSIxIndex fields to CommandBufferPostBase Request. 3) For BIOS Page 1, added SSUTimeout field, and added Product Name String Format bits to the BiosOptions field Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h index c2d127c..ec27ad2 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2.h @@ -8,7 +8,7 @@ * scatter/gather formats. * Creation Date: June 21, 2006 * - * mpi2.h Version: 02.00.34 + * mpi2.h Version: 02.00.35 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -91,6 +91,7 @@ * 08-19-13 02.00.32 Bumped MPI2_HEADER_VERSION_UNIT. * 12-05-13 02.00.33 Bumped MPI2_HEADER_VERSION_UNIT. * 01-08-14 02.00.34 Bumped MPI2_HEADER_VERSION_UNIT + * 06-13-14 02.00.35 Bumped MPI2_HEADER_VERSION_UNIT. * -------------------------------------------------------------------------- */ @@ -124,7 +125,7 @@ #define MPI2_VERSION_02_05 (0x0205) /*Unit and Dev versioning for this MPI header set */ -#define MPI2_HEADER_VERSION_UNIT (0x22) +#define MPI2_HEADER_VERSION_UNIT (0x23) #define MPI2_HEADER_VERSION_DEV (0x00) #define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI2_HEADER_VERSION_UNIT_SHIFT (8) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h index cf2b6bf..581fdb3 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h @@ -6,7 +6,7 @@ * Title: MPI Configuration messages and pages * Creation Date: November 10, 2006 * - * mpi2_cnfg.h Version: 02.00.28 + * mpi2_cnfg.h Version: 02.00.29 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -177,6 +177,8 @@ * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0. * 01-08-14 02.00.28 Added more defines for the BiosOptions field of * MPI2_CONFIG_PAGE_BIOS_1. + * 06-13-14 02.00.29 Added SSUTimeout field to MPI2_CONFIG_PAGE_BIOS_1, and + * more defines for the BiosOptions field.. * -------------------------------------------------------------------------- */ @@ -1324,7 +1326,9 @@ typedef struct _MPI2_CONFIG_PAGE_BIOS_1 { MPI2_CONFIG_PAGE_HEADER Header; /*0x00 */ U32 BiosOptions; /*0x04 */ U32 IOCSettings; /*0x08 */ - U32 Reserved1; /*0x0C */ + U8 SSUTimeout; /*0x0C */ + U8 Reserved1; /*0x0D */ + U16 Reserved2; /*0x0E */ U32 DeviceSettings; /*0x10 */ U16 NumberOfDevices; /*0x14 */ U16 UEFIVersion; /*0x16 */ @@ -1336,9 +1340,16 @@ *PTR_MPI2_CONFIG_PAGE_BIOS_1, Mpi2BiosPage1_t, *pMpi2BiosPage1_t; -#define MPI2_BIOSPAGE1_PAGEVERSION (0x06) +#define MPI2_BIOSPAGE1_PAGEVERSION (0x07) /*values for BIOS Page 1 BiosOptions field */ +#define MPI2_BIOSPAGE1_OPTIONS_PNS_MASK (0x00003800) +#define MPI2_BIOSPAGE1_OPTIONS_PNS_PBDHL (0x00000000) +#define MPI2_BIOSPAGE1_OPTIONS_PNS_ENCSLOSURE (0x00000800) +#define MPI2_BIOSPAGE1_OPTIONS_PNS_LWWID (0x00001000) +#define MPI2_BIOSPAGE1_OPTIONS_PNS_PSENS (0x00001800) +#define MPI2_BIOSPAGE1_OPTIONS_PNS_ESPHY (0x00002000) + #define MPI2_BIOSPAGE1_OPTIONS_X86_DISABLE_BIOS (0x00000400) #define MPI2_BIOSPAGE1_OPTIONS_MASK_REGISTRATION_UEFI_BSD (0x00000300) -- cgit v0.10.2 From 3898f08e8ccfc8b7b4c297960ecdde970869e950 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:00 +0530 Subject: mpt3sas: Complete the SCSI command with DID_RESET status for log_info value 0x32010081 For any SCSI command, if the driver receives IOC status = SCSI_IOC_TERMINATED and log info = 0x32010081, then that command will be completed with DID_RESET host status. 
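As a rough C sketch of the completion-path decision this patch introduces (a simplified illustration, not the actual hunk; see the _scsih_io_done diff below, and note that ioc_status, log_info and scmd are assumed to be already decoded by the surrounding driver code):

	#define VIRTUAL_IO_FAILED_RETRY (0x32010081)

	if (ioc_status == MPI2_IOCSTATUS_SCSI_IOC_TERMINATED &&
	    log_info == VIRTUAL_IO_FAILED_RETRY) {
		/* Per this commit: DID_RESET makes the midlayer keep
		 * retrying the command while the re-inserted drive is
		 * rediscovered, instead of failing it after five tries. */
		scmd->result = DID_RESET << 16;
	} else {
		scmd->result = DID_SOFT_ERROR << 16;
	}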
The definition of this log info value is "Virtual IO has failed and has to be retried". Firmware will provide this log info value with IOC Status "SCSI_IOC_TERMINATED" whenever a drive (which is part of a volume) is pulled and pushed back within some minimal delay. With this log info value, firmware informs the driver to retry the failed IO command an infinite number of times, to give the firmware time to discover the reinserted drive successfully, instead of just retrying the failed command five times (which does not give the firmware enough time to complete drive discovery) and failing the IO permanently even though the drive came back successfully. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 4196fbb..3b03639 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -202,6 +202,8 @@ #define MFG10_GF0_SSD_DATA_SCRUB_DISABLE (0x00000008) #define MFG10_GF0_SINGLE_DRIVE_R0 (0x00000010) +#define VIRTUAL_IO_FAILED_RETRY (0x32010081) + /* OEM Specific Flags will come from OEM specific header files */ struct Mpi2ManufacturingPage10_t { MPI2_CONFIG_PAGE_HEADER Header; /* 00h */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index ec3b3d3..9dbc71f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4243,6 +4243,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) scmd->device->expecting_cc_ua = 1; } break; + } else if (log_info == VIRTUAL_IO_FAILED_RETRY) { + scmd->result = DID_RESET << 16; + break; } scmd->result = DID_SOFT_ERROR << 16; break; -- cgit v0.10.2 From 62f5c74c97037c7c00fdefb69dcfe39810c34a05 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:01 +0530 Subject: mpt3sas: Return host busy error status to SML when DMA mapping of scatter gather list fails for a SCSI command The scsi_dma_map API will return a negative value (i.e. -ENOMEM) if DMA mapping of the sg list fails, and zero if the sg list in the SCSI cmd is NULL. But the driver did not handle the sg list DMA mapping failure case properly. So, updated the code to return host busy error status to the SCSI Mid Layer (SML) when DMA mapping of the scatter gather list fails for a SCSI command, so that SML will retry this SCSI cmd after some time. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 3c8561c..d4d4e6c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1360,7 +1360,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, sg_scmd = scsi_sglist(scmd); sges_left = scsi_dma_map(scmd); - if (!sges_left) { + if (sges_left < 0) { sdev_printk(KERN_ERR, scmd->device, "pci_map_sg failed: request for %d bytes!\n", scsi_bufflen(scmd)); @@ -1429,7 +1429,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, fill_in_last_segment: /* fill the last segment */ - while (sges_left) { + while (sges_left > 0) { if (sges_left == 1) _base_add_sg_single_ieee(sg_local, simple_sgl_flags_last, 0, sg_dma_len(sg_scmd), -- cgit v0.10.2 From d8eb4a47c70b4bab34b938d2f682044687f53c64 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:02 +0530 Subject: mpt3sas: Added support for customer specific branding

"VendorID"  "DeviceID"  "SubsystemVendor ID"  "SubsystemDevice ID"  Cisco Branding String
 0x1000      0x97        0x1137                0x154                 Cisco 9300-8i 12Gbps SAS HBA
 0x1000      0x97        0x1137                0x155                 Cisco 12G Modular SAS Pass through Controller
 0x1000      0x97        0x1137                0x156                 UCS C3X60 12G SAS Pass through Controller

Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index d4d4e6c..d4f1dcd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2415,9 +2415,17 @@ _base_display_cisco_branding(struct MPT3SAS_ADAPTER *ioc) switch (ioc->pdev->device) { case MPI25_MFGPAGE_DEVID_SAS3008: switch (ioc->pdev->subsystem_device) { - case MPT3SAS_CISCO_12G_HBA_SSDID: + case MPT3SAS_CISCO_12G_8E_HBA_SSDID: pr_info(MPT3SAS_FMT "%s\n", ioc->name, - MPT3SAS_CISCO_12G_HBA_BRANDING); + MPT3SAS_CISCO_12G_8E_HBA_BRANDING); + break; + case MPT3SAS_CISCO_12G_8I_HBA_SSDID: + pr_info(MPT3SAS_FMT "%s\n", ioc->name, + MPT3SAS_CISCO_12G_8I_HBA_BRANDING); + break; + case MPT3SAS_CISCO_12G_AVILA_HBA_SSDID: + pr_info(MPT3SAS_FMT "%s\n", ioc->name, + MPT3SAS_CISCO_12G_AVILA_HBA_BRANDING); break; default: pr_info(MPT3SAS_FMT @@ -2426,6 +2434,23 @@ _base_display_cisco_branding(struct MPT3SAS_ADAPTER *ioc) break; } break; + case MPI25_MFGPAGE_DEVID_SAS3108_1: + switch (ioc->pdev->subsystem_device) { + case MPT3SAS_CISCO_12G_AVILA_HBA_SSDID: + pr_info(MPT3SAS_FMT "%s\n", ioc->name, + MPT3SAS_CISCO_12G_AVILA_HBA_BRANDING); + break; + case MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_SSDID: + pr_info(MPT3SAS_FMT "%s\n", ioc->name, + MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_BRANDING); + break; + default: + pr_info(MPT3SAS_FMT + "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n", + ioc->name, ioc->pdev->subsystem_device); + break; + } + break; default: pr_info(MPT3SAS_FMT "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n", diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 3b03639..87e9000 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -165,13 +165,21 @@ /* * Cisco HBA branding */ -#define MPT3SAS_CISCO_12G_HBA_BRANDING \ - "Cisco 9300-8E 12G SAS HBA" - +#define MPT3SAS_CISCO_12G_8E_HBA_BRANDING \ + "Cisco 9300-8E 12G SAS HBA" +#define MPT3SAS_CISCO_12G_8I_HBA_BRANDING \ + "Cisco 9300-8i 12G SAS HBA" +#define MPT3SAS_CISCO_12G_AVILA_HBA_BRANDING \ + "Cisco 12G Modular SAS Pass through Controller" +#define 
MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_BRANDING \ + "UCS C3X60 12G SAS Pass through Controller" /* * Cisco HBA SSSDIDs */ - #define MPT3SAS_CISCO_12G_HBA_SSDID 0x14C +#define MPT3SAS_CISCO_12G_8E_HBA_SSDID 0x14C +#define MPT3SAS_CISCO_12G_8I_HBA_SSDID 0x154 +#define MPT3SAS_CISCO_12G_AVILA_HBA_SSDID 0x155 +#define MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_SSDID 0x156 /* * status bits for ioc->diag_buffer_status -- cgit v0.10.2 From 2b89669ae4addfa68a58e0fc16afdd24739720d8 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:24:58 +0530 Subject: mpt3sas: Bump mpt3sas driver version to v6.100.00.00 Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 736682c..4196fbb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -71,8 +71,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "05.100.00.00" -#define MPT3SAS_MAJOR_VERSION 5 +#define MPT3SAS_DRIVER_VERSION "06.100.00.00" +#define MPT3SAS_MAJOR_VERSION 6 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 -- cgit v0.10.2 From bdff785e4f593218816fa3677e043aae1481aa98 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:03 +0530 Subject: mpt3sas: Use alloc_ordered_workqueue() API instead of create_singlethread_workqueue() API Create the firmware event workqueue using the alloc_ordered_workqueue() API instead of the create_singlethread_workqueue() API, so that works queued on it are processed sequentially. Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. Petersen Reviewed-by: Joe Lawrence Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 9dbc71f..6e0a7fd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -8026,8 +8026,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* event thread */ snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name), "fw_event%d", ioc->id); - ioc->firmware_event_thread = create_singlethread_workqueue( - ioc->firmware_event_name); + ioc->firmware_event_thread = alloc_ordered_workqueue( + ioc->firmware_event_name, WQ_MEM_RECLAIM); if (!ioc->firmware_event_thread) { pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); -- cgit v0.10.2 From 36814028ad720165a6febcf9ddd7de20833fd240 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:04 +0530 Subject: mpt3sas: Call dma_mapping_error() API after mapping an address with dma_map_single() API Added a dma_mapping_error() check after mapping an address with the dma_map_single() API. Otherwise, when CONFIG_DMA_API_DEBUG is enabled in the kernel, it complains about the mpt3sas driver not calling dma_mapping_error() after mapping an address with dma_map_single(). Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 7a7aa68..70fd019 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -1964,7 +1964,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, } else { dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio), blk_rq_bytes(req), PCI_DMA_BIDIRECTIONAL); - if (!dma_addr_out) { + if (pci_dma_mapping_error(ioc->pdev, dma_addr_out)) { pr_info(MPT3SAS_FMT "%s(): DMA Addr out = NULL\n", ioc->name, __func__); rc = -ENOMEM; @@ -1986,7 +1986,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, } else { dma_addr_in = pci_map_single(ioc->pdev, bio_data(rsp->bio), blk_rq_bytes(rsp), PCI_DMA_BIDIRECTIONAL); - if (!dma_addr_in) { + if (pci_dma_mapping_error(ioc->pdev, dma_addr_in)) { pr_info(MPT3SAS_FMT "%s(): DMA Addr in = NULL\n", ioc->name, __func__); rc = -ENOMEM; -- cgit v0.10.2 From df838f92f3f5240dca54e1629e8547818e8ea646 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:05 +0530 Subject: mpt3sas: When device is blocked followed by unblock fails, unfreeze the I/Os Issue: When the disks are getting discovered and assigned device handles by the kernel, a device block followed by an unblock (due to broadcast primitives) issued by the driver is interspersed by the kernel changing the state of the device. Therefore the unblock by the driver results in a no-op within the kernel API. To fix this, the patch below checks the return value of the unblock API and performs a block followed by an unblock to unfreeze the block layer's I/O queue. Sufficient checks and prints are also added in the driver to identify this condition caused by the kernel. 
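In outline, the recovery sequence reads as follows (a condensed sketch of the _scsih_internal_device_unblock() logic added in the diff below, with the error reporting omitted):

	r = scsi_internal_device_unblock(sdev, SDEV_RUNNING);
	if (r == -EINVAL) {
		/* The midlayer already set SDEV_RUNNING during device
		 * addition, but our earlier block left the request queue
		 * stopped: block again so the state machine reaches
		 * SDEV_BLOCK, then unblock to restart the queue. */
		scsi_internal_device_block(sdev);
		scsi_internal_device_unblock(sdev, SDEV_RUNNING);
	}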
Signed-off-by: Sreekanth Reddy Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6e0a7fd..8ccef38 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2605,6 +2605,75 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc) } /** + * _scsih_internal_device_block - block the sdev device + * @sdev: per device object + * @sas_device_priv_data : per device driver private data + * + * make sure device is blocked without error, if not + * print an error + */ +static void +_scsih_internal_device_block(struct scsi_device *sdev, + struct MPT3SAS_DEVICE *sas_device_priv_data) +{ + int r = 0; + + sdev_printk(KERN_INFO, sdev, "device_block, handle(0x%04x)\n", + sas_device_priv_data->sas_target->handle); + sas_device_priv_data->block = 1; + + r = scsi_internal_device_block(sdev); + if (r == -EINVAL) + sdev_printk(KERN_WARNING, sdev, + "device_block failed with return(%d) for handle(0x%04x)\n", + sas_device_priv_data->sas_target->handle, r); +} + +/** + * _scsih_internal_device_unblock - unblock the sdev device + * @sdev: per device object + * @sas_device_priv_data : per device driver private data + * make sure device is unblocked without error, if not retry + * by blocking and then unblocking + */ + +static void +_scsih_internal_device_unblock(struct scsi_device *sdev, + struct MPT3SAS_DEVICE *sas_device_priv_data) +{ + int r = 0; + + sdev_printk(KERN_WARNING, sdev, "device_unblock and setting to running, " + "handle(0x%04x)\n", sas_device_priv_data->sas_target->handle); + sas_device_priv_data->block = 0; + r = scsi_internal_device_unblock(sdev, SDEV_RUNNING); + if (r == -EINVAL) { + /* The device has been set to SDEV_RUNNING by SD layer during + * device addition but the request queue is still stopped by + * our earlier block call. 
We need to perform a block again + * to get the device to SDEV_BLOCK and then to SDEV_RUNNING */ + + sdev_printk(KERN_WARNING, sdev, + "device_unblock failed with return(%d) for handle(0x%04x) " + "performing a block followed by an unblock\n", + sas_device_priv_data->sas_target->handle, r); + sas_device_priv_data->block = 1; + r = scsi_internal_device_block(sdev); + if (r) + sdev_printk(KERN_WARNING, sdev, "retried device_block " + "failed with return(%d) for handle(0x%04x)\n", + sas_device_priv_data->sas_target->handle, r); + + sas_device_priv_data->block = 0; + r = scsi_internal_device_unblock(sdev, SDEV_RUNNING); + if (r) + sdev_printk(KERN_WARNING, sdev, "retried device_unblock" + " failed with return(%d) for handle(0x%04x)\n", + sas_device_priv_data->sas_target->handle, r); + } +} + +/** * _scsih_ublock_io_all_device - unblock every device * @ioc: per adapter object * @@ -2623,11 +2692,10 @@ _scsih_ublock_io_all_device(struct MPT3SAS_ADAPTER *ioc) if (!sas_device_priv_data->block) continue; - sas_device_priv_data->block = 0; dewtprintk(ioc, sdev_printk(KERN_INFO, sdev, "device_running, handle(0x%04x)\n", sas_device_priv_data->sas_target->handle)); - scsi_internal_device_unblock(sdev, SDEV_RUNNING); + _scsih_internal_device_unblock(sdev, sas_device_priv_data); } } @@ -2652,10 +2720,9 @@ _scsih_ublock_io_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address) if (sas_device_priv_data->sas_target->sas_address != sas_address) continue; - if (sas_device_priv_data->block) { - sas_device_priv_data->block = 0; - scsi_internal_device_unblock(sdev, SDEV_RUNNING); - } + if (sas_device_priv_data->block) + _scsih_internal_device_unblock(sdev, + sas_device_priv_data); } } @@ -2678,10 +2745,7 @@ _scsih_block_io_all_device(struct MPT3SAS_ADAPTER *ioc) continue; if (sas_device_priv_data->block) continue; - sas_device_priv_data->block = 1; - scsi_internal_device_block(sdev); - sdev_printk(KERN_INFO, sdev, "device_blocked, handle(0x%04x)\n", - sas_device_priv_data->sas_target->handle); + _scsih_internal_device_block(sdev, sas_device_priv_data); } } @@ -2713,10 +2777,7 @@ _scsih_block_io_device(struct MPT3SAS_ADAPTER *ioc, u16 handle) continue; if (sas_device->pend_sas_rphy_add) continue; - sas_device_priv_data->block = 1; - scsi_internal_device_block(sdev); - sdev_printk(KERN_INFO, sdev, - "device_blocked, handle(0x%04x)\n", handle); + _scsih_internal_device_block(sdev, sas_device_priv_data); } } -- cgit v0.10.2 From c75683ca13d12a700531864bcd3118e94bc9eaa0 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 30 Jun 2015 12:25:06 +0530 Subject: mpt3sas : Bump mpt3sas driver version to 9.100.00.00 Signed-off-by: Sreekanth Reddy Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 87e9000..f0e462b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -71,8 +71,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "06.100.00.00" -#define MPT3SAS_MAJOR_VERSION 6 +#define MPT3SAS_DRIVER_VERSION "09.100.00.00" +#define MPT3SAS_MAJOR_VERSION 9 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 -- cgit v0.10.2 From 3cb4fc1fcae9647c594bc2b8fda1b818f4fb757f Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Aug 2015 11:47:05 -0300 Subject: ipr: Inhibit underlength data check for AFDASD in raw mode. Disable underlength error verification based on count of bytes actually transferred for AF DASD devices when SIS pipe mode is enabled. This avoids unexpected underlength errors when issuing some commands in raw mode. Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Wen Xiong Acked-by: Brian King Signed-off-by: James Bottomley diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 3411919..ba16dd7 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6383,9 +6383,13 @@ static int ipr_queuecommand(struct Scsi_Host *shost, (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; } - if (res->raw_mode && ipr_is_af_dasd_device(res)) + if (res->raw_mode && ipr_is_af_dasd_device(res)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_PIPE; + if (scsi_cmd->underflow == 0) + ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_ULEN_CHK; + } + if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); else -- cgit v0.10.2 From e35d7f27fbd51a09a41a5439e39f22a3d102c00b Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Aug 2015 11:47:06 -0300 Subject: ipr: Enable SIS pipe commands for SIS-32 devices. Remove unnecessary check that disabled SIS pipe commands for SIS-32 devices. This change was sufficient to enable raw mode and send SIS pipe commands for a 57B3 device. Fixes: f8ee25d7d239 ("ipr: AF DASD raw mode implementation in ipr driver") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Wen Xiong Acked-by: Brian King Signed-off-by: James Bottomley diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index ba16dd7..b62836d 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4555,7 +4555,7 @@ static ssize_t ipr_store_raw_mode(struct device *dev, spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); res = (struct ipr_resource_entry *)sdev->hostdata; if (res) { - if (ioa_cfg->sis64 && ipr_is_af_dasd_device(res)) { + if (ipr_is_af_dasd_device(res)) { res->raw_mode = simple_strtoul(buf, NULL, 10); len = strlen(buf); if (res->sdev) -- cgit v0.10.2 From 36dd5acd196574d41de3e81d8264df475bbb7123 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 6 Jul 2015 13:07:58 +0200 Subject: aic94xx: Skip reading user settings if flash is not found If no user settings are found it's pointless trying to read them from flash. So skip that step. This also fixes a compilation warning about uninitialized variables in aic94xx. 
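The resulting control flow is easiest to see in outline (a condensed sketch of asd_process_ctrl_a_user() after the patch below; the "no user settings" condition is paraphrased and the flash-reading steps are elided):

	struct asd_ll_el *el = NULL;	/* initialized; never read on the default path */
	struct asd_ctrla_phy_settings *ps;

	if (no_user_settings_in_flash) {	/* paraphrased condition */
		size = sizeof(struct asd_ctrla_phy_settings);
		ps = &dflt_ps;		/* use the built-in defaults */
		goto out_process;	/* skip reading from flash entirely */
	}
	/* ... locate and read the CTRL-A user settings from flash into ps ... */
out_process:
	err = asd_process_ctrla_phy_settings(asd_ha, ps);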
Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: James Bottomley diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c index edb43fd..c831e30 100644 --- a/drivers/scsi/aic94xx/aic94xx_sds.c +++ b/drivers/scsi/aic94xx/aic94xx_sds.c @@ -983,7 +983,7 @@ static int asd_process_ctrl_a_user(struct asd_ha_struct *asd_ha, { int err, i; u32 offs, size; - struct asd_ll_el *el; + struct asd_ll_el *el = NULL; struct asd_ctrla_phy_settings *ps; struct asd_ctrla_phy_settings dflt_ps; @@ -1004,6 +1004,7 @@ static int asd_process_ctrl_a_user(struct asd_ha_struct *asd_ha, size = sizeof(struct asd_ctrla_phy_settings); ps = &dflt_ps; + goto out_process; } if (size == 0) @@ -1028,7 +1029,7 @@ static int asd_process_ctrl_a_user(struct asd_ha_struct *asd_ha, ASD_DPRINTK("couldn't find ctrla phy settings struct\n"); goto out2; } - +out_process: err = asd_process_ctrla_phy_settings(asd_ha, ps); if (err) { ASD_DPRINTK("couldn't process ctrla phy settings\n"); -- cgit v0.10.2 From 1bab0de0274fbe8c8ac92179e6705584c55ed169 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:16:54 +0200 Subject: dm-mpath, scsi_dh: don't let dm detach device handlers While allowing dm-mpath to attach device handlers is functionality we need for backwards compatibility reasons, there is no reason to reference count them and detach them if dm-mpath stops using the device for some reason. If the device handler works for the given device it can just stay attached, and we can take the retain_hw_handler codepath. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Acked-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index eff7bdd..a9f58fd 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -159,12 +159,9 @@ static struct priority_group *alloc_priority_group(void) static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) { struct pgpath *pgpath, *tmp; - struct multipath *m = ti->private; list_for_each_entry_safe(pgpath, tmp, pgpaths, list) { list_del(&pgpath->list); - if (m->hw_handler_name) - scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev)); dm_put_device(ti, pgpath->path.dev); free_pgpath(pgpath); } @@ -580,6 +577,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps q = bdev_get_queue(p->path.dev->bdev); if (m->retain_attached_hw_handler) { +retain: attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); if (attached_handler_name) { /* @@ -599,20 +597,14 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps } if (m->hw_handler_name) { - /* - * Increments scsi_dh reference, even when using an - * already-attached handler. - */ r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { - /* - * Already attached to different hw_handler: - * try to reattach with correct one. 
- */ - scsi_dh_detach(q); - r = scsi_dh_attach(q, m->hw_handler_name); - } + char b[BDEVNAME_SIZE]; + printk(KERN_INFO "dm-mpath: retaining handler on device %s\n", + bdevname(p->path.dev->bdev, b)); + goto retain; + } if (r < 0) { ti->error = "error attaching hardware handler"; dm_put_device(ti, p->path.dev); @@ -624,7 +616,6 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps if (r < 0) { ti->error = "unable to set hardware " "handler parameters"; - scsi_dh_detach(q); dm_put_device(ti, p->path.dev); goto bad; } diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index 1efebc9..869b5bd 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -100,14 +100,6 @@ static int scsi_dh_handler_attach(struct scsi_device *sdev, { struct scsi_dh_data *d; - if (sdev->scsi_dh_data) { - if (sdev->scsi_dh_data->scsi_dh != scsi_dh) - return -EBUSY; - - kref_get(&sdev->scsi_dh_data->kref); - return 0; - } - if (!try_module_get(scsi_dh->module)) return -EINVAL; @@ -120,7 +112,6 @@ static int scsi_dh_handler_attach(struct scsi_device *sdev, } d->scsi_dh = scsi_dh; - kref_init(&d->kref); d->sdev = sdev; spin_lock_irq(sdev->request_queue->queue_lock); @@ -129,12 +120,14 @@ static int scsi_dh_handler_attach(struct scsi_device *sdev, return 0; } -static void __detach_handler (struct kref *kref) +/* + * scsi_dh_handler_detach - Detach a device handler from a device + * @sdev - SCSI device the device handler should be detached from + */ +static void scsi_dh_handler_detach(struct scsi_device *sdev) { - struct scsi_dh_data *scsi_dh_data = - container_of(kref, struct scsi_dh_data, kref); + struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data; struct scsi_device_handler *scsi_dh = scsi_dh_data->scsi_dh; - struct scsi_device *sdev = scsi_dh_data->sdev; scsi_dh->detach(sdev); @@ -147,30 +140,6 @@ static void __detach_handler (struct kref *kref) } /* - * scsi_dh_handler_detach - Detach a device handler from a device - * @sdev - SCSI device the device handler should be detached from - * @scsi_dh - Device handler to be detached - * - * Detach from a device handler. If a device handler is specified, - * only detach if the currently attached handler matches @scsi_dh. 
- */ -static void scsi_dh_handler_detach(struct scsi_device *sdev, - struct scsi_device_handler *scsi_dh) -{ - if (!sdev->scsi_dh_data) - return; - - if (scsi_dh && scsi_dh != sdev->scsi_dh_data->scsi_dh) - return; - - if (!scsi_dh) - scsi_dh = sdev->scsi_dh_data->scsi_dh; - - if (scsi_dh) - kref_put(&sdev->scsi_dh_data->kref, __detach_handler); -} - -/* * Functions for sysfs attribute 'dh_state' */ static ssize_t @@ -198,7 +167,7 @@ store_dh_state(struct device *dev, struct device_attribute *attr, /* * Detach from a device handler */ - scsi_dh_handler_detach(sdev, scsi_dh); + scsi_dh_handler_detach(sdev); err = 0; } else if (!strncmp(buf, "activate", 8)) { /* @@ -290,7 +259,8 @@ static int scsi_dh_notifier(struct notifier_block *nb, err = scsi_dh_handler_attach(sdev, devinfo); } else if (action == BUS_NOTIFY_DEL_DEVICE) { device_remove_file(dev, &scsi_dh_state_attr); - scsi_dh_handler_detach(sdev, NULL); + if (sdev->scsi_dh_data) + scsi_dh_handler_detach(sdev); } return err; } @@ -335,7 +305,8 @@ static int scsi_dh_notifier_remove(struct device *dev, void *data) sdev = to_scsi_device(dev); - scsi_dh_handler_detach(sdev, scsi_dh); + if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh == scsi_dh) + scsi_dh_handler_detach(sdev); put_device(dev); @@ -517,45 +488,22 @@ int scsi_dh_attach(struct request_queue *q, const char *name) err = -ENODEV; spin_unlock_irqrestore(q->queue_lock, flags); - if (!err) { - err = scsi_dh_handler_attach(sdev, scsi_dh); - put_device(&sdev->sdev_gendev); - } - return err; -} -EXPORT_SYMBOL_GPL(scsi_dh_attach); - -/* - * scsi_dh_detach - Detach device handler - * @q - Request queue that is associated with the scsi_device - * the handler should be detached from - * - * This function will detach the device handler only - * if the sdev is not part of the internal list, ie - * if it has been attached manually. 
- */ -void scsi_dh_detach(struct request_queue *q) -{ - unsigned long flags; - struct scsi_device *sdev; - struct scsi_device_handler *scsi_dh = NULL; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - - if (!sdev) - return; + if (err) + return err; if (sdev->scsi_dh_data) { - scsi_dh = sdev->scsi_dh_data->scsi_dh; - scsi_dh_handler_detach(sdev, scsi_dh); + if (sdev->scsi_dh_data->scsi_dh != scsi_dh) + err = -EBUSY; + goto out_put_device; } + + err = scsi_dh_handler_attach(sdev, scsi_dh); + +out_put_device: put_device(&sdev->sdev_gendev); + return err; } -EXPORT_SYMBOL_GPL(scsi_dh_detach); +EXPORT_SYMBOL_GPL(scsi_dh_attach); /* * scsi_dh_attached_handler_name - Get attached device handler's name diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 50c2a36..798d679 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -221,7 +221,6 @@ struct scsi_device_handler { struct scsi_dh_data { struct scsi_device_handler *scsi_dh; struct scsi_device *sdev; - struct kref kref; }; #define to_scsi_device(d) \ diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 620c723..99c9196 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -59,7 +59,6 @@ enum { extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); -extern void scsi_dh_detach(struct request_queue *); extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t); extern int scsi_dh_set_params(struct request_queue *, const char *); #else @@ -77,10 +76,6 @@ static inline int scsi_dh_attach(struct request_queue *req, const char *name) { return SCSI_DH_NOSYS; } -static inline void scsi_dh_detach(struct request_queue *q) -{ - return; -} static inline const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) { -- cgit v0.10.2 From 566079c849cfe538e908c44ac11a9c4638db8f91 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:16:55 +0200 Subject: dm-mpath, scsi_dh: request scsi_dh modules in scsi_dh, not dm-mpath This way we can reuse the same code for any attachment method, not just those requested from dm-mpath. [jejb: fixup checkpatch error] Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Acked-by: Mike Snitzer Signed-off-by: James Bottomley diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a9f58fd..5a67671 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -725,12 +725,6 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) return 0; m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); - if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name), - "scsi_dh_%s", m->hw_handler_name)) { - ti->error = "unknown hardware handler type"; - ret = -EINVAL; - goto fail; - } if (hw_argc > 1) { char *p; diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index 869b5bd..3dfef5b 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(list_lock); static LIST_HEAD(scsi_dh_list); -static struct scsi_device_handler *get_device_handler(const char *name) +static struct scsi_device_handler *__scsi_dh_lookup(const char *name) { struct scsi_device_handler *tmp, *found = NULL; @@ -44,6 +44,19 @@ static struct scsi_device_handler *get_device_handler(const char *name) return found; } +static struct scsi_device_handler *scsi_dh_lookup(const char *name) +{ + struct scsi_device_handler *dh; + + dh = __scsi_dh_lookup(name); + if (!dh) { + request_module(name); + dh = __scsi_dh_lookup(name); + } + + return dh; +} + /* * device_handler_match_function - Match a device handler to a device * @sdev - SCSI device to be tested @@ -158,7 +171,8 @@ store_dh_state(struct device *dev, struct device_attribute *attr, /* * Attach to a device handler */ - if (!(scsi_dh = get_device_handler(buf))) + scsi_dh = scsi_dh_lookup(buf); + if (!scsi_dh) return err; err = scsi_dh_handler_attach(sdev, scsi_dh); } else { @@ -322,8 +336,7 @@ static int scsi_dh_notifier_remove(struct device *dev, void *data) */ int scsi_register_device_handler(struct scsi_device_handler *scsi_dh) { - - if (get_device_handler(scsi_dh->name)) + if (__scsi_dh_lookup(scsi_dh->name)) return -EBUSY; if (!scsi_dh->attach || !scsi_dh->detach) @@ -350,7 +363,7 @@ EXPORT_SYMBOL_GPL(scsi_register_device_handler); int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) { - if (!get_device_handler(scsi_dh->name)) + if (!__scsi_dh_lookup(scsi_dh->name)) return -ENODEV; bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, @@ -455,17 +468,6 @@ int scsi_dh_set_params(struct request_queue *q, const char *params) EXPORT_SYMBOL_GPL(scsi_dh_set_params); /* - * scsi_dh_handler_exist - Return TRUE(1) if a device handler exists for - * the given name. FALSE(0) otherwise. - * @name - name of the device handler. 
- */ -int scsi_dh_handler_exist(const char *name) -{ - return (get_device_handler(name) != NULL); -} -EXPORT_SYMBOL_GPL(scsi_dh_handler_exist); - -/* * scsi_dh_attach - Attach device handler * @q - Request queue that is associated with the scsi_device * the handler should be attached to @@ -478,7 +480,7 @@ int scsi_dh_attach(struct request_queue *q, const char *name) struct scsi_device_handler *scsi_dh; int err = 0; - scsi_dh = get_device_handler(name); + scsi_dh = scsi_dh_lookup(name); if (!scsi_dh) return -EINVAL; diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 99c9196..966b921 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -57,7 +57,6 @@ enum { }; #if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE) extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); -extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t); extern int scsi_dh_set_params(struct request_queue *, const char *); @@ -68,10 +67,6 @@ static inline int scsi_dh_activate(struct request_queue *req, fn(data, 0); return 0; } -static inline int scsi_dh_handler_exist(const char *name) -{ - return 0; -} static inline int scsi_dh_attach(struct request_queue *req, const char *name) { return SCSI_DH_NOSYS; -- cgit v0.10.2 From daaa858b7a6bb497f11c2aae555053b9c047824b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:16:56 +0200 Subject: scsi_dh: move to drivers/scsi Prepare for building scsi_dh.c into the core SCSI module by moving it to drivers/scsi. Signed-off-by: Christoph Hellwig Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c deleted file mode 100644 index 3dfef5b..0000000 --- a/drivers/scsi/device_handler/scsi_dh.c +++ /dev/null @@ -1,571 +0,0 @@ -/* - * SCSI device handler infrastruture. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright IBM Corporation, 2007 - * Authors: - * Chandra Seetharaman - * Mike Anderson - */ - -#include -#include -#include -#include "../scsi_priv.h" - -static DEFINE_SPINLOCK(list_lock); -static LIST_HEAD(scsi_dh_list); - -static struct scsi_device_handler *__scsi_dh_lookup(const char *name) -{ - struct scsi_device_handler *tmp, *found = NULL; - - spin_lock(&list_lock); - list_for_each_entry(tmp, &scsi_dh_list, list) { - if (!strncmp(tmp->name, name, strlen(tmp->name))) { - found = tmp; - break; - } - } - spin_unlock(&list_lock); - return found; -} - -static struct scsi_device_handler *scsi_dh_lookup(const char *name) -{ - struct scsi_device_handler *dh; - - dh = __scsi_dh_lookup(name); - if (!dh) { - request_module(name); - dh = __scsi_dh_lookup(name); - } - - return dh; -} - -/* - * device_handler_match_function - Match a device handler to a device - * @sdev - SCSI device to be tested - * - * Tests @sdev against the match function of all registered device_handler. - * Returns the found device handler or NULL if not found. - */ -static struct scsi_device_handler * -device_handler_match_function(struct scsi_device *sdev) -{ - struct scsi_device_handler *tmp_dh, *found_dh = NULL; - - spin_lock(&list_lock); - list_for_each_entry(tmp_dh, &scsi_dh_list, list) { - if (tmp_dh->match && tmp_dh->match(sdev)) { - found_dh = tmp_dh; - break; - } - } - spin_unlock(&list_lock); - return found_dh; -} - -/* - * device_handler_match - Attach a device handler to a device - * @scsi_dh - The device handler to match against or NULL - * @sdev - SCSI device to be tested against @scsi_dh - * - * Tests @sdev against the device handler @scsi_dh or against - * all registered device_handler if @scsi_dh == NULL. - * Returns the found device handler or NULL if not found. 
- */ -static struct scsi_device_handler * -device_handler_match(struct scsi_device_handler *scsi_dh, - struct scsi_device *sdev) -{ - struct scsi_device_handler *found_dh; - - found_dh = device_handler_match_function(sdev); - - if (scsi_dh && found_dh != scsi_dh) - found_dh = NULL; - - return found_dh; -} - -/* - * scsi_dh_handler_attach - Attach a device handler to a device - * @sdev - SCSI device the device handler should attach to - * @scsi_dh - The device handler to attach - */ -static int scsi_dh_handler_attach(struct scsi_device *sdev, - struct scsi_device_handler *scsi_dh) -{ - struct scsi_dh_data *d; - - if (!try_module_get(scsi_dh->module)) - return -EINVAL; - - d = scsi_dh->attach(sdev); - if (IS_ERR(d)) { - sdev_printk(KERN_ERR, sdev, "%s: Attach failed (%ld)\n", - scsi_dh->name, PTR_ERR(d)); - module_put(scsi_dh->module); - return PTR_ERR(d); - } - - d->scsi_dh = scsi_dh; - d->sdev = sdev; - - spin_lock_irq(sdev->request_queue->queue_lock); - sdev->scsi_dh_data = d; - spin_unlock_irq(sdev->request_queue->queue_lock); - return 0; -} - -/* - * scsi_dh_handler_detach - Detach a device handler from a device - * @sdev - SCSI device the device handler should be detached from - */ -static void scsi_dh_handler_detach(struct scsi_device *sdev) -{ - struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data; - struct scsi_device_handler *scsi_dh = scsi_dh_data->scsi_dh; - - scsi_dh->detach(sdev); - - spin_lock_irq(sdev->request_queue->queue_lock); - sdev->scsi_dh_data = NULL; - spin_unlock_irq(sdev->request_queue->queue_lock); - - sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", scsi_dh->name); - module_put(scsi_dh->module); -} - -/* - * Functions for sysfs attribute 'dh_state' - */ -static ssize_t -store_dh_state(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct scsi_device *sdev = to_scsi_device(dev); - struct scsi_device_handler *scsi_dh; - int err = -EINVAL; - - if (sdev->sdev_state == SDEV_CANCEL || - sdev->sdev_state == SDEV_DEL) - return -ENODEV; - - if (!sdev->scsi_dh_data) { - /* - * Attach to a device handler - */ - scsi_dh = scsi_dh_lookup(buf); - if (!scsi_dh) - return err; - err = scsi_dh_handler_attach(sdev, scsi_dh); - } else { - scsi_dh = sdev->scsi_dh_data->scsi_dh; - if (!strncmp(buf, "detach", 6)) { - /* - * Detach from a device handler - */ - scsi_dh_handler_detach(sdev); - err = 0; - } else if (!strncmp(buf, "activate", 8)) { - /* - * Activate a device handler - */ - if (scsi_dh->activate) - err = scsi_dh->activate(sdev, NULL, NULL); - else - err = 0; - } - } - - return err<0?err:count; -} - -static ssize_t -show_dh_state(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct scsi_device *sdev = to_scsi_device(dev); - - if (!sdev->scsi_dh_data) - return snprintf(buf, 20, "detached\n"); - - return snprintf(buf, 20, "%s\n", sdev->scsi_dh_data->scsi_dh->name); -} - -static struct device_attribute scsi_dh_state_attr = - __ATTR(dh_state, S_IRUGO | S_IWUSR, show_dh_state, - store_dh_state); - -/* - * scsi_dh_sysfs_attr_add - Callback for scsi_init_dh - */ -static int scsi_dh_sysfs_attr_add(struct device *dev, void *data) -{ - struct scsi_device *sdev; - int err; - - if (!scsi_is_sdev_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - err = device_create_file(&sdev->sdev_gendev, - &scsi_dh_state_attr); - - return 0; -} - -/* - * scsi_dh_sysfs_attr_remove - Callback for scsi_exit_dh - */ -static int scsi_dh_sysfs_attr_remove(struct device *dev, void *data) -{ - struct scsi_device *sdev; - - if 
(!scsi_is_sdev_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - device_remove_file(&sdev->sdev_gendev, - &scsi_dh_state_attr); - - return 0; -} - -/* - * scsi_dh_notifier - notifier chain callback - */ -static int scsi_dh_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct scsi_device *sdev; - int err = 0; - struct scsi_device_handler *devinfo = NULL; - - if (!scsi_is_sdev_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - if (action == BUS_NOTIFY_ADD_DEVICE) { - err = device_create_file(dev, &scsi_dh_state_attr); - /* don't care about err */ - devinfo = device_handler_match(NULL, sdev); - if (devinfo) - err = scsi_dh_handler_attach(sdev, devinfo); - } else if (action == BUS_NOTIFY_DEL_DEVICE) { - device_remove_file(dev, &scsi_dh_state_attr); - if (sdev->scsi_dh_data) - scsi_dh_handler_detach(sdev); - } - return err; -} - -/* - * scsi_dh_notifier_add - Callback for scsi_register_device_handler - */ -static int scsi_dh_notifier_add(struct device *dev, void *data) -{ - struct scsi_device_handler *scsi_dh = data; - struct scsi_device *sdev; - - if (!scsi_is_sdev_device(dev)) - return 0; - - if (!get_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - if (device_handler_match(scsi_dh, sdev)) - scsi_dh_handler_attach(sdev, scsi_dh); - - put_device(dev); - - return 0; -} - -/* - * scsi_dh_notifier_remove - Callback for scsi_unregister_device_handler - */ -static int scsi_dh_notifier_remove(struct device *dev, void *data) -{ - struct scsi_device_handler *scsi_dh = data; - struct scsi_device *sdev; - - if (!scsi_is_sdev_device(dev)) - return 0; - - if (!get_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh == scsi_dh) - scsi_dh_handler_detach(sdev); - - put_device(dev); - - return 0; -} - -/* - * scsi_register_device_handler - register a device handler personality - * module. - * @scsi_dh - device handler to be registered. - * - * Returns 0 on success, -EBUSY if handler already registered. - */ -int scsi_register_device_handler(struct scsi_device_handler *scsi_dh) -{ - if (__scsi_dh_lookup(scsi_dh->name)) - return -EBUSY; - - if (!scsi_dh->attach || !scsi_dh->detach) - return -EINVAL; - - spin_lock(&list_lock); - list_add(&scsi_dh->list, &scsi_dh_list); - spin_unlock(&list_lock); - - bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, scsi_dh_notifier_add); - printk(KERN_INFO "%s: device handler registered\n", scsi_dh->name); - - return SCSI_DH_OK; -} -EXPORT_SYMBOL_GPL(scsi_register_device_handler); - -/* - * scsi_unregister_device_handler - register a device handler personality - * module. - * @scsi_dh - device handler to be unregistered. - * - * Returns 0 on success, -ENODEV if handler not registered. - */ -int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) -{ - - if (!__scsi_dh_lookup(scsi_dh->name)) - return -ENODEV; - - bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, - scsi_dh_notifier_remove); - - spin_lock(&list_lock); - list_del(&scsi_dh->list); - spin_unlock(&list_lock); - printk(KERN_INFO "%s: device handler unregistered\n", scsi_dh->name); - - return SCSI_DH_OK; -} -EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); - -/* - * scsi_dh_activate - activate the path associated with the scsi_device - * corresponding to the given request queue. - * Returns immediately without waiting for activation to be completed. - * @q - Request queue that is associated with the scsi_device to be - * activated. 
- * @fn - Function to be called upon completion of the activation. - * Function fn is called with data (below) and the error code. - * Function fn may be called from the same calling context. So, - * do not hold the lock in the caller which may be needed in fn. - * @data - data passed to the function fn upon completion. - * - */ -int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) -{ - int err = 0; - unsigned long flags; - struct scsi_device *sdev; - struct scsi_device_handler *scsi_dh = NULL; - struct device *dev = NULL; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev) { - spin_unlock_irqrestore(q->queue_lock, flags); - err = SCSI_DH_NOSYS; - if (fn) - fn(data, err); - return err; - } - - if (sdev->scsi_dh_data) - scsi_dh = sdev->scsi_dh_data->scsi_dh; - dev = get_device(&sdev->sdev_gendev); - if (!scsi_dh || !dev || - sdev->sdev_state == SDEV_CANCEL || - sdev->sdev_state == SDEV_DEL) - err = SCSI_DH_NOSYS; - if (sdev->sdev_state == SDEV_OFFLINE) - err = SCSI_DH_DEV_OFFLINED; - spin_unlock_irqrestore(q->queue_lock, flags); - - if (err) { - if (fn) - fn(data, err); - goto out; - } - - if (scsi_dh->activate) - err = scsi_dh->activate(sdev, fn, data); -out: - put_device(dev); - return err; -} -EXPORT_SYMBOL_GPL(scsi_dh_activate); - -/* - * scsi_dh_set_params - set the parameters for the device as per the - * string specified in params. - * @q - Request queue that is associated with the scsi_device for - * which the parameters to be set. - * @params - parameters in the following format - * "no_of_params\0param1\0param2\0param3\0...\0" - * for example, string for 2 parameters with value 10 and 21 - * is specified as "2\010\021\0". - */ -int scsi_dh_set_params(struct request_queue *q, const char *params) -{ - int err = -SCSI_DH_NOSYS; - unsigned long flags; - struct scsi_device *sdev; - struct scsi_device_handler *scsi_dh = NULL; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (sdev && sdev->scsi_dh_data) - scsi_dh = sdev->scsi_dh_data->scsi_dh; - if (scsi_dh && scsi_dh->set_params && get_device(&sdev->sdev_gendev)) - err = 0; - spin_unlock_irqrestore(q->queue_lock, flags); - - if (err) - return err; - err = scsi_dh->set_params(sdev, params); - put_device(&sdev->sdev_gendev); - return err; -} -EXPORT_SYMBOL_GPL(scsi_dh_set_params); - -/* - * scsi_dh_attach - Attach device handler - * @q - Request queue that is associated with the scsi_device - * the handler should be attached to - * @name - name of the handler to attach - */ -int scsi_dh_attach(struct request_queue *q, const char *name) -{ - unsigned long flags; - struct scsi_device *sdev; - struct scsi_device_handler *scsi_dh; - int err = 0; - - scsi_dh = scsi_dh_lookup(name); - if (!scsi_dh) - return -EINVAL; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - err = -ENODEV; - spin_unlock_irqrestore(q->queue_lock, flags); - - if (err) - return err; - - if (sdev->scsi_dh_data) { - if (sdev->scsi_dh_data->scsi_dh != scsi_dh) - err = -EBUSY; - goto out_put_device; - } - - err = scsi_dh_handler_attach(sdev, scsi_dh); - -out_put_device: - put_device(&sdev->sdev_gendev); - return err; -} -EXPORT_SYMBOL_GPL(scsi_dh_attach); - -/* - * scsi_dh_attached_handler_name - Get attached device handler's name - * @q - Request queue that is associated with the scsi_device - * that may have a device handler attached - * @gfp - the GFP mask used in the kmalloc() call when allocating memory - * - * Returns name of 
attached handler, NULL if no handler is attached. - * Caller must take care to free the returned string. - */ -const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) -{ - unsigned long flags; - struct scsi_device *sdev; - const char *handler_name = NULL; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - - if (!sdev) - return NULL; - - if (sdev->scsi_dh_data) - handler_name = kstrdup(sdev->scsi_dh_data->scsi_dh->name, gfp); - - put_device(&sdev->sdev_gendev); - return handler_name; -} -EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name); - -static struct notifier_block scsi_dh_nb = { - .notifier_call = scsi_dh_notifier -}; - -static int __init scsi_dh_init(void) -{ - int r; - - r = bus_register_notifier(&scsi_bus_type, &scsi_dh_nb); - - if (!r) - bus_for_each_dev(&scsi_bus_type, NULL, NULL, - scsi_dh_sysfs_attr_add); - - return r; -} - -static void __exit scsi_dh_exit(void) -{ - bus_for_each_dev(&scsi_bus_type, NULL, NULL, - scsi_dh_sysfs_attr_remove); - bus_unregister_notifier(&scsi_bus_type, &scsi_dh_nb); -} - -module_init(scsi_dh_init); -module_exit(scsi_dh_exit); - -MODULE_DESCRIPTION("SCSI device handler"); -MODULE_AUTHOR("Chandra Seetharaman "); -MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c new file mode 100644 index 0000000..3de9b67 --- /dev/null +++ b/drivers/scsi/scsi_dh.c @@ -0,0 +1,571 @@ +/* + * SCSI device handler infrastructure. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2007 + * Authors: + * Chandra Seetharaman + * Mike Anderson + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <scsi/scsi_dh.h> +#include "scsi_priv.h" + +static DEFINE_SPINLOCK(list_lock); +static LIST_HEAD(scsi_dh_list); + +static struct scsi_device_handler *__scsi_dh_lookup(const char *name) +{ + struct scsi_device_handler *tmp, *found = NULL; + + spin_lock(&list_lock); + list_for_each_entry(tmp, &scsi_dh_list, list) { + if (!strncmp(tmp->name, name, strlen(tmp->name))) { + found = tmp; + break; + } + } + spin_unlock(&list_lock); + return found; +} + +static struct scsi_device_handler *scsi_dh_lookup(const char *name) +{ + struct scsi_device_handler *dh; + + dh = __scsi_dh_lookup(name); + if (!dh) { + request_module(name); + dh = __scsi_dh_lookup(name); + } + + return dh; +} + +/* + * device_handler_match_function - Match a device handler to a device + * @sdev - SCSI device to be tested + * + * Tests @sdev against the match function of all registered device_handler. + * Returns the found device handler or NULL if not found. 
+ */ +static struct scsi_device_handler * +device_handler_match_function(struct scsi_device *sdev) +{ + struct scsi_device_handler *tmp_dh, *found_dh = NULL; + + spin_lock(&list_lock); + list_for_each_entry(tmp_dh, &scsi_dh_list, list) { + if (tmp_dh->match && tmp_dh->match(sdev)) { + found_dh = tmp_dh; + break; + } + } + spin_unlock(&list_lock); + return found_dh; +} + +/* + * device_handler_match - Attach a device handler to a device + * @scsi_dh - The device handler to match against or NULL + * @sdev - SCSI device to be tested against @scsi_dh + * + * Tests @sdev against the device handler @scsi_dh or against + * all registered device_handler if @scsi_dh == NULL. + * Returns the found device handler or NULL if not found. + */ +static struct scsi_device_handler * +device_handler_match(struct scsi_device_handler *scsi_dh, + struct scsi_device *sdev) +{ + struct scsi_device_handler *found_dh; + + found_dh = device_handler_match_function(sdev); + + if (scsi_dh && found_dh != scsi_dh) + found_dh = NULL; + + return found_dh; +} + +/* + * scsi_dh_handler_attach - Attach a device handler to a device + * @sdev - SCSI device the device handler should attach to + * @scsi_dh - The device handler to attach + */ +static int scsi_dh_handler_attach(struct scsi_device *sdev, + struct scsi_device_handler *scsi_dh) +{ + struct scsi_dh_data *d; + + if (!try_module_get(scsi_dh->module)) + return -EINVAL; + + d = scsi_dh->attach(sdev); + if (IS_ERR(d)) { + sdev_printk(KERN_ERR, sdev, "%s: Attach failed (%ld)\n", + scsi_dh->name, PTR_ERR(d)); + module_put(scsi_dh->module); + return PTR_ERR(d); + } + + d->scsi_dh = scsi_dh; + d->sdev = sdev; + + spin_lock_irq(sdev->request_queue->queue_lock); + sdev->scsi_dh_data = d; + spin_unlock_irq(sdev->request_queue->queue_lock); + return 0; +} + +/* + * scsi_dh_handler_detach - Detach a device handler from a device + * @sdev - SCSI device the device handler should be detached from + */ +static void scsi_dh_handler_detach(struct scsi_device *sdev) +{ + struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data; + struct scsi_device_handler *scsi_dh = scsi_dh_data->scsi_dh; + + scsi_dh->detach(sdev); + + spin_lock_irq(sdev->request_queue->queue_lock); + sdev->scsi_dh_data = NULL; + spin_unlock_irq(sdev->request_queue->queue_lock); + + sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", scsi_dh->name); + module_put(scsi_dh->module); +} + +/* + * Functions for sysfs attribute 'dh_state' + */ +static ssize_t +store_dh_state(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device_handler *scsi_dh; + int err = -EINVAL; + + if (sdev->sdev_state == SDEV_CANCEL || + sdev->sdev_state == SDEV_DEL) + return -ENODEV; + + if (!sdev->scsi_dh_data) { + /* + * Attach to a device handler + */ + scsi_dh = scsi_dh_lookup(buf); + if (!scsi_dh) + return err; + err = scsi_dh_handler_attach(sdev, scsi_dh); + } else { + scsi_dh = sdev->scsi_dh_data->scsi_dh; + if (!strncmp(buf, "detach", 6)) { + /* + * Detach from a device handler + */ + scsi_dh_handler_detach(sdev); + err = 0; + } else if (!strncmp(buf, "activate", 8)) { + /* + * Activate a device handler + */ + if (scsi_dh->activate) + err = scsi_dh->activate(sdev, NULL, NULL); + else + err = 0; + } + } + + return err<0?err:count; +} + +static ssize_t +show_dh_state(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + + if (!sdev->scsi_dh_data) + return snprintf(buf, 20, "detached\n"); + 
+ return snprintf(buf, 20, "%s\n", sdev->scsi_dh_data->scsi_dh->name); +} + +static struct device_attribute scsi_dh_state_attr = + __ATTR(dh_state, S_IRUGO | S_IWUSR, show_dh_state, + store_dh_state); + +/* + * scsi_dh_sysfs_attr_add - Callback for scsi_init_dh + */ +static int scsi_dh_sysfs_attr_add(struct device *dev, void *data) +{ + struct scsi_device *sdev; + int err; + + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + + err = device_create_file(&sdev->sdev_gendev, + &scsi_dh_state_attr); + + return 0; +} + +/* + * scsi_dh_sysfs_attr_remove - Callback for scsi_exit_dh + */ +static int scsi_dh_sysfs_attr_remove(struct device *dev, void *data) +{ + struct scsi_device *sdev; + + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + + device_remove_file(&sdev->sdev_gendev, + &scsi_dh_state_attr); + + return 0; +} + +/* + * scsi_dh_notifier - notifier chain callback + */ +static int scsi_dh_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct scsi_device *sdev; + int err = 0; + struct scsi_device_handler *devinfo = NULL; + + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + + if (action == BUS_NOTIFY_ADD_DEVICE) { + err = device_create_file(dev, &scsi_dh_state_attr); + /* don't care about err */ + devinfo = device_handler_match(NULL, sdev); + if (devinfo) + err = scsi_dh_handler_attach(sdev, devinfo); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + device_remove_file(dev, &scsi_dh_state_attr); + if (sdev->scsi_dh_data) + scsi_dh_handler_detach(sdev); + } + return err; +} + +/* + * scsi_dh_notifier_add - Callback for scsi_register_device_handler + */ +static int scsi_dh_notifier_add(struct device *dev, void *data) +{ + struct scsi_device_handler *scsi_dh = data; + struct scsi_device *sdev; + + if (!scsi_is_sdev_device(dev)) + return 0; + + if (!get_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + + if (device_handler_match(scsi_dh, sdev)) + scsi_dh_handler_attach(sdev, scsi_dh); + + put_device(dev); + + return 0; +} + +/* + * scsi_dh_notifier_remove - Callback for scsi_unregister_device_handler + */ +static int scsi_dh_notifier_remove(struct device *dev, void *data) +{ + struct scsi_device_handler *scsi_dh = data; + struct scsi_device *sdev; + + if (!scsi_is_sdev_device(dev)) + return 0; + + if (!get_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + + if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh == scsi_dh) + scsi_dh_handler_detach(sdev); + + put_device(dev); + + return 0; +} + +/* + * scsi_register_device_handler - register a device handler personality + * module. + * @scsi_dh - device handler to be registered. + * + * Returns 0 on success, -EBUSY if handler already registered. + */ +int scsi_register_device_handler(struct scsi_device_handler *scsi_dh) +{ + if (__scsi_dh_lookup(scsi_dh->name)) + return -EBUSY; + + if (!scsi_dh->attach || !scsi_dh->detach) + return -EINVAL; + + spin_lock(&list_lock); + list_add(&scsi_dh->list, &scsi_dh_list); + spin_unlock(&list_lock); + + bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, scsi_dh_notifier_add); + printk(KERN_INFO "%s: device handler registered\n", scsi_dh->name); + + return SCSI_DH_OK; +} +EXPORT_SYMBOL_GPL(scsi_register_device_handler); + +/* + * scsi_unregister_device_handler - unregister a device handler personality + * module. + * @scsi_dh - device handler to be unregistered. + * + * Returns 0 on success, -ENODEV if handler not registered. 
+ */ +int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) +{ + + if (!__scsi_dh_lookup(scsi_dh->name)) + return -ENODEV; + + bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, + scsi_dh_notifier_remove); + + spin_lock(&list_lock); + list_del(&scsi_dh->list); + spin_unlock(&list_lock); + printk(KERN_INFO "%s: device handler unregistered\n", scsi_dh->name); + + return SCSI_DH_OK; +} +EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); + +/* + * scsi_dh_activate - activate the path associated with the scsi_device + * corresponding to the given request queue. + * Returns immediately without waiting for activation to be completed. + * @q - Request queue that is associated with the scsi_device to be + * activated. + * @fn - Function to be called upon completion of the activation. + * Function fn is called with data (below) and the error code. + * Function fn may be called from the same calling context. So, + * do not hold the lock in the caller which may be needed in fn. + * @data - data passed to the function fn upon completion. + * + */ +int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) +{ + int err = 0; + unsigned long flags; + struct scsi_device *sdev; + struct scsi_device_handler *scsi_dh = NULL; + struct device *dev = NULL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (!sdev) { + spin_unlock_irqrestore(q->queue_lock, flags); + err = SCSI_DH_NOSYS; + if (fn) + fn(data, err); + return err; + } + + if (sdev->scsi_dh_data) + scsi_dh = sdev->scsi_dh_data->scsi_dh; + dev = get_device(&sdev->sdev_gendev); + if (!scsi_dh || !dev || + sdev->sdev_state == SDEV_CANCEL || + sdev->sdev_state == SDEV_DEL) + err = SCSI_DH_NOSYS; + if (sdev->sdev_state == SDEV_OFFLINE) + err = SCSI_DH_DEV_OFFLINED; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (err) { + if (fn) + fn(data, err); + goto out; + } + + if (scsi_dh->activate) + err = scsi_dh->activate(sdev, fn, data); +out: + put_device(dev); + return err; +} +EXPORT_SYMBOL_GPL(scsi_dh_activate); + +/* + * scsi_dh_set_params - set the parameters for the device as per the + * string specified in params. + * @q - Request queue that is associated with the scsi_device for + * which the parameters to be set. + * @params - parameters in the following format + * "no_of_params\0param1\0param2\0param3\0...\0" + * for example, string for 2 parameters with value 10 and 21 + * is specified as "2\010\021\0". 
+ */ +int scsi_dh_set_params(struct request_queue *q, const char *params) +{ + int err = -SCSI_DH_NOSYS; + unsigned long flags; + struct scsi_device *sdev; + struct scsi_device_handler *scsi_dh = NULL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (sdev && sdev->scsi_dh_data) + scsi_dh = sdev->scsi_dh_data->scsi_dh; + if (scsi_dh && scsi_dh->set_params && get_device(&sdev->sdev_gendev)) + err = 0; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (err) + return err; + err = scsi_dh->set_params(sdev, params); + put_device(&sdev->sdev_gendev); + return err; +} +EXPORT_SYMBOL_GPL(scsi_dh_set_params); + +/* + * scsi_dh_attach - Attach device handler + * @q - Request queue that is associated with the scsi_device + * the handler should be attached to + * @name - name of the handler to attach + */ +int scsi_dh_attach(struct request_queue *q, const char *name) +{ + unsigned long flags; + struct scsi_device *sdev; + struct scsi_device_handler *scsi_dh; + int err = 0; + + scsi_dh = scsi_dh_lookup(name); + if (!scsi_dh) + return -EINVAL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + err = -ENODEV; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (err) + return err; + + if (sdev->scsi_dh_data) { + if (sdev->scsi_dh_data->scsi_dh != scsi_dh) + err = -EBUSY; + goto out_put_device; + } + + err = scsi_dh_handler_attach(sdev, scsi_dh); + +out_put_device: + put_device(&sdev->sdev_gendev); + return err; +} +EXPORT_SYMBOL_GPL(scsi_dh_attach); + +/* + * scsi_dh_attached_handler_name - Get attached device handler's name + * @q - Request queue that is associated with the scsi_device + * that may have a device handler attached + * @gfp - the GFP mask used in the kmalloc() call when allocating memory + * + * Returns name of attached handler, NULL if no handler is attached. + * Caller must take care to free the returned string. + */ +const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) +{ + unsigned long flags; + struct scsi_device *sdev; + const char *handler_name = NULL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (!sdev) + return NULL; + + if (sdev->scsi_dh_data) + handler_name = kstrdup(sdev->scsi_dh_data->scsi_dh->name, gfp); + + put_device(&sdev->sdev_gendev); + return handler_name; +} +EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name); + +static struct notifier_block scsi_dh_nb = { + .notifier_call = scsi_dh_notifier +}; + +static int __init scsi_dh_init(void) +{ + int r; + + r = bus_register_notifier(&scsi_bus_type, &scsi_dh_nb); + + if (!r) + bus_for_each_dev(&scsi_bus_type, NULL, NULL, + scsi_dh_sysfs_attr_add); + + return r; +} + +static void __exit scsi_dh_exit(void) +{ + bus_for_each_dev(&scsi_bus_type, NULL, NULL, + scsi_dh_sysfs_attr_remove); + bus_unregister_notifier(&scsi_bus_type, &scsi_dh_nb); +} + +module_init(scsi_dh_init); +module_exit(scsi_dh_exit); + +MODULE_DESCRIPTION("SCSI device handler"); +MODULE_AUTHOR("Chandra Seetharaman "); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 086b91d052ebe4ead5d28021afe3bdfd70af15bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:16:57 +0200 Subject: scsi_dh: integrate into the core SCSI code Stop building scsi_dh as a separate module and integrate it fully into the core SCSI code with explicit callouts at bus scan time. 
For now the callouts are placed at the same point as the old bus notifiers were called, but in the future we will be able to look at ALUA INQUIRY data earlier on. Note that this also means that the device handler modules need to be loaded by the time we scan the bus. The next patches will add support for autoloading device handlers at bus scan time to make sure they are always loaded if they are enabled in the kernel config. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Acked-by: Mike Snitzer Signed-off-by: James Bottomley diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 471d0879..1a8c9b5 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -172,6 +172,7 @@ scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o scsi_mod-y += scsi_trace.o scsi_logging.o scsi_mod-$(CONFIG_PM) += scsi_pm.o +scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o hv_storvsc-y := storvsc_drv.o diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig index 69abd0a..e5647d5 100644 --- a/drivers/scsi/device_handler/Kconfig +++ b/drivers/scsi/device_handler/Kconfig @@ -3,7 +3,7 @@ # menuconfig SCSI_DH - tristate "SCSI Device Handlers" + bool "SCSI Device Handlers" depends on SCSI default n help diff --git a/drivers/scsi/device_handler/Makefile b/drivers/scsi/device_handler/Makefile index e1d2ea0..09866c5 100644 --- a/drivers/scsi/device_handler/Makefile +++ b/drivers/scsi/device_handler/Makefile @@ -1,7 +1,6 @@ # # SCSI Device Handler # -obj-$(CONFIG_SCSI_DH) += scsi_dh.o obj-$(CONFIG_SCSI_DH_RDAC) += scsi_dh_rdac.o obj-$(CONFIG_SCSI_DH_HP_SW) += scsi_dh_hp_sw.o obj-$(CONFIG_SCSI_DH_EMC) += scsi_dh_emc.o diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 3de9b67..f0dfdcc 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -57,15 +57,8 @@ static struct scsi_device_handler *scsi_dh_lookup(const char *name) return dh; } -/* - * device_handler_match_function - Match a device handler to a device - * @sdev - SCSI device to be tested - * - * Tests @sdev against the match function of all registered device_handler. - * Returns the found device handler or NULL if not found. - */ static struct scsi_device_handler * -device_handler_match_function(struct scsi_device *sdev) +device_handler_match(struct scsi_device *sdev) { struct scsi_device_handler *tmp_dh, *found_dh = NULL; @@ -81,29 +74,6 @@ device_handler_match_function(struct scsi_device *sdev) } /* - * device_handler_match - Attach a device handler to a device - * @scsi_dh - The device handler to match against or NULL - * @sdev - SCSI device to be tested against @scsi_dh - * - * Tests @sdev against the device handler @scsi_dh or against - * all registered device_handler if @scsi_dh == NULL. - * Returns the found device handler or NULL if not found. 
- */ -static struct scsi_device_handler * -device_handler_match(struct scsi_device_handler *scsi_dh, - struct scsi_device *sdev) -{ - struct scsi_device_handler *found_dh; - - found_dh = device_handler_match_function(sdev); - - if (scsi_dh && found_dh != scsi_dh) - found_dh = NULL; - - return found_dh; -} - -/* * scsi_dh_handler_attach - Attach a device handler to a device * @sdev - SCSI device the device handler should attach to * @scsi_dh - The device handler to attach @@ -212,119 +182,26 @@ static struct device_attribute scsi_dh_state_attr = __ATTR(dh_state, S_IRUGO | S_IWUSR, show_dh_state, store_dh_state); -/* - * scsi_dh_sysfs_attr_add - Callback for scsi_init_dh - */ -static int scsi_dh_sysfs_attr_add(struct device *dev, void *data) +int scsi_dh_add_device(struct scsi_device *sdev) { - struct scsi_device *sdev; + struct scsi_device_handler *devinfo; int err; - if (!scsi_is_sdev_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - err = device_create_file(&sdev->sdev_gendev, - &scsi_dh_state_attr); - - return 0; -} - -/* - * scsi_dh_sysfs_attr_remove - Callback for scsi_exit_dh - */ -static int scsi_dh_sysfs_attr_remove(struct device *dev, void *data) -{ - struct scsi_device *sdev; - - if (!scsi_is_sdev_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - device_remove_file(&sdev->sdev_gendev, - &scsi_dh_state_attr); - - return 0; -} + err = device_create_file(&sdev->sdev_gendev, &scsi_dh_state_attr); + if (err) + return err; -/* - * scsi_dh_notifier - notifier chain callback - */ -static int scsi_dh_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct scsi_device *sdev; - int err = 0; - struct scsi_device_handler *devinfo = NULL; - - if (!scsi_is_sdev_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - if (action == BUS_NOTIFY_ADD_DEVICE) { - err = device_create_file(dev, &scsi_dh_state_attr); - /* don't care about err */ - devinfo = device_handler_match(NULL, sdev); - if (devinfo) - err = scsi_dh_handler_attach(sdev, devinfo); - } else if (action == BUS_NOTIFY_DEL_DEVICE) { - device_remove_file(dev, &scsi_dh_state_attr); - if (sdev->scsi_dh_data) - scsi_dh_handler_detach(sdev); - } + devinfo = device_handler_match(sdev); + if (devinfo) + err = scsi_dh_handler_attach(sdev, devinfo); return err; } -/* - * scsi_dh_notifier_add - Callback for scsi_register_device_handler - */ -static int scsi_dh_notifier_add(struct device *dev, void *data) +void scsi_dh_remove_device(struct scsi_device *sdev) { - struct scsi_device_handler *scsi_dh = data; - struct scsi_device *sdev; - - if (!scsi_is_sdev_device(dev)) - return 0; - - if (!get_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - if (device_handler_match(scsi_dh, sdev)) - scsi_dh_handler_attach(sdev, scsi_dh); - - put_device(dev); - - return 0; -} - -/* - * scsi_dh_notifier_remove - Callback for scsi_unregister_device_handler - */ -static int scsi_dh_notifier_remove(struct device *dev, void *data) -{ - struct scsi_device_handler *scsi_dh = data; - struct scsi_device *sdev; - - if (!scsi_is_sdev_device(dev)) - return 0; - - if (!get_device(dev)) - return 0; - - sdev = to_scsi_device(dev); - - if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh == scsi_dh) + if (sdev->scsi_dh_data) scsi_dh_handler_detach(sdev); - - put_device(dev); - - return 0; + device_remove_file(&sdev->sdev_gendev, &scsi_dh_state_attr); } /* @@ -346,7 +223,6 @@ int scsi_register_device_handler(struct scsi_device_handler *scsi_dh) list_add(&scsi_dh->list, &scsi_dh_list); 
spin_unlock(&list_lock); - bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, scsi_dh_notifier_add); printk(KERN_INFO "%s: device handler registered\n", scsi_dh->name); return SCSI_DH_OK; @@ -362,13 +238,9 @@ EXPORT_SYMBOL_GPL(scsi_register_device_handler); */ int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) { - if (!__scsi_dh_lookup(scsi_dh->name)) return -ENODEV; - bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, - scsi_dh_notifier_remove); - spin_lock(&list_lock); list_del(&scsi_dh->list); spin_unlock(&list_lock); @@ -538,34 +410,3 @@ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) return handler_name; } EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name); - -static struct notifier_block scsi_dh_nb = { - .notifier_call = scsi_dh_notifier -}; - -static int __init scsi_dh_init(void) -{ - int r; - - r = bus_register_notifier(&scsi_bus_type, &scsi_dh_nb); - - if (!r) - bus_for_each_dev(&scsi_bus_type, NULL, NULL, - scsi_dh_sysfs_attr_add); - - return r; -} - -static void __exit scsi_dh_exit(void) -{ - bus_for_each_dev(&scsi_bus_type, NULL, NULL, - scsi_dh_sysfs_attr_remove); - bus_unregister_notifier(&scsi_bus_type, &scsi_dh_nb); -} - -module_init(scsi_dh_init); -module_exit(scsi_dh_exit); - -MODULE_DESCRIPTION("SCSI device handler"); -MODULE_AUTHOR("Chandra Seetharaman "); -MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index e3902fc..644bb73 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -170,6 +170,15 @@ static inline void scsi_autopm_put_host(struct Scsi_Host *h) {} extern struct async_domain scsi_sd_pm_domain; extern struct async_domain scsi_sd_probe_domain; +/* scsi_dh.c */ +#ifdef CONFIG_SCSI_DH +int scsi_dh_add_device(struct scsi_device *sdev); +void scsi_dh_remove_device(struct scsi_device *sdev); +#else +static inline int scsi_dh_add_device(struct scsi_device *sdev) { return 0; } +static inline void scsi_dh_remove_device(struct scsi_device *sdev) { } +#endif + /* * internal scsi timeout functions: for use by mid-layer and transport * classes. 
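A note on the scsi_priv.h hunk above: it is what lets scsi_mod call scsi_dh_add_device()/scsi_dh_remove_device() unconditionally, because a CONFIG_SCSI_DH=n build sees inline no-op stubs instead. Below is a minimal, self-contained sketch of that stub idiom; CONFIG_FEATURE_X, struct device and the feature_*() functions are illustrative stand-ins, not code from this series, and the file builds as plain C with or without -DCONFIG_FEATURE_X.

/* stub_idiom.c - sketch of the #ifdef-with-inline-stubs pattern.
 * Build enabled:  cc -DCONFIG_FEATURE_X stub_idiom.c
 * Build disabled: cc stub_idiom.c
 */
#include <stdio.h>

struct device {
	const char *name;
};

#ifdef CONFIG_FEATURE_X
/* Real implementation, only compiled when the feature is configured in. */
static int feature_add_device(struct device *dev)
{
	printf("feature attached to %s\n", dev->name);
	return 0;
}

static void feature_remove_device(struct device *dev)
{
	printf("feature detached from %s\n", dev->name);
}
#else
/* Disabled build: inline no-ops, so callers need no #ifdef of their own
 * and the calls compile away entirely. */
static inline int feature_add_device(struct device *dev) { return 0; }
static inline void feature_remove_device(struct device *dev) { }
#endif

int main(void)
{
	struct device dev = { "sda" };

	/* The caller looks exactly the same in both configurations. */
	if (feature_add_device(&dev))
		return 1;
	feature_remove_device(&dev);
	return 0;
}

This mirrors how scsi_sysfs_add_sdev() in the next hunk can call scsi_dh_add_device() without guarding it.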
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 9ad4116..b333389 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1030,11 +1030,20 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) "failed to add device: %d\n", error); return error; } + + error = scsi_dh_add_device(sdev); + if (error) { + sdev_printk(KERN_INFO, sdev, + "failed to add device handler: %d\n", error); + return error; + } + device_enable_async_suspend(&sdev->sdev_dev); error = device_add(&sdev->sdev_dev); if (error) { sdev_printk(KERN_INFO, sdev, "failed to add class device: %d\n", error); + scsi_dh_remove_device(sdev); device_del(&sdev->sdev_gendev); return error; } @@ -1074,6 +1083,7 @@ void __scsi_remove_device(struct scsi_device *sdev) bsg_unregister_queue(sdev->request_queue); device_unregister(&sdev->sdev_dev); transport_remove_device(dev); + scsi_dh_remove_device(sdev); device_del(dev); } else put_device(&sdev->sdev_dev); diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 966b921..3a37b4c45 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -55,7 +55,7 @@ enum { SCSI_DH_NOSYS, SCSI_DH_DRIVER_MAX, }; -#if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE) +#ifdef CONFIG_SCSI_DH extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); extern int scsi_dh_attach(struct request_queue *, const char *); extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t); -- cgit v0.10.2 From d95dbff2a41e934cd8789734b34dc591e78ba11c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:16:58 +0200 Subject: scsi_dh: move device matching to the core code Add a single list of devices that need non-ALUA device handlers to the core scsi_dh code so that we can autoload the modules for them at probe time. While this is a little ugly in terms of architecture it actually significantly simplifies the code in addition to the new autoloading functionality. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Acked-by: Mike Snitzer Signed-off-by: James Bottomley diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 854b568..ace2457 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -819,11 +819,6 @@ static int alua_prep_fn(struct scsi_device *sdev, struct request *req) } -static bool alua_match(struct scsi_device *sdev) -{ - return (scsi_device_tpgs(sdev) != 0); -} - /* * alua_bus_attach - Attach device handler * @sdev: device to be attached to @@ -877,7 +872,6 @@ static struct scsi_device_handler alua_dh = { .check_sense = alua_check_sense, .activate = alua_activate, .set_params = alua_set_params, - .match = alua_match, }; static int __init alua_init(void) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 6ed1caa..fd31e67 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -622,34 +622,6 @@ done: return result; } -static const struct { - char *vendor; - char *model; -} clariion_dev_list[] = { - {"DGC", "RAID"}, - {"DGC", "DISK"}, - {"DGC", "VRAID"}, - {NULL, NULL}, -}; - -static bool clariion_match(struct scsi_device *sdev) -{ - int i; - - if (scsi_device_tpgs(sdev)) - return false; - - for (i = 0; clariion_dev_list[i].vendor; i++) { - if (!strncmp(sdev->vendor, clariion_dev_list[i].vendor, - strlen(clariion_dev_list[i].vendor)) && - !strncmp(sdev->model, clariion_dev_list[i].model, - strlen(clariion_dev_list[i].model))) { - return true; - } - } - return false; -} - static struct scsi_dh_data *clariion_bus_attach(struct scsi_device *sdev) { struct clariion_dh_data *h; @@ -698,7 +670,6 @@ static struct scsi_device_handler clariion_dh = { .activate = clariion_activate, .prep_fn = clariion_prep_fn, .set_params = clariion_set_params, - .match = clariion_match, }; static int __init clariion_init(void) diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 485d995..1bf10d3 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -311,35 +311,6 @@ static int hp_sw_activate(struct scsi_device *sdev, return 0; } -static const struct { - char *vendor; - char *model; -} hp_sw_dh_data_list[] = { - {"COMPAQ", "MSA1000 VOLUME"}, - {"COMPAQ", "HSV110"}, - {"HP", "HSV100"}, - {"DEC", "HSG80"}, - {NULL, NULL}, -}; - -static bool hp_sw_match(struct scsi_device *sdev) -{ - int i; - - if (scsi_device_tpgs(sdev)) - return false; - - for (i = 0; hp_sw_dh_data_list[i].vendor; i++) { - if (!strncmp(sdev->vendor, hp_sw_dh_data_list[i].vendor, - strlen(hp_sw_dh_data_list[i].vendor)) && - !strncmp(sdev->model, hp_sw_dh_data_list[i].model, - strlen(hp_sw_dh_data_list[i].model))) { - return true; - } - } - return false; -} - static struct scsi_dh_data *hp_sw_bus_attach(struct scsi_device *sdev) { struct hp_sw_dh_data *h; @@ -379,7 +350,6 @@ static struct scsi_device_handler hp_sw_dh = { .detach = hp_sw_bus_detach, .activate = hp_sw_activate, .prep_fn = hp_sw_prep_fn, - .match = hp_sw_match, }; static int __init hp_sw_init(void) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index b46ace3..d89616f 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -778,55 +778,6 @@ static int rdac_check_sense(struct scsi_device *sdev, return SCSI_RETURN_NOT_HANDLED; } 
-static const struct { - char *vendor; - char *model; -} rdac_dev_list[] = { - {"IBM", "1722"}, - {"IBM", "1724"}, - {"IBM", "1726"}, - {"IBM", "1742"}, - {"IBM", "1745"}, - {"IBM", "1746"}, - {"IBM", "1813"}, - {"IBM", "1814"}, - {"IBM", "1815"}, - {"IBM", "1818"}, - {"IBM", "3526"}, - {"SGI", "TP9"}, - {"SGI", "IS"}, - {"STK", "OPENstorage D280"}, - {"STK", "FLEXLINE 380"}, - {"SUN", "CSM"}, - {"SUN", "LCSM100"}, - {"SUN", "STK6580_6780"}, - {"SUN", "SUN_6180"}, - {"SUN", "ArrayStorage"}, - {"DELL", "MD3"}, - {"NETAPP", "INF-01-00"}, - {"LSI", "INF-01-00"}, - {"ENGENIO", "INF-01-00"}, - {NULL, NULL}, -}; - -static bool rdac_match(struct scsi_device *sdev) -{ - int i; - - if (scsi_device_tpgs(sdev)) - return false; - - for (i = 0; rdac_dev_list[i].vendor; i++) { - if (!strncmp(sdev->vendor, rdac_dev_list[i].vendor, - strlen(rdac_dev_list[i].vendor)) && - !strncmp(sdev->model, rdac_dev_list[i].model, - strlen(rdac_dev_list[i].model))) { - return true; - } - } - return false; -} - static struct scsi_dh_data *rdac_bus_attach(struct scsi_device *sdev) { struct rdac_dh_data *h; @@ -895,7 +846,6 @@ static struct scsi_device_handler rdac_dh = { .attach = rdac_bus_attach, .detach = rdac_bus_detach, .activate = rdac_activate, - .match = rdac_match, }; static int __init rdac_init(void) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index f0dfdcc..9c15330 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -29,6 +29,67 @@ static DEFINE_SPINLOCK(list_lock); static LIST_HEAD(scsi_dh_list); +struct scsi_dh_blist { + const char *vendor; + const char *model; + const char *driver; +}; + +static const struct scsi_dh_blist scsi_dh_blist[] = { + {"DGC", "RAID", "clariion" }, + {"DGC", "DISK", "clariion" }, + {"DGC", "VRAID", "clariion" }, + + {"COMPAQ", "MSA1000 VOLUME", "hp_sw" }, + {"COMPAQ", "HSV110", "hp_sw" }, + {"HP", "HSV100", "hp_sw"}, + {"DEC", "HSG80", "hp_sw"}, + + {"IBM", "1722", "rdac", }, + {"IBM", "1724", "rdac", }, + {"IBM", "1726", "rdac", }, + {"IBM", "1742", "rdac", }, + {"IBM", "1745", "rdac", }, + {"IBM", "1746", "rdac", }, + {"IBM", "1813", "rdac", }, + {"IBM", "1814", "rdac", }, + {"IBM", "1815", "rdac", }, + {"IBM", "1818", "rdac", }, + {"IBM", "3526", "rdac", }, + {"SGI", "TP9", "rdac", }, + {"SGI", "IS", "rdac", }, + {"STK", "OPENstorage D280", "rdac", }, + {"STK", "FLEXLINE 380", "rdac", }, + {"SUN", "CSM", "rdac", }, + {"SUN", "LCSM100", "rdac", }, + {"SUN", "STK6580_6780", "rdac", }, + {"SUN", "SUN_6180", "rdac", }, + {"SUN", "ArrayStorage", "rdac", }, + {"DELL", "MD3", "rdac", }, + {"NETAPP", "INF-01-00", "rdac", }, + {"LSI", "INF-01-00", "rdac", }, + {"ENGENIO", "INF-01-00", "rdac", }, + {NULL, NULL, NULL }, +}; + +static const char * +scsi_dh_find_driver(struct scsi_device *sdev) +{ + const struct scsi_dh_blist *b; + + if (scsi_device_tpgs(sdev)) + return "alua"; + + for (b = scsi_dh_blist; b->vendor; b++) { + if (!strncmp(sdev->vendor, b->vendor, strlen(b->vendor)) && + !strncmp(sdev->model, b->model, strlen(b->model))) { + return b->driver; + } + } + return NULL; +} + + static struct scsi_device_handler *__scsi_dh_lookup(const char *name) { struct scsi_device_handler *tmp, *found = NULL; @@ -57,22 +118,6 @@ static struct scsi_device_handler *scsi_dh_lookup(const char *name) return dh; } -static struct scsi_device_handler * -device_handler_match(struct scsi_device *sdev) -{ - struct scsi_device_handler *tmp_dh, *found_dh = NULL; - - spin_lock(&list_lock); - list_for_each_entry(tmp_dh, &scsi_dh_list, list) { - if (tmp_dh->match && 
tmp_dh->match(sdev)) { - found_dh = tmp_dh; - break; - } - } - spin_unlock(&list_lock); - return found_dh; -} - /* * scsi_dh_handler_attach - Attach a device handler to a device * @sdev - SCSI device the device handler should attach to @@ -184,14 +229,17 @@ static struct device_attribute scsi_dh_state_attr = int scsi_dh_add_device(struct scsi_device *sdev) { - struct scsi_device_handler *devinfo; + struct scsi_device_handler *devinfo = NULL; + const char *drv; int err; err = device_create_file(&sdev->sdev_gendev, &scsi_dh_state_attr); if (err) return err; - devinfo = device_handler_match(sdev); + drv = scsi_dh_find_driver(sdev); + if (drv) + devinfo = scsi_dh_lookup(drv); if (devinfo) err = scsi_dh_handler_attach(sdev, devinfo); return err; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 798d679..4d501b7 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -215,7 +215,6 @@ struct scsi_device_handler { int (*activate)(struct scsi_device *, activate_complete, void *); int (*prep_fn)(struct scsi_device *, struct request *); int (*set_params)(struct scsi_device *, const char *); - bool (*match)(struct scsi_device *); }; struct scsi_dh_data { -- cgit v0.10.2 From ee14c674e8fc57251223054fb52dc0ecfe711028 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:16:59 +0200 Subject: scsi_dh: kill struct scsi_dh_data Add a ->handler and a ->handler_data field to struct scsi_device and kill this indirection. Also move struct scsi_device_handler to scsi_dh.h so that changes to it don't require rebuilding every SCSI LLDD. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index ace2457..cc2773b 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -62,7 +62,6 @@ #define ALUA_OPTIMIZE_STPG 1 struct alua_dh_data { - struct scsi_dh_data dh_data; int group_id; int rel_port; int tpgs; @@ -86,11 +85,6 @@ struct alua_dh_data { static char print_alua_state(int); static int alua_check_sense(struct scsi_device *, struct scsi_sense_hdr *); -static inline struct alua_dh_data *get_alua_data(struct scsi_device *sdev) -{ - return container_of(sdev->scsi_dh_data, struct alua_dh_data, dh_data); -} - static int realloc_buffer(struct alua_dh_data *h, unsigned len) { if (h->buff && h->buff != h->inq) @@ -708,7 +702,7 @@ out: */ static int alua_set_params(struct scsi_device *sdev, const char *params) { - struct alua_dh_data *h = get_alua_data(sdev); + struct alua_dh_data *h = sdev->handler_data; unsigned int optimize = 0, argc; const char *p = params; int result = SCSI_DH_OK; @@ -746,7 +740,7 @@ MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than static int alua_activate(struct scsi_device *sdev, activate_complete fn, void *data) { - struct alua_dh_data *h = get_alua_data(sdev); + struct alua_dh_data *h = sdev->handler_data; int err = SCSI_DH_OK; int stpg = 0; @@ -804,7 +798,7 @@ out: */ static int alua_prep_fn(struct scsi_device *sdev, struct request *req) { - struct alua_dh_data *h = get_alua_data(sdev); + struct alua_dh_data *h = sdev->handler_data; int ret = BLKPREP_OK; if (h->state == TPGS_STATE_TRANSITIONING) @@ -823,14 +817,14 @@ static int alua_prep_fn(struct scsi_device *sdev, struct request *req) * alua_bus_attach - Attach device handler * @sdev: device to be attached to */ -static struct 
scsi_dh_data *alua_bus_attach(struct scsi_device *sdev) +static int alua_bus_attach(struct scsi_device *sdev) { struct alua_dh_data *h; int err; h = kzalloc(sizeof(*h) , GFP_KERNEL); if (!h) - return ERR_PTR(-ENOMEM); + return -ENOMEM; h->tpgs = TPGS_MODE_UNINITIALIZED; h->state = TPGS_STATE_OPTIMIZED; h->group_id = -1; @@ -843,11 +837,11 @@ static struct scsi_dh_data *alua_bus_attach(struct scsi_device *sdev) if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) goto failed; - sdev_printk(KERN_NOTICE, sdev, "%s: Attached\n", ALUA_DH_NAME); - return &h->dh_data; + sdev->handler_data = h; + return 0; failed: kfree(h); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* @@ -856,10 +850,11 @@ failed: */ static void alua_bus_detach(struct scsi_device *sdev) { - struct alua_dh_data *h = get_alua_data(sdev); + struct alua_dh_data *h = sdev->handler_data; if (h->buff && h->inq != h->buff) kfree(h->buff); + sdev->handler_data = NULL; kfree(h); } diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index fd31e67..e6fb97c 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -72,7 +72,6 @@ static const char * lun_state[] = }; struct clariion_dh_data { - struct scsi_dh_data dh_data; /* * Flags: * CLARIION_SHORT_TRESPASS @@ -114,13 +113,6 @@ struct clariion_dh_data { int current_sp; }; -static inline struct clariion_dh_data - *get_clariion_data(struct scsi_device *sdev) -{ - return container_of(sdev->scsi_dh_data, struct clariion_dh_data, - dh_data); -} - /* * Parse MODE_SELECT cmd reply. */ @@ -450,7 +442,7 @@ static int clariion_check_sense(struct scsi_device *sdev, static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) { - struct clariion_dh_data *h = get_clariion_data(sdev); + struct clariion_dh_data *h = sdev->handler_data; int ret = BLKPREP_OK; if (h->lun_state != CLARIION_LUN_OWNED) { @@ -533,7 +525,7 @@ retry: static int clariion_activate(struct scsi_device *sdev, activate_complete fn, void *data) { - struct clariion_dh_data *csdev = get_clariion_data(sdev); + struct clariion_dh_data *csdev = sdev->handler_data; int result; result = clariion_send_inquiry(sdev, csdev); @@ -574,7 +566,7 @@ done: */ static int clariion_set_params(struct scsi_device *sdev, const char *params) { - struct clariion_dh_data *csdev = get_clariion_data(sdev); + struct clariion_dh_data *csdev = sdev->handler_data; unsigned int hr = 0, st = 0, argc; const char *p = params; int result = SCSI_DH_OK; @@ -622,14 +614,14 @@ done: return result; } -static struct scsi_dh_data *clariion_bus_attach(struct scsi_device *sdev) +static int clariion_bus_attach(struct scsi_device *sdev) { struct clariion_dh_data *h; int err; h = kzalloc(sizeof(*h) , GFP_KERNEL); if (!h) - return ERR_PTR(-ENOMEM); + return -ENOMEM; h->lun_state = CLARIION_LUN_UNINITIALIZED; h->default_sp = CLARIION_UNBOUND_LU; h->current_sp = CLARIION_UNBOUND_LU; @@ -647,18 +639,19 @@ static struct scsi_dh_data *clariion_bus_attach(struct scsi_device *sdev) CLARIION_NAME, h->current_sp + 'A', h->port, lun_state[h->lun_state], h->default_sp + 'A'); - return &h->dh_data; + + sdev->handler_data = h; + return 0; failed: kfree(h); - return ERR_PTR(-EINVAL); + return -EINVAL; } static void clariion_bus_detach(struct scsi_device *sdev) { - struct clariion_dh_data *h = get_clariion_data(sdev); - - kfree(h); + kfree(sdev->handler_data); + sdev->handler_data = NULL; } static struct scsi_device_handler clariion_dh = { diff --git 
a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 1bf10d3..9406d5f 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -38,7 +38,6 @@ #define HP_SW_PATH_PASSIVE 1 struct hp_sw_dh_data { - struct scsi_dh_data dh_data; unsigned char sense[SCSI_SENSE_BUFFERSIZE]; int path_state; int retries; @@ -50,11 +49,6 @@ struct hp_sw_dh_data { static int hp_sw_start_stop(struct hp_sw_dh_data *); -static inline struct hp_sw_dh_data *get_hp_sw_data(struct scsi_device *sdev) -{ - return container_of(sdev->scsi_dh_data, struct hp_sw_dh_data, dh_data); -} - /* * tur_done - Handle TEST UNIT READY return status * @sdev: sdev the command has been sent to @@ -267,7 +261,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) { - struct hp_sw_dh_data *h = get_hp_sw_data(sdev); + struct hp_sw_dh_data *h = sdev->handler_data; int ret = BLKPREP_OK; if (h->path_state != HP_SW_PATH_ACTIVE) { @@ -292,7 +286,7 @@ static int hp_sw_activate(struct scsi_device *sdev, activate_complete fn, void *data) { int ret = SCSI_DH_OK; - struct hp_sw_dh_data *h = get_hp_sw_data(sdev); + struct hp_sw_dh_data *h = sdev->handler_data; ret = hp_sw_tur(sdev, h); @@ -311,14 +305,14 @@ static int hp_sw_activate(struct scsi_device *sdev, return 0; } -static struct scsi_dh_data *hp_sw_bus_attach(struct scsi_device *sdev) +static int hp_sw_bus_attach(struct scsi_device *sdev) { struct hp_sw_dh_data *h; int ret; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) - return ERR_PTR(-ENOMEM); + return -ENOMEM; h->path_state = HP_SW_PATH_UNINITIALIZED; h->retries = HP_SW_RETRIES; h->sdev = sdev; @@ -330,17 +324,18 @@ static struct scsi_dh_data *hp_sw_bus_attach(struct scsi_device *sdev) sdev_printk(KERN_INFO, sdev, "%s: attached to %s path\n", HP_SW_NAME, h->path_state == HP_SW_PATH_ACTIVE? 
"active":"passive"); - return &h->dh_data; + + sdev->handler_data = h; + return 0; failed: kfree(h); - return ERR_PTR(-EINVAL); + return -EINVAL; } static void hp_sw_bus_detach( struct scsi_device *sdev ) { - struct hp_sw_dh_data *h = get_hp_sw_data(sdev); - - kfree(h); + kfree(sdev->handler_data); + sdev->handler_data = NULL; } static struct scsi_device_handler hp_sw_dh = { diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index d89616f..3613581 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -181,7 +181,6 @@ struct c2_inquiry { }; struct rdac_dh_data { - struct scsi_dh_data dh_data; struct rdac_controller *ctlr; #define UNINITIALIZED_LUN (1 << 8) unsigned lun; @@ -260,11 +259,6 @@ do { \ sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \ } while (0); -static inline struct rdac_dh_data *get_rdac_data(struct scsi_device *sdev) -{ - return container_of(sdev->scsi_dh_data, struct rdac_dh_data, dh_data); -} - static struct request *get_rdac_req(struct scsi_device *sdev, void *buffer, unsigned buflen, int rw) { @@ -544,7 +538,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev, { struct scsi_sense_hdr sense_hdr; int err = SCSI_DH_IO, ret; - struct rdac_dh_data *h = get_rdac_data(sdev); + struct rdac_dh_data *h = sdev->handler_data; ret = scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sense_hdr); if (!ret) @@ -589,7 +583,7 @@ static void send_mode_select(struct work_struct *work) container_of(work, struct rdac_controller, ms_work); struct request *rq; struct scsi_device *sdev = ctlr->ms_sdev; - struct rdac_dh_data *h = get_rdac_data(sdev); + struct rdac_dh_data *h = sdev->handler_data; struct request_queue *q = sdev->request_queue; int err, retry_cnt = RDAC_RETRY_COUNT; struct rdac_queue_data *tmp, *qdata; @@ -648,7 +642,7 @@ static int queue_mode_select(struct scsi_device *sdev, if (!qdata) return SCSI_DH_RETRY; - qdata->h = get_rdac_data(sdev); + qdata->h = sdev->handler_data; qdata->callback_fn = fn; qdata->callback_data = data; @@ -667,7 +661,7 @@ static int queue_mode_select(struct scsi_device *sdev, static int rdac_activate(struct scsi_device *sdev, activate_complete fn, void *data) { - struct rdac_dh_data *h = get_rdac_data(sdev); + struct rdac_dh_data *h = sdev->handler_data; int err = SCSI_DH_OK; int act = 0; @@ -702,7 +696,7 @@ done: static int rdac_prep_fn(struct scsi_device *sdev, struct request *req) { - struct rdac_dh_data *h = get_rdac_data(sdev); + struct rdac_dh_data *h = sdev->handler_data; int ret = BLKPREP_OK; if (h->state != RDAC_STATE_ACTIVE) { @@ -716,7 +710,7 @@ static int rdac_prep_fn(struct scsi_device *sdev, struct request *req) static int rdac_check_sense(struct scsi_device *sdev, struct scsi_sense_hdr *sense_hdr) { - struct rdac_dh_data *h = get_rdac_data(sdev); + struct rdac_dh_data *h = sdev->handler_data; RDAC_LOG(RDAC_LOG_SENSE, sdev, "array %s, ctlr %d, " "I/O returned with sense %02x/%02x/%02x", @@ -778,7 +772,7 @@ static int rdac_check_sense(struct scsi_device *sdev, return SCSI_RETURN_NOT_HANDLED; } -static struct scsi_dh_data *rdac_bus_attach(struct scsi_device *sdev) +static int rdac_bus_attach(struct scsi_device *sdev) { struct rdac_dh_data *h; int err; @@ -787,7 +781,7 @@ static struct scsi_dh_data *rdac_bus_attach(struct scsi_device *sdev) h = kzalloc(sizeof(*h) , GFP_KERNEL); if (!h) - return ERR_PTR(-ENOMEM); + return -ENOMEM; h->lun = UNINITIALIZED_LUN; h->state = RDAC_STATE_ACTIVE; @@ -812,7 +806,8 @@ 
static struct scsi_dh_data *rdac_bus_attach(struct scsi_device *sdev) RDAC_NAME, h->lun, mode[(int)h->mode], lun_state[(int)h->lun_state]); - return &h->dh_data; + sdev->handler_data = h; + return 0; clean_ctlr: spin_lock(&list_lock); @@ -821,12 +816,12 @@ clean_ctlr: failed: kfree(h); - return ERR_PTR(-EINVAL); + return -EINVAL; } static void rdac_bus_detach( struct scsi_device *sdev ) { - struct rdac_dh_data *h = get_rdac_data(sdev); + struct rdac_dh_data *h = sdev->handler_data; if (h->ctlr && h->ctlr->ms_queued) flush_workqueue(kmpath_rdacd); @@ -835,6 +830,7 @@ static void rdac_bus_detach( struct scsi_device *sdev ) if (h->ctlr) kref_put(&h->ctlr->kref, release_controller); spin_unlock(&list_lock); + sdev->handler_data = NULL; kfree(h); } diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 9c15330..33c1148 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -126,26 +126,20 @@ static struct scsi_device_handler *scsi_dh_lookup(const char *name) static int scsi_dh_handler_attach(struct scsi_device *sdev, struct scsi_device_handler *scsi_dh) { - struct scsi_dh_data *d; + int error; if (!try_module_get(scsi_dh->module)) return -EINVAL; - d = scsi_dh->attach(sdev); - if (IS_ERR(d)) { - sdev_printk(KERN_ERR, sdev, "%s: Attach failed (%ld)\n", - scsi_dh->name, PTR_ERR(d)); + error = scsi_dh->attach(sdev); + if (error) { + sdev_printk(KERN_ERR, sdev, "%s: Attach failed (%d)\n", + scsi_dh->name, error); module_put(scsi_dh->module); - return PTR_ERR(d); - } - - d->scsi_dh = scsi_dh; - d->sdev = sdev; + } else + sdev->handler = scsi_dh; - spin_lock_irq(sdev->request_queue->queue_lock); - sdev->scsi_dh_data = d; - spin_unlock_irq(sdev->request_queue->queue_lock); - return 0; + return error; } /* @@ -154,17 +148,9 @@ static int scsi_dh_handler_attach(struct scsi_device *sdev, */ static void scsi_dh_handler_detach(struct scsi_device *sdev) { - struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data; - struct scsi_device_handler *scsi_dh = scsi_dh_data->scsi_dh; - - scsi_dh->detach(sdev); - - spin_lock_irq(sdev->request_queue->queue_lock); - sdev->scsi_dh_data = NULL; - spin_unlock_irq(sdev->request_queue->queue_lock); - - sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", scsi_dh->name); - module_put(scsi_dh->module); + sdev->handler->detach(sdev); + sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", sdev->handler->name); + module_put(sdev->handler->module); } /* @@ -182,7 +168,7 @@ store_dh_state(struct device *dev, struct device_attribute *attr, sdev->sdev_state == SDEV_DEL) return -ENODEV; - if (!sdev->scsi_dh_data) { + if (!sdev->handler) { /* * Attach to a device handler */ @@ -191,7 +177,6 @@ store_dh_state(struct device *dev, struct device_attribute *attr, return err; err = scsi_dh_handler_attach(sdev, scsi_dh); } else { - scsi_dh = sdev->scsi_dh_data->scsi_dh; if (!strncmp(buf, "detach", 6)) { /* * Detach from a device handler @@ -202,8 +187,8 @@ store_dh_state(struct device *dev, struct device_attribute *attr, /* * Activate a device handler */ - if (scsi_dh->activate) - err = scsi_dh->activate(sdev, NULL, NULL); + if (sdev->handler->activate) + err = sdev->handler->activate(sdev, NULL, NULL); else err = 0; } @@ -217,10 +202,10 @@ show_dh_state(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); - if (!sdev->scsi_dh_data) + if (!sdev->handler) return snprintf(buf, 20, "detached\n"); - return snprintf(buf, 20, "%s\n", sdev->scsi_dh_data->scsi_dh->name); + return snprintf(buf, 20, "%s\n", 
sdev->handler->name); } static struct device_attribute scsi_dh_state_attr = @@ -247,7 +232,7 @@ int scsi_dh_add_device(struct scsi_device *sdev) void scsi_dh_remove_device(struct scsi_device *sdev) { - if (sdev->scsi_dh_data) + if (sdev->handler) scsi_dh_handler_detach(sdev); device_remove_file(&sdev->sdev_gendev, &scsi_dh_state_attr); } @@ -316,7 +301,6 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) int err = 0; unsigned long flags; struct scsi_device *sdev; - struct scsi_device_handler *scsi_dh = NULL; struct device *dev = NULL; spin_lock_irqsave(q->queue_lock, flags); @@ -329,10 +313,8 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) return err; } - if (sdev->scsi_dh_data) - scsi_dh = sdev->scsi_dh_data->scsi_dh; dev = get_device(&sdev->sdev_gendev); - if (!scsi_dh || !dev || + if (!sdev->handler || !dev || sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) err = SCSI_DH_NOSYS; @@ -346,8 +328,8 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) goto out; } - if (scsi_dh->activate) - err = scsi_dh->activate(sdev, fn, data); + if (sdev->handler->activate) + err = sdev->handler->activate(sdev, fn, data); out: put_device(dev); return err; @@ -369,19 +351,18 @@ int scsi_dh_set_params(struct request_queue *q, const char *params) int err = -SCSI_DH_NOSYS; unsigned long flags; struct scsi_device *sdev; - struct scsi_device_handler *scsi_dh = NULL; spin_lock_irqsave(q->queue_lock, flags); sdev = q->queuedata; - if (sdev && sdev->scsi_dh_data) - scsi_dh = sdev->scsi_dh_data->scsi_dh; - if (scsi_dh && scsi_dh->set_params && get_device(&sdev->sdev_gendev)) + if (sdev->handler && + sdev->handler->set_params && + get_device(&sdev->sdev_gendev)) err = 0; spin_unlock_irqrestore(q->queue_lock, flags); if (err) return err; - err = scsi_dh->set_params(sdev, params); + err = sdev->handler->set_params(sdev, params); put_device(&sdev->sdev_gendev); return err; } @@ -413,8 +394,8 @@ int scsi_dh_attach(struct request_queue *q, const char *name) if (err) return err; - if (sdev->scsi_dh_data) { - if (sdev->scsi_dh_data->scsi_dh != scsi_dh) + if (sdev->handler) { + if (sdev->handler != scsi_dh) err = -EBUSY; goto out_put_device; } @@ -451,8 +432,8 @@ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) if (!sdev) return NULL; - if (sdev->scsi_dh_data) - handler_name = kstrdup(sdev->scsi_dh_data->scsi_dh->name, gfp); + if (sdev->handler) + handler_name = kstrdup(sdev->handler->name, gfp); put_device(&sdev->sdev_gendev); return handler_name; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 3aacd96..410911c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "scsi_priv.h" @@ -464,11 +465,10 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) if (scsi_sense_is_deferred(&sshdr)) return NEEDS_RETRY; - if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh && - sdev->scsi_dh_data->scsi_dh->check_sense) { + if (sdev->handler && sdev->handler->check_sense) { int rc; - rc = sdev->scsi_dh_data->scsi_dh->check_sense(sdev, &sshdr); + rc = sdev->handler->check_sense(sdev, &sshdr); if (rc != SCSI_RETURN_NOT_HANDLED) return rc; /* handler does not care. 
Drop down to default handling */ diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 882864f..cbfc599 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1248,9 +1249,8 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd = req->special; - if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh - && sdev->scsi_dh_data->scsi_dh->prep_fn)) { - int ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req); + if (unlikely(sdev->handler && sdev->handler->prep_fn)) { + int ret = sdev->handler->prep_fn(sdev, req); if (ret != BLKPREP_OK) return ret; } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 4d501b7..fe89d7c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -196,32 +196,13 @@ struct scsi_device { struct execute_work ew; /* used to get process context on put */ struct work_struct requeue_work; - struct scsi_dh_data *scsi_dh_data; + struct scsi_device_handler *handler; + void *handler_data; + enum scsi_device_state sdev_state; unsigned long sdev_data[0]; } __attribute__((aligned(sizeof(unsigned long)))); -typedef void (*activate_complete)(void *, int); -struct scsi_device_handler { - /* Used by the infrastructure */ - struct list_head list; /* list of scsi_device_handlers */ - - /* Filled by the hardware handler */ - struct module *module; - const char *name; - int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *); - struct scsi_dh_data *(*attach)(struct scsi_device *); - void (*detach)(struct scsi_device *); - int (*activate)(struct scsi_device *, activate_complete, void *); - int (*prep_fn)(struct scsi_device *, struct request *); - int (*set_params)(struct scsi_device *, const char *); -}; - -struct scsi_dh_data { - struct scsi_device_handler *scsi_dh; - struct scsi_device *sdev; -}; - #define to_scsi_device(d) \ container_of(d, struct scsi_device, sdev_gendev) #define class_to_sdev(d) \ diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 3a37b4c45..85d7317 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -55,6 +55,23 @@ enum { SCSI_DH_NOSYS, SCSI_DH_DRIVER_MAX, }; + +typedef void (*activate_complete)(void *, int); +struct scsi_device_handler { + /* Used by the infrastructure */ + struct list_head list; /* list of scsi_device_handlers */ + + /* Filled by the hardware handler */ + struct module *module; + const char *name; + int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *); + int (*attach)(struct scsi_device *); + void (*detach)(struct scsi_device *); + int (*activate)(struct scsi_device *, activate_complete, void *); + int (*prep_fn)(struct scsi_device *, struct request *); + int (*set_params)(struct scsi_device *, const char *); +}; + #ifdef CONFIG_SCSI_DH extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); extern int scsi_dh_attach(struct request_queue *, const char *); -- cgit v0.10.2 From e959ed9a44c5239863ca6db42cb37130bee3c7a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:17:00 +0200 Subject: scsi_dh: add a common helper to get a scsi_device from a request_queue And cleanup the various messy opencoded versions of this. Note that this moves the sdev_state checks outside the queue_lock coverage, but as we don't hold the lock over the activation they are only advisory anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 33c1148..2e1db57 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -283,6 +283,20 @@ int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) } EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); +static struct scsi_device *get_sdev_from_queue(struct request_queue *q) +{ + struct scsi_device *sdev; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + spin_unlock_irqrestore(q->queue_lock, flags); + + return sdev; +} + /* * scsi_dh_activate - activate the path associated with the scsi_device * corresponding to the given request queue. @@ -298,41 +312,37 @@ EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); */ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) { - int err = 0; - unsigned long flags; struct scsi_device *sdev; - struct device *dev = NULL; + int err = SCSI_DH_NOSYS; - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; + sdev = get_sdev_from_queue(q); if (!sdev) { - spin_unlock_irqrestore(q->queue_lock, flags); - err = SCSI_DH_NOSYS; if (fn) fn(data, err); return err; } - dev = get_device(&sdev->sdev_gendev); - if (!sdev->handler || !dev || - sdev->sdev_state == SDEV_CANCEL || + if (!sdev->handler) + goto out_fn; + if (sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) - err = SCSI_DH_NOSYS; - if (sdev->sdev_state == SDEV_OFFLINE) - err = SCSI_DH_DEV_OFFLINED; - spin_unlock_irqrestore(q->queue_lock, flags); + goto out_fn; - if (err) { - if (fn) - fn(data, err); - goto out; - } + err = SCSI_DH_DEV_OFFLINED; + if (sdev->sdev_state == SDEV_OFFLINE) + goto out_fn; if (sdev->handler->activate) err = sdev->handler->activate(sdev, fn, data); -out: - put_device(dev); + +out_put_device: + put_device(&sdev->sdev_gendev); return err; + +out_fn: + if (fn) + fn(data, err); + goto out_put_device; } EXPORT_SYMBOL_GPL(scsi_dh_activate); @@ -348,21 +358,15 @@ EXPORT_SYMBOL_GPL(scsi_dh_activate); */ int scsi_dh_set_params(struct request_queue *q, const char *params) { - int err = -SCSI_DH_NOSYS; - unsigned long flags; struct scsi_device *sdev; + int err = -SCSI_DH_NOSYS; - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (sdev->handler && - sdev->handler->set_params && - get_device(&sdev->sdev_gendev)) - err = 0; - spin_unlock_irqrestore(q->queue_lock, flags); - - if (err) + sdev = get_sdev_from_queue(q); + if (!sdev) return err; - err = sdev->handler->set_params(sdev, params); + + if (sdev->handler && sdev->handler->set_params) + err = sdev->handler->set_params(sdev, params); put_device(&sdev->sdev_gendev); return err; } @@ -376,23 +380,19 @@ EXPORT_SYMBOL_GPL(scsi_dh_set_params); */ int scsi_dh_attach(struct request_queue *q, const char *name) { - unsigned long flags; struct scsi_device *sdev; struct scsi_device_handler *scsi_dh; int err = 0; - scsi_dh = scsi_dh_lookup(name); - if (!scsi_dh) - return -EINVAL; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - err = -ENODEV; - spin_unlock_irqrestore(q->queue_lock, flags); + sdev = get_sdev_from_queue(q); + if (!sdev) + return -ENODEV; - if (err) - return err; + scsi_dh = scsi_dh_lookup(name); + if (!scsi_dh) { + err = -EINVAL; + goto out_put_device; + } if (sdev->handler) { if (sdev->handler != scsi_dh) @@ -419,22 +419,15 @@ 
EXPORT_SYMBOL_GPL(scsi_dh_attach); */ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) { - unsigned long flags; struct scsi_device *sdev; const char *handler_name = NULL; - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - + sdev = get_sdev_from_queue(q); if (!sdev) return NULL; if (sdev->handler) handler_name = kstrdup(sdev->handler->name, gfp); - put_device(&sdev->sdev_gendev); return handler_name; } -- cgit v0.10.2 From d44227749500d8b88a1c079bc04f69187eaf8747 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2015 14:17:01 +0200 Subject: scsi_dh: don't allow to detach device handlers at runtime The I/O submission and completion paths call into the device handler without any synchronization against detachment. So disallow detaching device handlers at runtime. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 2e1db57..9f6511d 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -181,8 +181,10 @@ store_dh_state(struct device *dev, struct device_attribute *attr, /* * Detach from a device handler */ - scsi_dh_handler_detach(sdev); - err = 0; + sdev_printk(KERN_WARNING, sdev, + "can't detach handler %s.\n", + sdev->handler->name); + err = -EINVAL; } else if (!strncmp(buf, "activate", 8)) { /* * Activate a device handler -- cgit v0.10.2 From 710105fda7a7f350c9fb22e7f61f74c3dc0fe514 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 27 Aug 2015 14:17:02 +0200 Subject: scsi_dh: return SCSI_DH_NOTCONN in scsi_dh_activate() When calling scsi_dh_activate() we should be returning SCSI_DH_NOTCONN if the device handler couldn't be attached. Reviewed-by: Bart van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 9f6511d..edb044a 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -326,6 +326,7 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) if (!sdev->handler) goto out_fn; + err = SCSI_DH_NOTCONN; if (sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) goto out_fn; -- cgit v0.10.2 From ca9f26a27949ba3b295e4f0841c0bec9ef440141 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Mon, 31 Aug 2015 21:18:56 +0800 Subject: spi: mediatek: remove clk_disable_unprepare() This patch removes clk_disable_unprepare() in mtk_spi_remove(). clk_prepare_enable()/clk_disable_unprepare() calls must be balanced; spi-clk is already disabled in mtk_spi_probe(), so it does not need to be disabled again.
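As background, the common clock framework keeps per-clock prepare and enable counts, and every clk_prepare_enable() must be matched by exactly one clk_disable_unprepare(); an unbalanced extra disable triggers a warning from the clk core. A minimal sketch of the balanced pattern (illustrative only, not the exact spi-mt65xx code):

	ret = clk_prepare_enable(mdata->spi_clk);
	if (ret < 0)
		return ret;
	/* ... use the clock ... */
	clk_disable_unprepare(mdata->spi_clk);	/* exactly one disable per enable */

Since, per the message above, probe already performs the matching disable, the extra call in mtk_spi_remove() would unbalance the count.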
Signed-off-by: Leilk Liu Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 5f6315c..b609b1c 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -630,7 +630,6 @@ static int mtk_spi_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); mtk_spi_reset(mdata); - clk_disable_unprepare(mdata->spi_clk); spi_master_put(master); return 0; -- cgit v0.10.2 From adcbcfea15d62fab5ba40ac28f9d2a590cc5e5e8 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Mon, 31 Aug 2015 21:18:57 +0800 Subject: spi: mediatek: fix spi clock usage error The spi clock is managed in this flow: CLK_TOP_SYSPLL3_D2 ---> CLK_TOP_SPI_SEL ---> CLK_PERI_SPI0 (source clock) (clock mux) (clock gate) The spi driver should select the source clock via the clock mux, then enable the clock gate. Signed-off-by: Leilk Liu Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index b609b1c..6fbb5e5 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -85,7 +85,7 @@ struct mtk_spi { void __iomem *base; u32 state; u32 pad_sel; - struct clk *spi_clk, *parent_clk; + struct clk *parent_clk, *sel_clk, *spi_clk; struct spi_transfer *cur_transfer; u32 xfer_len; struct scatterlist *tx_sgl, *rx_sgl; @@ -576,17 +576,24 @@ static int mtk_spi_probe(struct platform_device *pdev) goto err_put_master; } - mdata->spi_clk = devm_clk_get(&pdev->dev, "spi-clk"); - if (IS_ERR(mdata->spi_clk)) { + mdata->parent_clk = devm_clk_get(&pdev->dev, "parent-clk"); + if (IS_ERR(mdata->parent_clk)) { + ret = PTR_ERR(mdata->parent_clk); + dev_err(&pdev->dev, "failed to get parent-clk: %d\n", ret); + goto err_put_master; + } + + mdata->sel_clk = devm_clk_get(&pdev->dev, "sel-clk"); + if (IS_ERR(mdata->sel_clk)) { ret = PTR_ERR(mdata->spi_clk); - dev_err(&pdev->dev, "failed to get spi-clk: %d\n", ret); + dev_err(&pdev->dev, "failed to get sel-clk: %d\n", ret); goto err_put_master; } - mdata->parent_clk = devm_clk_get(&pdev->dev, "parent-clk"); - if (IS_ERR(mdata->parent_clk)) { + mdata->spi_clk = devm_clk_get(&pdev->dev, "spi-clk"); + if (IS_ERR(mdata->spi_clk)) { ret = PTR_ERR(mdata->parent_clk); - dev_err(&pdev->dev, "failed to get parent-clk: %d\n", ret); + dev_err(&pdev->dev, "failed to get spi-clk: %d\n", ret); goto err_put_master; } @@ -596,7 +603,7 @@ static int mtk_spi_probe(struct platform_device *pdev) goto err_put_master; } - ret = clk_set_parent(mdata->spi_clk, mdata->parent_clk); + ret = clk_set_parent(mdata->sel_clk, mdata->parent_clk); if (ret < 0) { dev_err(&pdev->dev, "failed to clk_set_parent (%d)\n", ret); goto err_disable_clk; -- cgit v0.10.2 From 3d4fe182003bcde778e29e84c14c0c4bb70a452e Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Mon, 31 Aug 2015 21:18:58 +0800 Subject: spi: Mediatek: Document devicetree bindings update for spi bus This patch updates the spi bindings and fixes the clock usage description. Signed-off-by: Leilk Liu Signed-off-by: Mark Brown diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt index dcefc43..6160ffb 100644 --- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt +++ b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt @@ -15,17 +15,18 @@ Required properties: - interrupts: Should contain spi interrupt - clocks: phandles to input clocks. - The first should be <&topckgen CLK_TOP_SPI_SEL>. - The second should be one of the following. + The first should be one of the following. It's PLL. - <&clk26m>: specify parent clock 26MHZ.
- <&topckgen CLK_TOP_SYSPLL3_D2>: specify parent clock 109MHZ. It's the default one. - <&topckgen CLK_TOP_SYSPLL4_D2>: specify parent clock 78MHZ. - <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ. - <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ. + The second should be <&topckgen CLK_TOP_SPI_SEL>. It's clock mux. + The third is <&pericfg CLK_PERI_SPI0>. It's clock gate. -- clock-names: shall be "spi-clk" for the controller clock, and - "parent-clk" for the parent clock. +- clock-names: shall be "parent-clk" for the parent clock, "sel-clk" for the + muxes clock, and "spi-clk" for the clock gate. Optional properties: - mediatek,pad-select: specify which pins group(ck/mi/mo/cs) spi @@ -44,8 +45,11 @@ spi: spi@1100a000 { #size-cells = <0>; reg = <0 0x1100a000 0 0x1000>; interrupts = ; - clocks = <&topckgen CLK_TOP_SPI_SEL>, <&topckgen CLK_TOP_SYSPLL3_D2>; - clock-names = "spi-clk", "parent-clk"; + clocks = <&topckgen CLK_TOP_SYSPLL3_D2>, + <&topckgen CLK_TOP_SPI_SEL>, + <&pericfg CLK_PERI_SPI0>; + clock-names = "parent-clk", "sel-clk", "spi-clk"; + mediatek,pad-select = <0>; status = "disabled"; }; -- cgit v0.10.2 From 6bd3c6f75e0f9baddbf1196a7e3fceabb50c7e3c Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 31 Aug 2015 17:07:12 +0200 Subject: ASoC: fsl-asoc-card: put ASRC OF node in case of unknown device In the case of an unknown DT compatible device, the ASRC OF node possibly acquired earlier by of_parse_phandle() has to be put before returning from the probe method. Signed-off-by: Maciej Szmigiero Signed-off-by: Mark Brown diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 5aeb6ed..96f55ae 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -488,7 +488,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else { dev_err(&pdev->dev, "unknown Device Tree compatible\n"); - return -EINVAL; + ret = -EINVAL; + goto asrc_fail; } /* Common settings for corresponding Freescale CPU DAI driver */ -- cgit v0.10.2 From d76f41982f2fc88492efd96c7c3178044f32e125 Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Mon, 31 Aug 2015 08:24:23 -0700 Subject: ASoC: Document snd-soc-dummy-dai purpose Signed-off-by: Anatol Pomozov Signed-off-by: Mark Brown diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index 362c69a..53dd085 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -101,6 +101,15 @@ static struct snd_soc_codec_driver dummy_codec; SNDRV_PCM_FMTBIT_S32_LE | \ SNDRV_PCM_FMTBIT_U32_LE | \ SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE) +/* + * The dummy CODEC is only meant to be used in situations where there is no + * actual hardware. + * + * If there is actual hardware even if it does not have a control bus + * the hardware will still have constraints like supported samplerates, etc. + * which should be modelled. And the data flow graph also should be modelled + * using DAPM. + */ static struct snd_soc_dai_driver dummy_dai = { .name = "snd-soc-dummy-dai", .playback = { -- cgit v0.10.2 From 7955323bdcab307bd8b1d5ef7a031e4a3d059da3 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Tue, 18 Aug 2015 17:54:30 +0800 Subject: btrfs: Update out-of-date "skip parity stripe" comment Because btrfs supports scrubbing the raid56 parity stripe now.
Signed-off-by: Zhao Lei Signed-off-by: Chris Mason diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c69c75e..6f91360 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3280,13 +3280,13 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, scrub_blocked_if_needed(fs_info); } - /* for raid56, we skip parity stripe */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = get_raid56_logic_offset(physical, num, map, &logical, &stripe_logical); logical += base; if (ret) { + /* it is parity strip */ stripe_logical += base; stripe_end = stripe_logical + increment; ret = scrub_raid56_parity(sctx, map, scrub_dev, -- cgit v0.10.2 From 8c204c9657c32ec5a259ebf852a767afe7efdafa Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Wed, 19 Aug 2015 15:02:40 +0800 Subject: btrfs: Remove unused chunk_tree and chunk_objectid from scrub_enumerate_chunks and scrub_chunk These variables have been unused since they were introduced; remove them. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6f91360..d64f557 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3493,7 +3493,6 @@ out: static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, - u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length, u64 dev_offset, int is_dev_replace) { @@ -3544,8 +3543,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, struct btrfs_root *root = sctx->dev_root; struct btrfs_fs_info *fs_info = root->fs_info; u64 length; - u64 chunk_tree; - u64 chunk_objectid; u64 chunk_offset; int ret = 0; int slot; @@ -3609,8 +3606,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (found_key.offset + length <= start) goto skip; - chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); - chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); /* @@ -3643,9 +3638,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, dev_replace->cursor_right = found_key.offset + length; dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; - ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid, - chunk_offset, length, found_key.offset, - is_dev_replace); + ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, + found_key.offset, is_dev_replace); /* * flush, submit all pending read and write bios, afterwards -- cgit v0.10.2 From 2c4580454fffbf184fdb9292aa19ab1ffc224add Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 16 Jul 2015 15:00:46 +0800 Subject: btrfs: Cleanup for btrfs_calc_num_tolerated_disk_barrier_failures 1: Use ARRAY_SIZE(types) to replace a static-value variant: int num_types = 4; 2: Use 'continue' on condition to reduce one level of indentation if (!XXX) { code; ... } -> if (XXX) continue; code; ... 3: Put setting 'num_tolerated_disk_barrier_failures = 2' into the (num_tolerated_disk_barrier_failures > 2) condition to make the logic neat.
if (num_tolerated_disk_barrier_failures > 0 && XXX) num_tolerated_disk_barrier_failures = 0; else if (num_tolerated_disk_barrier_failures > 1) { if (XXX) num_tolerated_disk_barrier_failures = 1; else if (XXX) num_tolerated_disk_barrier_failures = 2; -> if (num_tolerated_disk_barrier_failures > 0 && XXX) num_tolerated_disk_barrier_failures = 0; if (num_tolerated_disk_barrier_failures > 1 && XXX) num_tolerated_disk_barrier_failures = 1; if (num_tolerated_disk_barrier_failures > 2 && XXX) num_tolerated_disk_barrier_failures = 2; 4: Remove the comment: num_mirrors - 1: if RAID1 or RAID10 is configured and more than 2 mirrors are used. which does not fit the code. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cc15514b..51192d9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3449,13 +3449,12 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; - int num_types = 4; int i; int c; int num_tolerated_disk_barrier_failures = (int)fs_info->fs_devices->num_devices; - for (i = 0; i < num_types; i++) { + for (i = 0; i < ARRAY_SIZE(types); i++) { struct btrfs_space_info *tmp; sinfo = NULL; @@ -3473,44 +3472,38 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( down_read(&sinfo->groups_sem); for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { - if (!list_empty(&sinfo->block_groups[c])) { - u64 flags; - - btrfs_get_block_group_info( - &sinfo->block_groups[c], &space); - if (space.total_bytes == 0 || - space.used_bytes == 0) - continue; - flags = space.flags; - /* - * return - * 0: if dup, single or RAID0 is configured for - * any of metadata, system or data, else - * 1: if RAID5 is configured, or if RAID1 or - * RAID10 is configured and only two mirrors - * are used, else - * 2: if RAID6 is configured, else - * num_mirrors - 1: if RAID1 or RAID10 is - * configured and more than - * 2 mirrors are used.
- */ - if (num_tolerated_disk_barrier_failures > 0 && - ((flags & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID0)) || - ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) - == 0))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1) { - if (flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID10)) { - num_tolerated_disk_barrier_failures = 1; - } else if (flags & - BTRFS_BLOCK_GROUP_RAID6) { - num_tolerated_disk_barrier_failures = 2; - } - } - } + u64 flags; + + if (list_empty(&sinfo->block_groups[c])) + continue; + + btrfs_get_block_group_info(&sinfo->block_groups[c], + &space); + if (space.total_bytes == 0 || space.used_bytes == 0) + continue; + flags = space.flags; + /* + * return + * 0: if dup, single or RAID0 is configured for + * any of metadata, system or data, else + * 1: if RAID5 is configured, or if RAID1 or + * RAID10 is configured and only two mirrors + * are used, else + * 2: if RAID6 is configured + */ + if (num_tolerated_disk_barrier_failures > 0 && + ((flags & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID0)) || + ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0))) + num_tolerated_disk_barrier_failures = 0; + else if (num_tolerated_disk_barrier_failures > 1 && + (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID10))) + num_tolerated_disk_barrier_failures = 1; + else if (num_tolerated_disk_barrier_failures > 2 && + (flags & BTRFS_BLOCK_GROUP_RAID6)) + num_tolerated_disk_barrier_failures = 2; } up_read(&sinfo->groups_sem); } -- cgit v0.10.2 From 943c6e9925d90dc80207322b5799d95fb90ffec0 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Wed, 19 Aug 2015 15:54:15 +0800 Subject: btrfs: Add raid56 support for updating num_tolerated_disk_barrier_failures in btrfs_balance The code for updating fs_info->num_tolerated_disk_barrier_failures in btrfs_balance() lacks raid56 support. Reason: The above code was written on 2012-08-01, together with the first version of btrfs_calc_num_tolerated_disk_barrier_failures(). Then, btrfs_calc_num_tolerated_disk_barrier_failures() got updated later to support raid56, but the code in btrfs_balance() was not updated along with it. Fix: Merge the similar code into a common function: btrfs_get_num_tolerated_disk_barrier_failures() and make it support both cases. This fixes the bug with a bonus of cleanup, and ensures this code can never again fall into the out-of-sync state described above.
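The end result is a single mapping from a block group profile to the number of device failures its barrier writes can tolerate. A rough sketch of how both call sites consume the new helper (illustrative only, 'failures' is a stand-in local, based on the diff below):

	/* single/DUP/RAID0 -> 0, RAID1/RAID5/RAID10 -> 1, RAID6 -> 2 */
	failures = min(failures,
		       btrfs_get_num_tolerated_disk_barrier_failures(flags));

so btrfs_calc_num_tolerated_disk_barrier_failures() and btrfs_balance() both take the min() over the profiles they see instead of duplicating the flag checks.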
Suggested-by: Anand Jain Signed-off-by: Zhao Lei Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51192d9..0b658d0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3440,6 +3440,26 @@ static int barrier_all_devices(struct btrfs_fs_info *info) return 0; } +int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) +{ + if ((flags & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_AVAIL_ALLOC_BIT_SINGLE)) || + ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) + return 0; + + if (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID10)) + return 1; + + if (flags & BTRFS_BLOCK_GROUP_RAID6) + return 2; + + pr_warn("BTRFS: unknown raid type: %llu\n", flags); + return 0; +} + int btrfs_calc_num_tolerated_disk_barrier_failures( struct btrfs_fs_info *fs_info) { @@ -3482,28 +3502,11 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( if (space.total_bytes == 0 || space.used_bytes == 0) continue; flags = space.flags; - /* - * return - * 0: if dup, single or RAID0 is configured for - * any of metadata, system or data, else - * 1: if RAID5 is configured, or if RAID1 or - * RAID10 is configured and only two mirrors - * are used, else - * 2: if RAID6 is configured - */ - if (num_tolerated_disk_barrier_failures > 0 && - ((flags & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID0)) || - ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1 && - (flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID10))) - num_tolerated_disk_barrier_failures = 1; - else if (num_tolerated_disk_barrier_failures > 2 && - (flags & BTRFS_BLOCK_GROUP_RAID6)) - num_tolerated_disk_barrier_failures = 2; + + num_tolerated_disk_barrier_failures = min( + num_tolerated_disk_barrier_failures, + btrfs_get_num_tolerated_disk_barrier_failures( + flags)); } up_read(&sinfo->groups_sem); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index d4cbfee..bdfb479 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -139,6 +139,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid); int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)); +int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int btrfs_calc_num_tolerated_disk_barrier_failures( struct btrfs_fs_info *fs_info); int __init btrfs_end_io_wq_init(void); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 69520df..644e070 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3585,23 +3585,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } while (read_seqretry(&fs_info->profiles_lock, seq)); if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - int num_tolerated_disk_barrier_failures; - u64 target = bctl->sys.target; - - num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); - if (num_tolerated_disk_barrier_failures > 0 && - (target & - (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_AVAIL_ALLOC_BIT_SINGLE))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1 && - (target & - (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))) - num_tolerated_disk_barrier_failures = 1; - - fs_info->num_tolerated_disk_barrier_failures = - num_tolerated_disk_barrier_failures; + fs_info->num_tolerated_disk_barrier_failures = min( + btrfs_calc_num_tolerated_disk_barrier_failures(fs_info), + 
btrfs_get_num_tolerated_disk_barrier_failures( + bctl->sys.target)); } ret = insert_balance_item(fs_info->tree_root, bctl); -- cgit v0.10.2 From c6dd6ea55758cf403bdc07a51a06c2a1d474f906 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 20 Aug 2015 09:30:39 +0800 Subject: btrfs: async_thread: Fix workqueue 'max_active' value when initializing At initialization time, for a threshold-able workqueue, the max_active of the underlying kernel workqueue should be 1 and grow if it hits the threshold. But due to the bad naming (there is a 'max_active' for both the kernel workqueue and the btrfs workqueue), the wrong value was given at workqueue initialization. This patch fixes it and, to avoid further misunderstanding, renames the btrfs_workqueue members to 'current_active' and 'limit_active'. Corresponding comments are also added for readability. Reported-by: Alex Lyakas Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 1ce06c84..3e36e4a 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -42,8 +42,14 @@ struct __btrfs_workqueue { /* Thresholding related variants */ atomic_t pending; - int max_active; - int current_max; + + /* Up limit of concurrency workers */ + int limit_active; + + /* Current number of concurrency workers */ + int current_active; + + /* Threshold to change current_active */ int thresh; unsigned int count; spinlock_t thres_lock; @@ -88,7 +94,7 @@ BTRFS_WORK_HELPER(scrubnc_helper); BTRFS_WORK_HELPER(scrubparity_helper); static struct __btrfs_workqueue * -__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active, +__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active, int thresh) { struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -96,26 +102,31 @@ __btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active, if (!ret) return NULL; - ret->max_active = max_active; + ret->limit_active = limit_active; atomic_set(&ret->pending, 0); if (thresh == 0) thresh = DFT_THRESHOLD; /* For low threshold, disabling threshold is a better choice */ if (thresh < DFT_THRESHOLD) { - ret->current_max = max_active; + ret->current_active = limit_active; ret->thresh = NO_THRESHOLD; } else { + /* + * For threshold-able wq, let its concurrency grow on demand. + * Use minimal max_active at alloc time to reduce resource + * usage.
+ */ + ret->current_active = 1; ret->thresh = thresh; } if (flags & WQ_HIGHPRI) ret->normal_wq = alloc_workqueue("%s-%s-high", flags, - ret->max_active, - "btrfs", name); + ret->current_active, "btrfs", + name); else ret->normal_wq = alloc_workqueue("%s-%s", flags, - ret->max_active, "btrfs", + ret->current_active, "btrfs", name); if (!ret->normal_wq) { kfree(ret); @@ -134,7 +145,7 @@ __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, unsigned int flags, - int max_active, + int limit_active, int thresh) { struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -143,14 +154,14 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, return NULL; ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, - max_active, thresh); + limit_active, thresh); if (!ret->normal) { kfree(ret); return NULL; } if (flags & WQ_HIGHPRI) { - ret->high = __btrfs_alloc_workqueue(name, flags, max_active, + ret->high = __btrfs_alloc_workqueue(name, flags, limit_active, thresh); if (!ret->high) { __btrfs_destroy_workqueue(ret->normal); @@ -180,7 +191,7 @@ static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) */ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) { - int new_max_active; + int new_current_active; long pending; int need_change = 0; @@ -197,7 +208,7 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) wq->count %= (wq->thresh / 4); if (!wq->count) goto out; - new_max_active = wq->current_max; + new_current_active = wq->current_active; /* * pending may be changed later, but it's OK since we really @@ -205,19 +216,19 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) */ pending = atomic_read(&wq->pending); if (pending > wq->thresh) - new_max_active++; + new_current_active++; if (pending < wq->thresh / 2) - new_max_active--; - new_max_active = clamp_val(new_max_active, 1, wq->max_active); - if (new_max_active != wq->current_max) { + new_current_active--; + new_current_active = clamp_val(new_current_active, 1, wq->limit_active); + if (new_current_active != wq->current_active) { need_change = 1; - wq->current_max = new_max_active; + wq->current_active = new_current_active; } out: spin_unlock(&wq->thres_lock); if (need_change) { - workqueue_set_max_active(wq->normal_wq, wq->current_max); + workqueue_set_max_active(wq->normal_wq, wq->current_active); } } @@ -351,13 +362,13 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) kfree(wq); } -void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) +void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active) { if (!wq) return; - wq->normal->max_active = max; + wq->normal->limit_active = limit_active; if (wq->high) - wq->high->max_active = max; + wq->high->limit_active = limit_active; } void btrfs_set_work_high_priority(struct btrfs_work *work) diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index b0b093b..ad4d064 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -69,7 +69,7 @@ BTRFS_WORK_HELPER_PROTO(scrubparity_helper); struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, unsigned int flags, - int max_active, + int limit_active, int thresh); void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, btrfs_func_t func, -- cgit v0.10.2 From 527afb4493c2892ce89fb74648e72a30b68ba120 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Wed, 19 Aug 2015 14:55:00 +0900 Subject: Btrfs: cleanup: remove unnecessary check before 
btrfs_free_path is called We need not check path before btrfs_free_path() is called because path is checked in btrfs_free_path(). Signed-off-by: Tsutomu Itoh Reviewed-by: Qu Wenruo Signed-off-by: Chris Mason diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564a7de..e54dd59 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -183,8 +183,7 @@ no_valid_dev_replace_entry_found: } out: - if (path) - btrfs_free_path(path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bda3c41..37dd8d0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6905,8 +6905,7 @@ out: trace_btrfs_get_extent(root, em); - if (path) - btrfs_free_path(path); + btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); if (!err) diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a4b9c8b..f31db43 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -115,8 +115,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = -EAGAIN; } out: - if (path) - btrfs_free_path(path); + btrfs_free_path(path); if (ret == -EAGAIN) { if (root->defrag_max.objectid > root->defrag_progress.objectid) goto done; -- cgit v0.10.2 From 51d2eeef1d958ef6834b24f548194f5acea0f499 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 1 Sep 2015 11:14:05 +0530 Subject: ASoC: wm0010: fix memory leak We were aborting if the kzalloc of img_swap fails but without freeing the already allocated out. Similarly we were aborting if spi_sync fails without releasing out and img_swap. Signed-off-by: Sudip Mukherjee Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index 6560a66..9d370a4 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -672,8 +672,10 @@ static int wm0010_boot(struct snd_soc_codec *codec) } img_swap = kzalloc(len, GFP_KERNEL | GFP_DMA); - if (!img_swap) + if (!img_swap) { + kfree(out); goto abort; + } /* We need to re-order for 0010 */ byte_swap_64((u64 *)&pll_rec, img_swap, len); @@ -690,6 +692,8 @@ static int wm0010_boot(struct snd_soc_codec *codec) ret = spi_sync(spi, &m); if (ret != 0) { dev_err(codec->dev, "First PLL write failed: %d\n", ret); + kfree(img_swap); + kfree(out); goto abort; } @@ -697,6 +701,8 @@ static int wm0010_boot(struct snd_soc_codec *codec) ret = spi_sync(spi, &m); if (ret != 0) { dev_err(codec->dev, "Second PLL write failed: %d\n", ret); + kfree(img_swap); + kfree(out); goto abort; } -- cgit v0.10.2 From 31e09b18c863718939e3e9c30eee55f9011d85ee Mon Sep 17 00:00:00 2001 From: Linda Knippers Date: Tue, 1 Sep 2015 15:41:55 -0400 Subject: x86/mm/srat: Print non-volatile flag in SRAT With the addition of NVDIMM support, a question came up as to whether NVDIMM ranges should be in the SRAT with this bit set. I think the consensus was no because the ranges are in the NFIT with proximity domain information there. ACPI is not clear on the meaning of this bit in the SRAT. If someone is setting it, we might want to ask them what they expect to happen with it. Right now this bit is only printed if all the ACPI debug information is turned on. 
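With the extra "%s" added below, a hypothetical SRAT entry that has ACPI_SRAT_MEM_NON_VOLATILE set would be logged along these lines (illustrative output constructed from the new format string, not taken from a real boot log):

	SRAT: Node 0 PXM 0 [mem 0x100000000-0x1ffffffff] hotplug non-volatile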
Signed-off-by: Linda Knippers Acked-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150901194154.GA4939@ljkz400 Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 66338a6..c2aea63 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : ""); + hotpluggable ? " hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); /* Mark hotplug range in memblock. */ if (hotpluggable && memblock_mark_hotplug(start, ma->length)) -- cgit v0.10.2 From 9642d18eee2cd169b60c6ac0f20bda745b5a3d1e Mon Sep 17 00:00:00 2001 From: Vatika Harlalka Date: Tue, 1 Sep 2015 16:50:59 +0200 Subject: nohz: Affine unpinned timers to housekeepers The problem addressed in this patch is about affining unpinned timers. Adaptive or Full Dynticks CPUs are currently disturbed by unnecessary jitter due to firing of such timers on them. This patch will affine timers to online CPUs which are not full dynticks in NOHZ_FULL configured systems. It should not introduce overhead in nohz full off case due to static keys. Signed-off-by: Vatika Harlalka Signed-off-by: Frederic Weisbecker Reviewed-by: Preeti U Murthy Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441119060-2230-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar diff --git a/include/linux/tick.h b/include/linux/tick.h index 48d901f..e312219 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8b864ec..0902e4d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -623,18 +623,21 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu)) + if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) return cpu; rcu_read_lock(); for_each_domain(cpu, sd) { for_each_cpu(i, sched_domain_span(sd)) { - if (!idle_cpu(i)) { + if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) { cpu = i; goto unlock; } } } + + if (!is_housekeeping_cpu(cpu)) + cpu = housekeeping_any_cpu(); unlock: rcu_read_unlock(); return cpu; -- cgit v0.10.2 From 7c8bb6cb95061b3143759459ed6c6b0c73bcfecb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 1 Sep 2015 16:51:00 +0200 Subject: nohz: Assert existing housekeepers when nohz full enabled The code ensures that when nohz full is running, at least the boot CPU serves as a housekeeper and it can't be later 
offlined. Let's assert this assumption to make sure that we have CPUs to handle unbound jobs like workqueues and timers while nohz full CPUs run undisturbed. Also improve the comments on housekeeper offlining prevention. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Vatika Harlalka Link: http://lkml.kernel.org/r/1441119060-2230-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3319e16..7c7ec45 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str) __setup("nohz_full=", tick_nohz_full_setup); static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: /* - * If we handle the timekeeping duty for full dynticks CPUs, - * we can't safely shutdown that CPU. + * The boot CPU handles housekeeping duty (unbound timers, + * workqueues, timekeeping, ...) on behalf of full dynticks + * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) return NOTIFY_BAD; @@ -370,6 +371,12 @@ void __init tick_nohz_init(void) cpu_notifier(tick_nohz_cpu_down_callback, 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); + + /* + * We need at least one CPU to handle housekeeping work such + * as timekeeping, unbound timers, workqueues, ... + */ + WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } #endif -- cgit v0.10.2 From 02bc933ebb59208f42c2e6305b2c17fd306f695d Mon Sep 17 00:00:00 2001 From: "Tan, Jui Nee" Date: Tue, 1 Sep 2015 10:22:51 +0800 Subject: spi: spi-pxa2xx: Check status register to determine if SSSR_TINT is disabled On Intel Baytrail, there is a case where the interrupt handler gets called but no SPI message is captured. The RX FIFO is indeed empty when the RX timeout pending interrupt (SSSR_TINT) happens. This occurs with a BIOS version where both HSUART and SPI are on the same IRQ. Both drivers are using IRQF_SHARED when calling the request_irq function. When running two separate and independent SPI and HSUART applications that generate data traffic on both components, the user will see messages like the one below on the console: pxa2xx-spi pxa2xx-spi.0: bad message state in interrupt handler This commit fixes this by first checking the Receiver Time-out Interrupt; if it is disabled, ignore the request and return without servicing. Signed-off-by: Tan, Jui Nee Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 7293d6d..8e4b1a7 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -643,6 +643,10 @@ static irqreturn_t ssp_int(int irq, void *dev_id) if (!(sccr1_reg & SSCR1_TIE)) mask &= ~SSSR_TFS; + /* Ignore RX timeout interrupt if it is disabled */ + if (!(sccr1_reg & SSCR1_TINTE)) + mask &= ~SSSR_TINT; + if (!(status & mask)) return IRQ_NONE; -- cgit v0.10.2 From 9f8df6adf6c53abfa10e243cb289c34eb18ec581 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Wed, 2 Sep 2015 16:14:06 +0530 Subject: regulator: core: fix possible NULL dereference We were checking rdev->supply for NULL after dereferencing it.
Let's check for rdev->supply along with _regulator_is_enabled() and call regulator_enable() only if rdev->supply is not NULL. Signed-off-by: Sudip Mukherjee Signed-off-by: Mark Brown diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index de9f272..7150ff6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1422,11 +1422,10 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) return ret; /* Cascade always-on state to supply */ - if (_regulator_is_enabled(rdev)) { + if (_regulator_is_enabled(rdev) && rdev->supply) { ret = regulator_enable(rdev->supply); if (ret < 0) { - if (rdev->supply) - _regulator_put(rdev->supply); + _regulator_put(rdev->supply); return ret; } } -- cgit v0.10.2 From 5ad4da4302712fba10624d28cb6c269fee592b69 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Sep 2015 09:56:31 +0200 Subject: perf tools: Fix parse_events_add_pmu caller The following commit changed the parse_events_add_pmu interface: 36adec85a86f perf tools: Change parse_events_add_pmu interface but forgot to change one caller. Because of the less strict compilation rules for the bison parser, the compiler did not warn about it. Signed-off-by: Jiri Olsa Cc: Raphael Beamonte Cc: David Ahern Cc: Matt Fleming Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Fixes: 36adec85a86f ("perf tools: Change parse_events_add_pmu interface") Link: http://lkml.kernel.org/r/1441180605-24737-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 591905a..9cd7081 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -255,7 +255,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc list_add_tail(&term->list, head); ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); + ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); parse_events__free_terms(head); $$ = list; } -- cgit v0.10.2 From 53ff6bc37be449f546158a39c528d7814dfb15a1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 18 Aug 2015 12:07:05 +0300 Subject: perf tools: Fix use of wrong event when processing exit events In a couple of cases the 'comm' member of 'union event' has been used instead of the correct member ('fork') when processing exit events. In the cases where it has been used incorrectly, only the 'pid' and 'tid' are affected. The 'pid' value would be correct anyway because it is in the same position in 'comm' and 'fork' events, but the 'tid' would have been incorrectly assigned from 'ppid'. However, for exit events, the kernel puts the current task in the 'ppid' and 'ptid' which is the same as the exiting task. That is 'ppid' == 'pid' and if the task is not multi-threaded, 'pid' == 'tid' i.e. the data goes wrong only when tracing multi-threaded programs. It is hard to find an example of how this would produce an error in practice. There are 3 occurrences of the fix: 1. perf script is only affected if !sample_id_all which only happens on old kernels. 2. intel_pt is only affected when decoding without timestamps and would probably still decode correctly - the exit event is only used to flush out data which anyway gets flushed at the end of the session 3.
intel_bts also uses the exit event to flush data which would probably not cause errors as it would get flushed at the end of the session instead Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1439888825-27708-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index eb51325..284a76e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -768,8 +768,8 @@ static int process_exit_event(struct perf_tool *tool, if (!evsel->attr.sample_id_all) { sample->cpu = 0; sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; + sample->tid = event->fork.tid; + sample->pid = event->fork.pid; } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ea76862..eb0e7f8 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -623,7 +623,7 @@ static int intel_bts_process_event(struct perf_session *session, if (err) return err; if (event->header.type == PERF_RECORD_EXIT) { - err = intel_bts_process_tid_exit(bts, event->comm.tid); + err = intel_bts_process_tid_exit(bts, event->fork.tid); if (err) return err; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bb41c20..535d86f 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1494,7 +1494,7 @@ static int intel_pt_process_event(struct perf_session *session, if (pt->timeless_decoding) { if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, - event->comm.tid, + event->fork.tid, sample->time); } } else if (timestamp) { -- cgit v0.10.2 From 5014c311baa2b21384321fa4a9f617a92e3e56f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Sep 2015 16:46:02 -0600 Subject: block: fix bogus compiler warnings in blk-merge.c The compiler can't figure out that bvprv is initialized whenever 'prev' is set to 1 as well. Use a pointer to bvprv instead, setting it to NULL initially, and get rid of the 'prev' tracking. This dumbs it down enough that gcc is happy. Signed-off-by: Jens Axboe diff --git a/block/blk-merge.c b/block/blk-merge.c index d088cff..cce23ba 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -67,10 +67,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio_set *bs) { struct bio *split; - struct bio_vec bv, bvprv; + struct bio_vec bv, bvprv, *bvprvp = NULL; struct bvec_iter iter; unsigned seg_size = 0, nsegs = 0, sectors = 0; - int prev = 0; bio_for_each_segment(bv, bio, iter) { sectors += bv.bv_len >> 9; @@ -82,20 +81,20 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, * If the queue doesn't support SG gaps and adding this * offset would create a gap, disallow it. 
*/ - if (prev && bvec_gap_to_prev(q, &bvprv, bv.bv_offset)) + if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset)) goto split; - if (prev && blk_queue_cluster(q)) { + if (bvprvp && blk_queue_cluster(q)) { if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) + if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) + if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) goto new_segment; seg_size += bv.bv_len; bvprv = bv; - prev = 1; + bvprvp = &bv; continue; } new_segment: @@ -104,7 +103,7 @@ new_segment: nsegs++; bvprv = bv; - prev = 1; + bvprvp = &bv; seg_size = bv.bv_len; } -- cgit v0.10.2 From de65d2d26f81f7f84c7258b3a137f20e8fa5bb6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 31 Aug 2015 14:17:18 +0200 Subject: null_blk: fix memory leak on cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver was not freeing the memory allocated for internal nullb queues. This patch frees the memory during driver unload. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 17269a3..d394a85 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -406,6 +406,22 @@ static struct blk_mq_ops null_mq_ops = { .complete = null_softirq_done_fn, }; +static void cleanup_queue(struct nullb_queue *nq) +{ + kfree(nq->tag_map); + kfree(nq->cmds); +} + +static void cleanup_queues(struct nullb *nullb) +{ + int i; + + for (i = 0; i < nullb->nr_queues; i++) + cleanup_queue(&nullb->queues[i]); + + kfree(nullb->queues); +} + static void null_del_dev(struct nullb *nullb) { list_del_init(&nullb->list); @@ -415,6 +431,7 @@ static void null_del_dev(struct nullb *nullb) if (queue_mode == NULL_Q_MQ) blk_mq_free_tag_set(&nullb->tag_set); put_disk(nullb->disk); + cleanup_queues(nullb); kfree(nullb); } @@ -459,22 +476,6 @@ static int setup_commands(struct nullb_queue *nq) return 0; } -static void cleanup_queue(struct nullb_queue *nq) -{ - kfree(nq->tag_map); - kfree(nq->cmds); -} - -static void cleanup_queues(struct nullb *nullb) -{ - int i; - - for (i = 0; i < nullb->nr_queues; i++) - cleanup_queue(&nullb->queues[i]); - - kfree(nullb->queues); -} - static int setup_queues(struct nullb *nullb) { nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue), -- cgit v0.10.2 From 5fdb7e1b976dc9d18aff8c711e51d17c4c324a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 31 Aug 2015 14:17:31 +0200 Subject: null_blk: fix wrong capacity when bs is not 512 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set_capacity() sets device's capacity using 512 bytes sectors. null_blk calculates the number of sectors by size / bs, which set_capacity is called with. This led to null_blk exposing the wrong number of sectors when bs is not 512 bytes. 
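To make the arithmetic concrete (illustrative numbers): with gb = 1 and bs = 4096, size is 1073741824 bytes. The old code passed size / bs = 262144 to set_capacity(), which the block layer reads as 262144 * 512 bytes = 128 MiB, while the correct value is size >> 9 = 2097152 sectors:

	/* set_capacity() always takes 512-byte sectors, independent of bs */
	set_capacity(disk, size >> 9);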
Signed-off-by: Matias Bjørling Reviewed-by: Ross Zwisler Signed-off-by: Jens Axboe diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d394a85..a295b98 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -589,8 +589,7 @@ static int null_add_dev(void) blk_queue_physical_block_size(nullb->q, bs); size = gb * 1024 * 1024 * 1024ULL; - sector_div(size, bs); - set_capacity(disk, size); + set_capacity(disk, size >> 9); disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; disk->major = null_major; -- cgit v0.10.2 From ac0e137ab0da80e8fc0db2027598e2f7f82a5a02 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 20:27:32 -0700 Subject: clk: h8s2678: Fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent cleanup removed some include files without checking if the cleaned up code still compiles. This results in the following compile error. drivers/clk/h8300/clk-h8s2678.c: In function ‘h8s2678_pll_clk_setup’: drivers/clk/h8300/clk-h8s2678.c:99:14: error: implicit declaration of function ‘kzalloc’ drivers/clk/h8300/clk-h8s2678.c:138:2: error: implicit declaration of function ‘kfree’ Cc: Yoshinori Sato Signed-off-by: Guenter Roeck Signed-off-by: Stephen Boyd diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c index 2a38eb4..6cf38dc 100644 --- a/drivers/clk/h8300/clk-h8s2678.c +++ b/drivers/clk/h8300/clk-h8s2678.c @@ -8,6 +8,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(clklock); -- cgit v0.10.2 From 21dd33b09c61597df603c654589adffd7955491a Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Wed, 2 Sep 2015 16:18:57 -0600 Subject: kernel/cpu_pm: fix cpu_cluster_pm_exit comment cpu_cluster_pm_exit() must be sent after cpu_cluster_pm_enter() has been sent for the cluster and before any cpu_pm_exit() notifications are sent for any CPU. Cc: Nicolas Pitre Acked-by: Kevin Hilman Signed-off-by: Lina Iyer Signed-off-by: Rafael J. Wysocki diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c index 9656a3c..009cc9a 100644 --- a/kernel/cpu_pm.c +++ b/kernel/cpu_pm.c @@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); * low power state that may have caused some blocks in the same power domain * to reset. * - * Must be called after cpu_pm_exit has been called on all cpus in the power + * Must be called after cpu_cluster_pm_enter has been called for the power * domain, and before cpu_pm_exit has been called on any cpu in the power * domain. Notified drivers can include VFP co-processor, interrupt controller * and its PM extensions, local CPU timers context save/restore which -- cgit v0.10.2 From 7d5d0c8ba369cbfb68eec6912f35197d82214668 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 2 Sep 2015 14:36:48 +0530 Subject: cpufreq: dt: Check OPP count before marking them shared We need to explicitly mark OPPs as shared, when they are not defined with OPP-v2 bindings. But this isn't required to be done if we failed to initialize OPP table. Reorder code to verify OPP count before marking them shared. Fixes: 2e02d8723edf ("cpufreq: dt: Add support for operating-points-v2 bindings") Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index c3583cd..8c38b51 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -239,6 +239,17 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ of_cpumask_init_opp_table(policy->cpus); + /* + * But we need OPP table to function so if it is not there let's + * give platform code chance to provide it for us. + */ + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret <= 0) { + pr_debug("OPP table is not ready, deferring probe\n"); + ret = -EPROBE_DEFER; + goto out_free_opp; + } + if (need_update) { struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); @@ -256,17 +267,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); } - /* - * But we need OPP table to function so if it is not there let's - * give platform code chance to provide it for us. - */ - ret = dev_pm_opp_get_opp_count(cpu_dev); - if (ret <= 0) { - pr_debug("OPP table is not ready, deferring probe\n"); - ret = -EPROBE_DEFER; - goto out_free_opp; - } - priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { ret = -ENOMEM; -- cgit v0.10.2 From 8bc862843901e282e58f5ecd66f1df24366ecb6b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 2 Sep 2015 14:36:49 +0530 Subject: cpufreq: dt: Print error on failing to mark OPPs as shared We need to explicitly mark OPPs as shared, when they are not defined with OPP-v2 bindings. This operation can potentially fail, and in that case we should at least print an error message. Fixes: 2e02d8723edf ("cpufreq: dt: Add support for operating-points-v2 bindings") Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 8c38b51..b1131cf 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -260,7 +260,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) * OPP tables are initialized only for policy->cpu, do it for * others as well. */ - set_cpus_sharing_opps(cpu_dev, policy->cpus); + ret = set_cpus_sharing_opps(cpu_dev, policy->cpus); + if (ret) + dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", + __func__, ret); of_property_read_u32(np, "clock-latency", &transition_latency); } else { -- cgit v0.10.2 From a2022001cebd0825b96aa0f3345ea3ad44ae79d4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 2 Sep 2015 14:36:50 +0530 Subject: cpufreq: dt: Tolerance applies on both sides of target voltage Tolerance applies on both sides of the target voltage, i.e. both min and max sides. But while checking if a voltage is supported by the regulator or not, we haven't taken care of tolerance on the lower side. Fix that. Cc: Lucas Stach Fixes: 045ee45c4ff2 ("cpufreq: cpufreq-dt: disable unsupported OPPs") Signed-off-by: Viresh Kumar Reviewed-by: Lucas Stach Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index b1131cf..3b64c20 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -303,7 +303,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) rcu_read_unlock(); tol_uV = opp_uV * priv->voltage_tolerance / 100; - if (regulator_is_supported_voltage(cpu_reg, opp_uV, + if (regulator_is_supported_voltage(cpu_reg, + opp_uV - tol_uV, opp_uV + tol_uV)) { if (opp_uV < min_uV) min_uV = opp_uV; -- cgit v0.10.2 From 29e47e2173349ee06bd339f7753821c720d50923 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Sep 2015 10:16:13 +0200 Subject: PM / Domains: Try power off masters in error path of __pm_genpd_poweron() While powering up a genpd, its domain masters are first being powered up. In the error path of __pm_genpd_poweron(), we didn't care to try power off these domain masters. Let's deal with that to avoid leaving unused PM domains powered. Signed-off-by: Ulf Hansson Reviewed-by: Krzysztof Kozlowski Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4167201..62f7572 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -213,6 +213,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) } /** + * genpd_queue_power_off_work - Queue up the execution of pm_genpd_poweroff(). + * @genpd: PM domait to power off. + * + * Queue up the execution of pm_genpd_poweroff() unless it's already been done + * before. + */ +static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) +{ + queue_work(pm_wq, &genpd->power_off_work); +} + +/** * __pm_genpd_poweron - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up. * @@ -259,8 +271,12 @@ static int __pm_genpd_poweron(struct generic_pm_domain *genpd) return 0; err: - list_for_each_entry_continue_reverse(link, &genpd->slave_links, slave_node) + list_for_each_entry_continue_reverse(link, + &genpd->slave_links, + slave_node) { genpd_sd_counter_dec(link->master); + genpd_queue_power_off_work(link->master); + } return ret; } @@ -349,18 +365,6 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, } /** - * genpd_queue_power_off_work - Queue up the execution of pm_genpd_poweroff(). - * @genpd: PM domait to power off. - * - * Queue up the execution of pm_genpd_poweroff() unless it's already been done - * before. - */ -static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) -{ - queue_work(pm_wq, &genpd->power_off_work); -} - -/** * pm_genpd_poweroff - Remove power from a given PM domain. * @genpd: PM domain to power down. * -- cgit v0.10.2 From abceaa9cded5f059f8c3b3b6f32730084fe5e39f Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 31 Aug 2015 11:34:05 +0800 Subject: cpuidle/coupled: Add sanity check for safe_state_index Since we are using cpuidle_driver::safe_state_index directly as the target state index, it is better to add the sanity check at the point of registering the driver. Signed-off-by: Xunlei Pang Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 1523e2d..344058f 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -187,6 +187,28 @@ bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state) } /** + * cpuidle_coupled_state_verify - check if the coupled states are correctly set. 
+ * @drv: struct cpuidle_driver for the platform + * + * Returns 0 for valid state values, a negative error code otherwise: + * * -EINVAL if any coupled state(safe_state_index) is wrongly set. + */ +int cpuidle_coupled_state_verify(struct cpuidle_driver *drv) +{ + int i; + + for (i = drv->state_count - 1; i >= 0; i--) { + if (cpuidle_state_is_coupled(drv, i) && + (drv->safe_state_index == i || + drv->safe_state_index < 0 || + drv->safe_state_index >= drv->state_count)) + return -EINVAL; + } + + return 0; +} + +/** * cpuidle_coupled_set_ready - mark a cpu as ready * @coupled: the struct coupled that contains the current cpu */ diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 178c5ad..f87f399 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -35,6 +35,7 @@ extern void cpuidle_remove_sysfs(struct cpuidle_device *dev); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state); +int cpuidle_coupled_state_verify(struct cpuidle_driver *drv); int cpuidle_enter_state_coupled(struct cpuidle_device *dev, struct cpuidle_driver *drv, int next_state); int cpuidle_coupled_register_device(struct cpuidle_device *dev); @@ -46,6 +47,11 @@ bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state) return false; } +static inline int cpuidle_coupled_state_verify(struct cpuidle_driver *drv) +{ + return 0; +} + static inline int cpuidle_enter_state_coupled(struct cpuidle_device *dev, struct cpuidle_driver *drv, int next_state) { diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 5db1478..389ade4 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -227,6 +227,10 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv) if (!drv || !drv->state_count) return -EINVAL; + ret = cpuidle_coupled_state_verify(drv); + if (ret) + return ret; + if (cpuidle_disabled()) return -ENODEV; -- cgit v0.10.2 From b9c93646fd5cb669d096fec5ad25a01f04cfde27 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 3 Sep 2015 12:20:37 +0530 Subject: regulator: pbias: program pbias register offset in pbias driver Add separate compatible strings for every platform and populate the pbias register offset in the driver data. This helps avoid depending on the dt for pbias register offset. Also update the dt binding documentation for the new compatible strings. Suggested-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I Acked-by: Tony Lindgren Signed-off-by: Mark Brown diff --git a/Documentation/devicetree/bindings/regulator/pbias-regulator.txt b/Documentation/devicetree/bindings/regulator/pbias-regulator.txt index 32aa26f..acbcb45 100644 --- a/Documentation/devicetree/bindings/regulator/pbias-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/pbias-regulator.txt @@ -2,7 +2,12 @@ PBIAS internal regulator for SD card dual voltage i/o pads on OMAP SoCs. Required properties: - compatible: - - "ti,pbias-omap" for OMAP2, OMAP3, OMAP4, OMAP5, DRA7. + - should be "ti,pbias-dra7" for DRA7 + - should be "ti,pbias-omap2" for OMAP2 + - should be "ti,pbias-omap3" for OMAP3 + - should be "ti,pbias-omap4" for OMAP4 + - should be "ti,pbias-omap5" for OMAP5 + - "ti,pbias-omap" is deprecated - reg: pbias register offset from syscon base and size of pbias register. 
- syscon : phandle of the system control module - regulator-name : should be diff --git a/drivers/regulator/pbias-regulator.c b/drivers/regulator/pbias-regulator.c index bd2b75c..c21cedb 100644 --- a/drivers/regulator/pbias-regulator.c +++ b/drivers/regulator/pbias-regulator.c @@ -44,6 +44,10 @@ struct pbias_regulator_data { int voltage; }; +struct pbias_of_data { + unsigned int offset; +}; + static const unsigned int pbias_volt_table[] = { 1800000, 3000000 @@ -98,8 +102,35 @@ static struct of_regulator_match pbias_matches[] = { }; #define PBIAS_NUM_REGS ARRAY_SIZE(pbias_matches) +/* Offset from SCM general area (and syscon) base */ + +static const struct pbias_of_data pbias_of_data_omap2 = { + .offset = 0x230, +}; + +static const struct pbias_of_data pbias_of_data_omap3 = { + .offset = 0x2b0, +}; + +static const struct pbias_of_data pbias_of_data_omap4 = { + .offset = 0x60, +}; + +static const struct pbias_of_data pbias_of_data_omap5 = { + .offset = 0x60, +}; + +static const struct pbias_of_data pbias_of_data_dra7 = { + .offset = 0xe00, +}; + static const struct of_device_id pbias_of_match[] = { { .compatible = "ti,pbias-omap", }, + { .compatible = "ti,pbias-omap2", .data = &pbias_of_data_omap2, }, + { .compatible = "ti,pbias-omap3", .data = &pbias_of_data_omap3, }, + { .compatible = "ti,pbias-omap4", .data = &pbias_of_data_omap4, }, + { .compatible = "ti,pbias-omap5", .data = &pbias_of_data_omap5, }, + { .compatible = "ti,pbias-dra7", .data = &pbias_of_data_dra7, }, {}, }; MODULE_DEVICE_TABLE(of, pbias_of_match); @@ -114,6 +145,9 @@ static int pbias_regulator_probe(struct platform_device *pdev) const struct pbias_reg_info *info; int ret = 0; int count, idx, data_idx = 0; + const struct of_device_id *match; + const struct pbias_of_data *data; + unsigned int offset; count = of_regulator_match(&pdev->dev, np, pbias_matches, PBIAS_NUM_REGS); @@ -129,6 +163,20 @@ static int pbias_regulator_probe(struct platform_device *pdev) if (IS_ERR(syscon)) return PTR_ERR(syscon); + match = of_match_device(of_match_ptr(pbias_of_match), &pdev->dev); + if (match && match->data) { + data = match->data; + offset = data->offset; + } else { + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + offset = res->start; + dev_WARN(&pdev->dev, + "using legacy dt data for pbias offset\n"); + } + cfg.regmap = syscon; cfg.dev = &pdev->dev; @@ -141,10 +189,6 @@ static int pbias_regulator_probe(struct platform_device *pdev) if (!info) return -ENODEV; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -EINVAL; - drvdata[data_idx].syscon = syscon; drvdata[data_idx].info = info; drvdata[data_idx].desc.name = info->name; @@ -154,9 +198,9 @@ static int pbias_regulator_probe(struct platform_device *pdev) drvdata[data_idx].desc.volt_table = pbias_volt_table; drvdata[data_idx].desc.n_voltages = 2; drvdata[data_idx].desc.enable_time = info->enable_time; - drvdata[data_idx].desc.vsel_reg = res->start; + drvdata[data_idx].desc.vsel_reg = offset; drvdata[data_idx].desc.vsel_mask = info->vmode; - drvdata[data_idx].desc.enable_reg = res->start; + drvdata[data_idx].desc.enable_reg = offset; drvdata[data_idx].desc.enable_mask = info->enable_mask; drvdata[data_idx].desc.enable_val = info->enable; -- cgit v0.10.2 From 5e7c4274a70aa2d6f485996d0ca1dad52d0039ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Sep 2015 19:28:20 +0300 Subject: block: Check for gaps on front and back merges We are checking for gaps to previous bio_vec, which can only detect back merges gaps. 
Moreover, at the point where we check for a gap, we don't know if we will attempt a back or a front merge. Thus, check for a gap to prev in a back merge attempt and for a gap to next in a front merge attempt. Signed-off-by: Jens Axboe [sagig: Minor rename change] Signed-off-by: Sagi Grimberg diff --git a/block/blk-merge.c b/block/blk-merge.c index cce23ba..d9eddbc 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -438,6 +438,8 @@ no_merge: int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { + if (req_gap_back_merge(req, bio)) + return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; @@ -456,6 +458,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { + + if (req_gap_front_merge(req, bio)) + return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; @@ -482,14 +487,6 @@ static bool req_no_special_merge(struct request *req) return !q->mq_ops && req->special; } -static int req_gap_to_prev(struct request *req, struct bio *next) -{ - struct bio *prev = req->biotail; - - return bvec_gap_to_prev(req->q, &prev->bi_io_vec[prev->bi_vcnt - 1], - next->bi_io_vec[0].bv_offset); -} - static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { @@ -504,7 +501,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (req_no_special_merge(req) || req_no_special_merge(next)) return 0; - if (req_gap_to_prev(req, next->bio)) + if (req_gap_back_merge(req, next->bio)) return 0; /* @@ -712,10 +709,6 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) !blk_write_same_mergeable(rq->bio, bio)) return false; - /* Only check gaps if the bio carries data */ - if (bio_has_data(bio) && req_gap_to_prev(rq, bio)) - return false; - return true; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a622f27..2ff94de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1368,6 +1368,26 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); } +static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, + struct bio *next) +{ + if (!bio_has_data(prev)) + return false; + + return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], + next->bi_io_vec[0].bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, bio, req->bio); +} + struct work_struct; int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); -- cgit v0.10.2 From 9f42a89da6b4dc015631e01ba990d3db2cae2a1b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 2 Sep 2015 10:57:47 +0800 Subject: clk: Hi6220: separately build stub clock driver Previously, the kernel built Hi6220's common clock driver and stub clock driver together. The stub clock driver introduced a dependency on CONFIG_MAILBOX, so the kernel would not build Hi6220's common clock driver because ARM64's defconfig does not enable CONFIG_MAILBOX by default.
So build the stub clock driver and the common clock driver for Hi6220 separately, and let only the stub clock driver depend on CONFIG_MAILBOX. Signed-off-by: Leo Yan Tested-by: Kevin Hilman Signed-off-by: Stephen Boyd diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 2c16807..e434854 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -1,6 +1,12 @@ config COMMON_CLK_HI6220 bool "Hi6220 Clock Driver" - depends on (ARCH_HISI || COMPILE_TEST) && MAILBOX + depends on ARCH_HISI || COMPILE_TEST default ARCH_HISI help Build the Hisilicon Hi6220 clock driver based on the common clock framework. + +config STUB_CLK_HI6220 + bool "Hi6220 Stub Clock Driver" + depends on COMMON_CLK_HI6220 && MAILBOX + help + Build the Hisilicon Hi6220 stub clock driver. diff --git a/drivers/clk/hisilicon/Makefile b/drivers/clk/hisilicon/Makefile index 4a1001a..74dba31 100644 --- a/drivers/clk/hisilicon/Makefile +++ b/drivers/clk/hisilicon/Makefile @@ -7,4 +7,5 @@ obj-y += clk.o clkgate-separated.o clkdivider-hi6220.o obj-$(CONFIG_ARCH_HI3xxx) += clk-hi3620.o obj-$(CONFIG_ARCH_HIP04) += clk-hip04.o obj-$(CONFIG_ARCH_HIX5HD2) += clk-hix5hd2.o -obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o clk-hi6220-stub.o +obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o +obj-$(CONFIG_STUB_CLK_HI6220) += clk-hi6220-stub.o -- cgit v0.10.2 From 4aeca98c49e9873e18b27540a0ba18b22e1b9424 Mon Sep 17 00:00:00 2001 From: João Paulo Rechi Vita Date: Thu, 3 Sep 2015 10:04:49 -0700 Subject: Input: elan_i2c - add ACPI ID "ELAN1000" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ACPI ID is present in the DSDT of the ASUS E202SA laptop. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index e2b7420..fa94530 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1170,6 +1170,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN1000", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, elan_acpi_id); -- cgit v0.10.2 From f2bc114b82ecbd32b3581bebdf40f1f2c4e9941b Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 3 Sep 2015 10:47:25 -0700 Subject: Input: cyttsp - remove unnecessary MODULE_ALIAS() The drivers have an I2C device ID table that is used to create the module aliases, and "cyttsp" and "cyttsp4" are not supported I2C device IDs, so these module aliases are never used.
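The mechanics behind this removal, as a minimal sketch that is not part of the patch (the "foo-ts" device name is made up): an i2c_device_id table exported with MODULE_DEVICE_TABLE() is scanned by modpost at build time, which emits one MODULE_ALIAS("i2c:<name>") per entry, so a hand-written alias either duplicates the table or names an ID nothing ever matches.

#include <linux/module.h>
#include <linux/i2c.h>

/* Each entry below becomes an "i2c:<name>" alias in the built module,
 * e.g. MODULE_ALIAS("i2c:foo-ts"), with no manual MODULE_ALIAS() needed. */
static const struct i2c_device_id foo_ts_id[] = {
	{ "foo-ts", 0 },
	{ }	/* sentinel */
};
MODULE_DEVICE_TABLE(i2c, foo_ts_id);

MODULE_LICENSE("GPL");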
Signed-off-by: Javier Martinez Canillas Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/cyttsp4_i2c.c b/drivers/input/touchscreen/cyttsp4_i2c.c index 9a323dd..a9f95c7 100644 --- a/drivers/input/touchscreen/cyttsp4_i2c.c +++ b/drivers/input/touchscreen/cyttsp4_i2c.c @@ -86,4 +86,3 @@ module_i2c_driver(cyttsp4_i2c_driver); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cypress TrueTouch(R) Standard Product (TTSP) I2C driver"); MODULE_AUTHOR("Cypress"); -MODULE_ALIAS("i2c:cyttsp4"); diff --git a/drivers/input/touchscreen/cyttsp_i2c.c b/drivers/input/touchscreen/cyttsp_i2c.c index 519e2de..eee51b3 100644 --- a/drivers/input/touchscreen/cyttsp_i2c.c +++ b/drivers/input/touchscreen/cyttsp_i2c.c @@ -86,4 +86,3 @@ module_i2c_driver(cyttsp_i2c_driver); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cypress TrueTouch(R) Standard Product (TTSP) I2C driver"); MODULE_AUTHOR("Cypress"); -MODULE_ALIAS("i2c:cyttsp"); -- cgit v0.10.2 From e4dbe796285d96976586487fe0555f678c95b60f Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 3 Sep 2015 10:49:17 -0700 Subject: Input: ab8500-ponkey - Fix module autoload for OF platform driver This platform driver has an OF device ID table, but the OF module alias information is not created, so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c index 1f7e15c..4f5ef5b 100644 --- a/drivers/input/misc/ab8500-ponkey.c +++ b/drivers/input/misc/ab8500-ponkey.c @@ -118,6 +118,7 @@ static const struct of_device_id ab8500_ponkey_match[] = { { .compatible = "stericsson,ab8500-ponkey", }, {} }; +MODULE_DEVICE_TABLE(of, ab8500_ponkey_match); #endif static struct platform_driver ab8500_ponkey_driver = { -- cgit v0.10.2 From 544edf56fdbf3916f4dad4e2dc71de3965a9d964 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 3 Sep 2015 10:49:46 -0700 Subject: Input: pwm-beeper - fix module autoload for OF platform driver This platform driver has an OF device ID table, but the OF module alias information is not created, so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c index e82edf8..f2261ab 100644 --- a/drivers/input/misc/pwm-beeper.c +++ b/drivers/input/misc/pwm-beeper.c @@ -173,6 +173,7 @@ static const struct of_device_id pwm_beeper_match[] = { { .compatible = "pwm-beeper", }, { }, }; +MODULE_DEVICE_TABLE(of, pwm_beeper_match); #endif static struct platform_driver pwm_beeper_driver = { -- cgit v0.10.2 From 731857715f03035c812c3f6bdcb6b0179150c1aa Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 3 Sep 2015 10:50:21 -0700 Subject: Input: regulator-haptic - fix module autoload for OF platform driver This platform driver has an OF device ID table, but the OF module alias information is not created, so module autoloading won't work.
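The pattern shared by this and the two previous fixes, sketched with a hypothetical "vendor,foo" compatible string: the of_device_id table alone only drives in-kernel matching, and only MODULE_DEVICE_TABLE(of, ...) copies it into the module so that an "of:N*T*Cvendor,foo" alias appears in modinfo, which is what udev's modprobe relies on for autoloading.

#include <linux/module.h>
#include <linux/of.h>

static const struct of_device_id foo_of_match[] = {
	{ .compatible = "vendor,foo" },
	{ /* sentinel */ }
};
/* Without this line the "of:..." alias never reaches the .ko, so the
 * device can bind when the module is already loaded but never triggers
 * an autoload. */
MODULE_DEVICE_TABLE(of, foo_of_match);

MODULE_LICENSE("GPL");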
Signed-off-by: Luis de Bethencourt Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/regulator-haptic.c b/drivers/input/misc/regulator-haptic.c index 6bf3f10..a804705 100644 --- a/drivers/input/misc/regulator-haptic.c +++ b/drivers/input/misc/regulator-haptic.c @@ -249,6 +249,7 @@ static const struct of_device_id regulator_haptic_dt_match[] = { { .compatible = "regulator-haptic" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, regulator_haptic_dt_match); static struct platform_driver regulator_haptic_driver = { .probe = regulator_haptic_probe, -- cgit v0.10.2 From 81648d043191e5f8f5870c5af6060b56383b139d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Aug 2015 11:43:00 -0500 Subject: gfs2: Simplify the seq file code for "sbstats" Don't use struct gfs2_glock_iter as the helper data structure for iterating through "sbstats"; we are not iterating through glocks here. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a38e38f..a694413 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1776,10 +1776,10 @@ static const char *gfs2_stype[] = { static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) { - struct gfs2_glock_iter *gi = seq->private; - struct gfs2_sbd *sdp = gi->sdp; - unsigned index = gi->hash >> 3; - unsigned subindex = gi->hash & 0x07; + struct gfs2_sbd *sdp = seq->private; + loff_t pos = *(loff_t *)iter_ptr; + unsigned index = pos >> 3; + unsigned subindex = pos & 0x07; s64 value; int i; @@ -1930,26 +1930,19 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) { - struct gfs2_glock_iter *gi = seq->private; - - gi->hash = *pos; + preempt_disable(); if (*pos >= GFS2_NR_SBSTATS) return NULL; - preempt_disable(); - return SEQ_START_TOKEN; + return pos; } static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { - struct gfs2_glock_iter *gi = seq->private; (*pos)++; - gi->hash++; - if (gi->hash >= GFS2_NR_SBSTATS) { - preempt_enable(); + if (*pos >= GFS2_NR_SBSTATS) return NULL; - } - return SEQ_START_TOKEN; + return pos; } static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) @@ -2012,12 +2005,10 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) static int gfs2_sbstats_open(struct inode *inode, struct file *file) { - int ret = seq_open_private(file, &gfs2_sbstats_seq_ops, - sizeof(struct gfs2_glock_iter)); + int ret = seq_open(file, &gfs2_sbstats_seq_ops); if (ret == 0) { struct seq_file *seq = file->private_data; - struct gfs2_glock_iter *gi = seq->private; - gi->sdp = inode->i_private; + seq->private = inode->i_private; /* sdp */ } return ret; } @@ -2043,7 +2034,7 @@ static const struct file_operations gfs2_sbstats_fops = { .open = gfs2_sbstats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) -- cgit v0.10.2 From 15562c439d0a1850b71aa1c0d92d1f4fb9503c8d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 16 Mar 2015 11:52:05 -0500 Subject: GFS2: Move glock superblock pointer to field gl_name What uniquely identifies a glock in the glock hash table is not gl_name, but gl_name and its superblock pointer. This patch makes the gl_name field correspond to a unique glock identifier. 
That will allow us to simplify hashing with a future patch, since the hash algorithm can then take the gl_name and hash its components in one operation. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Acked-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a694413..13cba6e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -119,7 +119,7 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) void gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) @@ -192,7 +192,7 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); if (lockref_put_or_lock(&gl->gl_lockref)) @@ -220,7 +220,6 @@ void gfs2_glock_put(struct gfs2_glock *gl) */ static struct gfs2_glock *search_bucket(unsigned int hash, - const struct gfs2_sbd *sdp, const struct lm_lockname *name) { struct gfs2_glock *gl; @@ -229,8 +228,6 @@ static struct gfs2_glock *search_bucket(unsigned int hash, hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { if (!lm_name_equal(&gl->gl_name, name)) continue; - if (gl->gl_sbd != sdp) - continue; if (lockref_get_not_dead(&gl->gl_lockref)) return gl; } @@ -506,7 +503,7 @@ __releases(&gl->gl_spin) __acquires(&gl->gl_spin) { const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int lck_flags = gh ? gh->gh_flags : 0; int ret; @@ -628,7 +625,7 @@ out_unlock: static void delete_work_func(struct work_struct *work) { struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip; struct inode *inode; u64 no_addr = gl->gl_name.ln_number; @@ -704,14 +701,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct gfs2_glock **glp) { struct super_block *s = sdp->sd_vfs; - struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; + struct lm_lockname name = { .ln_number = number, + .ln_type = glops->go_type, + .ln_sbd = sdp }; struct gfs2_glock *gl, *tmp; unsigned int hash = gl_hash(sdp, &name); struct address_space *mapping; struct kmem_cache *cachep; rcu_read_lock(); - gl = search_bucket(hash, sdp, &name); + gl = search_bucket(hash, &name); rcu_read_unlock(); *glp = gl; @@ -739,7 +738,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } atomic_inc(&sdp->sd_glock_disposal); - gl->gl_sbd = sdp; gl->gl_flags = 0; gl->gl_name = name; gl->gl_lockref.count = 1; @@ -772,7 +770,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } spin_lock_bucket(hash); - tmp = search_bucket(hash, sdp, &name); + tmp = search_bucket(hash, &name); if (tmp) { spin_unlock_bucket(hash); kfree(gl->gl_lksb.sb_lvbptr); @@ -928,7 +926,7 @@ __releases(&gl->gl_spin) __acquires(&gl->gl_spin) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; int try_futile = 0; @@ -1006,7 +1004,7 @@ trap_recursive: int gfs2_glock_nq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error = 0; if 
(unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) @@ -1313,7 +1311,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; spin_lock(&gl->gl_spin); gl->gl_reply = ret; @@ -1471,7 +1469,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, rcu_read_lock(); hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) + if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); } rcu_read_unlock(); @@ -1569,7 +1567,7 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) int ret; ret = gfs2_truncatei_resume(ip); - gfs2_assert_withdraw(gl->gl_sbd, ret == 0); + gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); @@ -1872,7 +1870,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || + } while (gi->sdp != gi->gl->gl_name.ln_sbd || __lockref_is_dead(&gi->gl->gl_lockref)); return 0; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fa3fa5e..1f6c9c3 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -32,13 +32,15 @@ struct workqueue_struct *gfs2_freeze_wq; static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { - fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", + fs_err(gl->gl_name.ln_sbd, + "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page " + "state 0x%lx\n", bh, (unsigned long long)bh->b_blocknr, bh->b_state, bh->b_page->mapping, bh->b_page->flags); - fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n", + fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n", gl->gl_name.ln_type, gl->gl_name.ln_number, gfs2_glock2aspace(gl)); - gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n"); + gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n"); } /** @@ -52,7 +54,7 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, unsigned int nr_revokes) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct list_head *head = &gl->gl_ail_list; struct gfs2_bufdata *bd, *tmp; struct buffer_head *bh; @@ -80,7 +82,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, static void gfs2_ail_empty_gl(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; memset(&tr, 0, sizeof(tr)); @@ -109,7 +111,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; @@ -139,7 +141,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static void rgrp_go_sync(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd; int error; @@ -179,7 +181,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock 
*gl, int flags) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd = gl->gl_object; @@ -218,7 +220,7 @@ static void inode_go_sync(struct gfs2_glock *gl) GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH); + gfs2_log_flush(gl->gl_name.ln_sbd, gl, NORMAL_FLUSH); filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; @@ -252,7 +254,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gl->gl_object; - gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); + gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); if (flags & DIO_METADATA) { struct address_space *mapping = gfs2_glock2aspace(gl); @@ -264,9 +266,9 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } } - if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { - gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH); - gl->gl_sbd->sd_rindex_uptodate = 0; + if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) { + gfs2_log_flush(gl->gl_name.ln_sbd, NULL, NORMAL_FLUSH); + gl->gl_name.ln_sbd->sd_rindex_uptodate = 0; } if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); @@ -281,7 +283,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) static int inode_go_demote_ok(const struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_holder *gh; if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) @@ -416,7 +418,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) static int inode_go_lock(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip = gl->gl_object; int error = 0; @@ -477,7 +479,7 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) static void freeze_go_sync(struct gfs2_glock *gl) { int error = 0; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { @@ -500,7 +502,7 @@ static void freeze_go_sync(struct gfs2_glock *gl) static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; struct gfs2_log_header_host head; @@ -545,7 +547,7 @@ static int freeze_go_demote_ok(const struct gfs2_glock *gl) static void iopen_go_callback(struct gfs2_glock *gl, bool remote) { struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) return; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a1ec7c2..35a55f3 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -203,13 +203,15 @@ enum { }; struct lm_lockname { + struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; }; #define lm_name_equal(name1, name2) \ - (((name1)->ln_number == (name2)->ln_number) && \ - ((name1)->ln_type == (name2)->ln_type)) + (((name1)->ln_number == (name2)->ln_number) && \ + ((name1)->ln_type == (name2)->ln_type) && \ + ((name1)->ln_sbd == (name2)->ln_sbd)) struct gfs2_glock_operations { @@ -327,7 +329,6 @@ enum { struct 
gfs2_glock { struct hlist_bl_node gl_list; - struct gfs2_sbd *gl_sbd; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; @@ -835,7 +836,7 @@ static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) { - const struct gfs2_sbd *sdp = gl->gl_sbd; + const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; preempt_disable(); this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++; preempt_enable(); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 641383a..c962cfc 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -80,7 +80,7 @@ static inline void gfs2_update_reply_times(struct gfs2_glock *gl) preempt_disable(); rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp)); - lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats); + lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats); gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */ gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */ preempt_enable(); @@ -108,7 +108,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl) dstamp = gl->gl_dstamp; gl->gl_dstamp = ktime_get_real(); irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp)); - lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats); + lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats); gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */ gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */ preempt_enable(); @@ -253,7 +253,7 @@ static void gfs2_reverse_hex(char *c, u64 value) static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; int req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; @@ -281,7 +281,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, static void gdlm_put_lock(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; int lvb_needs_unlock = 0; int error; @@ -319,7 +319,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) static void gdlm_cancel(struct gfs2_glock *gl) { - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); } diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 2c1ae86..7833394 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -70,7 +70,7 @@ static bool buffer_is_rgrp(const struct gfs2_bufdata *bd) static void maybe_release_space(struct gfs2_bufdata *bd) { struct gfs2_glock *gl = bd->bd_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_rgrpd *rgd = gl->gl_object; unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; @@ -585,7 +585,7 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, static void gfs2_meta_sync(struct gfs2_glock *gl) { struct address_space *mapping = gfs2_glock2aspace(gl); - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error; if (mapping == NULL) @@ -595,7 +595,7 @@ static void gfs2_meta_sync(struct gfs2_glock *gl) error = filemap_fdatawait(mapping); if (error) - gfs2_io_error(gl->gl_sbd); + gfs2_io_error(gl->gl_name.ln_sbd); } static void buf_lo_after_scan(struct gfs2_jdesc *jd, int 
error, int pass) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index b984a6e..0e1d4be 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -114,7 +114,7 @@ const struct address_space_operations gfs2_rgrp_aops = { struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) { struct address_space *mapping = gfs2_glock2aspace(gl); - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct page *page; struct buffer_head *bh; unsigned int shift; @@ -200,7 +200,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct buffer_head *bh; if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { @@ -362,7 +362,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct buffer_head *first_bh, *bh; u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index ac5d802..8ca1615 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -44,7 +44,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) { struct inode *inode = mapping->host; if (mapping->a_ops == &gfs2_meta_aops) - return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; + return (((struct gfs2_glock *)mapping) - 1)->gl_name.ln_sbd; else if (mapping->a_ops == &gfs2_rgrp_aops) return container_of(mapping, struct gfs2_sbd, sd_aspace); else diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 9b61f92..3a31226 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -119,7 +119,7 @@ static void gfs2_qd_dispose(struct list_head *list) while (!list_empty(list)) { qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); - sdp = qd->qd_gl->gl_sbd; + sdp = qd->qd_gl->gl_name.ln_sbd; list_del(&qd->qd_lru); @@ -302,7 +302,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, static void qd_hold(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); lockref_get(&qd->qd_lockref); } @@ -367,7 +367,7 @@ static void slot_put(struct gfs2_quota_data *qd) static int bh_get(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); unsigned int block, offset; struct buffer_head *bh; @@ -414,7 +414,7 @@ fail: static void bh_put(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; mutex_lock(&sdp->sd_quota_mutex); gfs2_assert(sdp, qd->qd_bh_count); @@ -486,7 +486,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) static void qd_unlock(struct gfs2_quota_data *qd) { - gfs2_assert_warn(qd->qd_gl->gl_sbd, + gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd, test_bit(QDF_LOCKED, &qd->qd_flags)); clear_bit(QDF_LOCKED, &qd->qd_flags); bh_put(qd); @@ -614,7 +614,7 @@ static int sort_qd(const void *a, const void *b) static void do_qc(struct gfs2_quota_data *qd, s64 change) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = 
GFS2_I(sdp->sd_qc_inode); struct gfs2_quota_change *qc = qd->qd_bh_qc; s64 x; @@ -831,7 +831,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { - struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks, ind_blocks; @@ -922,7 +922,7 @@ out: gfs2_glock_dq_uninit(&ghs[qx]); mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); - gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl, NORMAL_FLUSH); + gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH); return error; } @@ -954,7 +954,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder *q_gh) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; int error; @@ -1037,7 +1037,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) static int need_sync(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_tune *gt = &sdp->sd_tune; s64 value; unsigned int num, den; @@ -1125,7 +1125,7 @@ out: static int print_message(struct gfs2_quota_data *qd, char *type) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; fs_info(sdp, "quota %s for %s %u\n", type, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c6c6232..c92ae7fd 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1860,7 +1860,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) { const struct gfs2_glock *gl = rgd->rd_gl; - const struct gfs2_sbd *sdp = gl->gl_sbd; + const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_lkstats *st; s64 r_dcount, l_dcount; s64 l_srttb, a_srttb = 0; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 20c007d..fff47d0 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -104,7 +104,7 @@ TRACE_EVENT(gfs2_glock_state_change, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gl->gl_name.ln_number; __entry->gltype = gl->gl_name.ln_type; __entry->cur_state = glock_trace_state(gl->gl_state); @@ -140,7 +140,7 @@ TRACE_EVENT(gfs2_glock_put, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->gltype = gl->gl_name.ln_type; __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); @@ -174,7 +174,7 @@ TRACE_EVENT(gfs2_demote_rq, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->gltype = gl->gl_name.ln_type; __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); @@ -209,7 +209,7 @@ TRACE_EVENT(gfs2_promote, ), TP_fast_assign( - __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gh->gh_gl->gl_name.ln_number; __entry->gltype = gh->gh_gl->gl_name.ln_type; __entry->first = first; @@ -239,7 +239,7 @@ TRACE_EVENT(gfs2_glock_queue, ), TP_fast_assign( - 
__entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gh->gh_gl->gl_name.ln_number; __entry->gltype = gh->gh_gl->gl_name.ln_type; __entry->queue = queue; @@ -278,7 +278,7 @@ TRACE_EVENT(gfs2_glock_lock_time, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gl->gl_name.ln_number; __entry->gltype = gl->gl_name.ln_type; __entry->status = gl->gl_lksb.sb_status; @@ -333,7 +333,7 @@ TRACE_EVENT(gfs2_pin, ), TP_fast_assign( - __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = bd->bd_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->pin = pin; __entry->len = bd->bd_bh->b_size; __entry->block = bd->bd_bh->b_blocknr; @@ -449,7 +449,7 @@ TRACE_EVENT(gfs2_bmap, ), TP_fast_assign( - __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->lblock = lblock; __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0; __entry->inum = ip->i_no_addr; @@ -489,7 +489,7 @@ TRACE_EVENT(gfs2_block_alloc, ), TP_fast_assign( - __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = rgd->rd_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->start = block; __entry->inum = ip->i_no_addr; __entry->len = len; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 88bff24..b95d0d6 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -158,7 +158,7 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) { struct gfs2_trans *tr = current->journal_info; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); struct gfs2_bufdata *bd; @@ -224,7 +224,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_bufdata *bd; lock_buffer(bh); -- cgit v0.10.2 From 88ffbf3e037e67b52c46d528aca1618489c21f68 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 16 Mar 2015 11:02:46 -0500 Subject: GFS2: Use resizable hash table for glocks This patch changes the glock hash table from a normal hash table to a resizable hash table, which scales better. This also simplifies a lot of code. 
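For reference, a minimal self-contained sketch of the rhashtable API this patch adopts ("struct demo_obj" is hypothetical, not from the patch, and rhashtable_init() is assumed to have already succeeded on ht). The table resizes itself as entries come and go, and the _fast helpers take the parameter block by value and handle RCU internally, which is what lets the patch drop the fixed bucket array and the hand-rolled search_bucket():

#include <linux/rhashtable.h>

struct demo_obj {
	u64 key;			/* lookup key */
	struct rhash_head node;		/* table linkage */
};

static const struct rhashtable_params demo_params = {
	.key_len     = sizeof(u64),
	.key_offset  = offsetof(struct demo_obj, key),
	.head_offset = offsetof(struct demo_obj, node),
};

/* Returns 0 on success, -EEXIST if the key is already present. */
static int demo_insert(struct rhashtable *ht, struct demo_obj *obj)
{
	return rhashtable_lookup_insert_fast(ht, &obj->node, demo_params);
}

/* Takes the RCU read lock internally. */
static struct demo_obj *demo_find(struct rhashtable *ht, u64 key)
{
	return rhashtable_lookup_fast(ht, &key, demo_params);
}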
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Acked-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13cba6e..edb15ee 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -50,9 +51,8 @@ #include "trace_gfs2.h" struct gfs2_glock_iter { - int hash; /* hash bucket index */ - unsigned nhash; /* Index within current bucket */ struct gfs2_sbd *sdp; /* incore superblock */ + struct rhashtable_iter hti; /* rhashtable iterator */ struct gfs2_glock *gl; /* current glock struct */ loff_t last_pos; /* last position */ }; @@ -70,44 +70,19 @@ static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) -#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) -static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; -static struct dentry *gfs2_root; - -/** - * gl_hash() - Turn glock number into hash bucket number - * @lock: The glock number - * - * Returns: The number of the corresponding hash bucket - */ - -static unsigned int gl_hash(const struct gfs2_sbd *sdp, - const struct lm_lockname *name) -{ - unsigned int h; - - h = jhash(&name->ln_number, sizeof(u64), 0); - h = jhash(&name->ln_type, sizeof(unsigned int), h); - h = jhash(&sdp, sizeof(struct gfs2_sbd *), h); - h &= GFS2_GL_HASH_MASK; - - return h; -} - -static inline void spin_lock_bucket(unsigned int hash) -{ - hlist_bl_lock(&gl_hash_table[hash]); -} +static struct rhashtable_params ht_parms = { + .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, + .key_len = sizeof(struct lm_lockname), + .key_offset = offsetof(struct gfs2_glock, gl_name), + .head_offset = offsetof(struct gfs2_glock, gl_node), +}; -static inline void spin_unlock_bucket(unsigned int hash) -{ - hlist_bl_unlock(&gl_hash_table[hash]); -} +static struct rhashtable gl_hash_table; -static void gfs2_glock_dealloc(struct rcu_head *rcu) +void gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (gl->gl_ops->go_flags & GLOF_ASPACE) { kmem_cache_free(gfs2_glock_aspace_cachep, gl); @@ -115,13 +90,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(gfs2_glock_cachep, gl); } -} - -void gfs2_glock_free(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); } @@ -202,9 +170,7 @@ void gfs2_glock_put(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_unlock(&gl->gl_lockref.lock); - spin_lock_bucket(gl->gl_hash); - hlist_bl_del_rcu(&gl->gl_list); - spin_unlock_bucket(gl->gl_hash); + rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); @@ -212,30 +178,6 @@ void gfs2_glock_put(struct gfs2_glock *gl) } /** - * search_bucket() - Find struct gfs2_glock by lock number - * @bucket: the bucket to search - * @name: The lock name - * - * Returns: NULL, or the struct gfs2_glock with the requested number - */ - -static struct gfs2_glock *search_bucket(unsigned int hash, - const struct lm_lockname *name) -{ - struct gfs2_glock *gl; - struct hlist_bl_node *h; - - hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { - if (!lm_name_equal(&gl->gl_name, name)) - continue; - if 
(lockref_get_not_dead(&gl->gl_lockref)) - return gl; - } - - return NULL; -} - -/** * may_grant - check if its ok to grant a new lock * @gl: The glock * @gh: The lock request which we wish to grant @@ -704,14 +646,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type, .ln_sbd = sdp }; - struct gfs2_glock *gl, *tmp; - unsigned int hash = gl_hash(sdp, &name); + struct gfs2_glock *gl, *tmp = NULL; struct address_space *mapping; struct kmem_cache *cachep; + int ret, tries = 0; - rcu_read_lock(); - gl = search_bucket(hash, &name); - rcu_read_unlock(); + gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); + if (gl && !lockref_get_not_dead(&gl->gl_lockref)) + gl = NULL; *glp = gl; if (gl) @@ -738,13 +680,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } atomic_inc(&sdp->sd_glock_disposal); + gl->gl_node.next = NULL; gl->gl_flags = 0; gl->gl_name = name; gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; - gl->gl_hash = hash; gl->gl_ops = glops; gl->gl_dstamp = ktime_set(0, 0); preempt_disable(); @@ -769,22 +711,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->writeback_index = 0; } - spin_lock_bucket(hash); - tmp = search_bucket(hash, &name); - if (tmp) { - spin_unlock_bucket(hash); - kfree(gl->gl_lksb.sb_lvbptr); - kmem_cache_free(cachep, gl); - atomic_dec(&sdp->sd_glock_disposal); - gl = tmp; - } else { - hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); - spin_unlock_bucket(hash); +again: + ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node, + ht_parms); + if (ret == 0) { + *glp = gl; + return 0; } - *glp = gl; + if (ret == -EEXIST) { + ret = 0; + tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); + if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { + if (++tries < 100) { + cond_resched(); + goto again; + } + tmp = NULL; + ret = -ENOMEM; + } + } else { + WARN_ON_ONCE(ret); + } + kfree(gl->gl_lksb.sb_lvbptr); + kmem_cache_free(cachep, gl); + atomic_dec(&sdp->sd_glock_disposal); + *glp = tmp; - return 0; + return ret; } /** @@ -1460,31 +1414,26 @@ static struct shrinker glock_shrinker = { * */ -static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, - unsigned int hash) +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; - struct hlist_bl_head *head = &gl_hash_table[hash]; - struct hlist_bl_node *pos; + struct rhash_head *pos, *next; + const struct bucket_table *tbl; + int i; rcu_read_lock(); - hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) - examiner(gl); + tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) { + if ((gl->gl_name.ln_sbd == sdp) && + lockref_get_not_dead(&gl->gl_lockref)) + examiner(gl); + } } rcu_read_unlock(); cond_resched(); } -static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) -{ - unsigned x; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(examiner, sdp, x); -} - - /** * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw @@ -1802,20 +1751,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) int __init gfs2_glock_init(void) { - unsigned i; - for(i = 0; i < 
GFS2_GL_HASH_SIZE; i++) { - INIT_HLIST_BL_HEAD(&gl_hash_table[i]); - } + int ret; + + ret = rhashtable_init(&gl_hash_table, &ht_parms); + if (ret < 0) + return ret; glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0); - if (!glock_workqueue) + if (!glock_workqueue) { + rhashtable_destroy(&gl_hash_table); return -ENOMEM; + } gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs2_delete_workqueue) { destroy_workqueue(glock_workqueue); + rhashtable_destroy(&gl_hash_table); return -ENOMEM; } @@ -1827,72 +1780,41 @@ int __init gfs2_glock_init(void) void gfs2_glock_exit(void) { unregister_shrinker(&glock_shrinker); + rhashtable_destroy(&gl_hash_table); destroy_workqueue(glock_workqueue); destroy_workqueue(gfs2_delete_workqueue); } -static inline struct gfs2_glock *glock_hash_chain(unsigned hash) +static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { - return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), - struct gfs2_glock, gl_list); -} - -static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) -{ - return hlist_bl_entry(rcu_dereference(gl->gl_list.next), - struct gfs2_glock, gl_list); -} - -static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) -{ - struct gfs2_glock *gl; - do { - gl = gi->gl; - if (gl) { - gi->gl = glock_hash_next(gl); - gi->nhash++; - } else { - if (gi->hash >= GFS2_GL_HASH_SIZE) { - rcu_read_unlock(); - return 1; - } - gi->gl = glock_hash_chain(gi->hash); - gi->nhash = 0; - } - while (gi->gl == NULL) { - gi->hash++; - if (gi->hash >= GFS2_GL_HASH_SIZE) { - rcu_read_unlock(); - return 1; - } - gi->gl = glock_hash_chain(gi->hash); - gi->nhash = 0; + gi->gl = rhashtable_walk_next(&gi->hti); + if (IS_ERR(gi->gl)) { + if (PTR_ERR(gi->gl) == -EAGAIN) + continue; + gi->gl = NULL; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_name.ln_sbd || - __lockref_is_dead(&gi->gl->gl_lockref)); - - return 0; + } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) || + __lockref_is_dead(&gi->gl->gl_lockref))); } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; + int ret; if (gi->last_pos <= *pos) - n = gi->nhash + (*pos - gi->last_pos); - else - gi->hash = 0; + n = (*pos - gi->last_pos); - gi->nhash = 0; - rcu_read_lock(); + ret = rhashtable_walk_start(&gi->hti); + if (ret) + return NULL; do { - if (gfs2_glock_iter_next(gi)) - return NULL; - } while (n--); + gfs2_glock_iter_next(gi); + } while (gi->gl && n--); gi->last_pos = *pos; return gi->gl; @@ -1905,9 +1827,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, (*pos)++; gi->last_pos = *pos; - if (gfs2_glock_iter_next(gi)) - return NULL; - + gfs2_glock_iter_next(gi); return gi->gl; } @@ -1915,9 +1835,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock_iter *gi = seq->private; - if (gi->gl) - rcu_read_unlock(); gi->gl = NULL; + rhashtable_walk_stop(&gi->hti); } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) @@ -1978,14 +1897,28 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) if (ret == 0) { struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + gi->last_pos = 0; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; + gi->gl = NULL; + ret = 
rhashtable_walk_init(&gl_hash_table, &gi->hti); } return ret; } +static int gfs2_glocks_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + + gi->gl = NULL; + rhashtable_walk_exit(&gi->hti); + return seq_release_private(inode, file); +} + static int gfs2_glstats_open(struct inode *inode, struct file *file) { int ret = seq_open_private(file, &gfs2_glstats_seq_ops, @@ -1994,9 +1927,12 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; + gi->last_pos = 0; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; + gi->gl = NULL; + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); } return ret; } @@ -2016,7 +1952,7 @@ static const struct file_operations gfs2_glocks_fops = { .open = gfs2_glocks_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = gfs2_glocks_release, }; static const struct file_operations gfs2_glstats_fops = { @@ -2024,7 +1960,7 @@ static const struct file_operations gfs2_glstats_fops = { .open = gfs2_glstats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = gfs2_glocks_release, }; static const struct file_operations gfs2_sbstats_fops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 35a55f3..e300f74 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -22,6 +22,7 @@ #include #include #include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -342,7 +343,6 @@ struct gfs2_glock { gl_req:2, /* State in last dlm request */ gl_reply:8; /* Last reply from the dlm */ - unsigned int gl_hash; unsigned long gl_demote_time; /* time of first demote request */ long gl_hold_time; struct list_head gl_holders; @@ -368,7 +368,7 @@ struct gfs2_glock { loff_t end; } gl_vm; }; - struct rcu_head gl_rcu; + struct rhash_head gl_node; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ -- cgit v0.10.2 From 4d207133e9c362bc05a3bb6701d63eeb75cc4b77 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 27 Aug 2015 12:51:45 -0500 Subject: gfs2: Make statistics unsigned, suitable for use with do_div() None of these statistics can meaningfully be negative, and the numerator for do_div() must have the type u64. The generic implementation of do_div() used on some 32-bit architectures asserts that, resulting in a compiler error in gfs2_rgrp_congested(). 
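As an illustration of the constraint (a sketch, not code from the patch; the helper name is invented), do_div() divides its u64 numerator in place and returns the 32-bit remainder, which is why the stats must be u64:

#include <linux/types.h>
#include <asm/div64.h>

/* Average a stat the way gfs2_rgrp_congested() needs to. */
static u64 stat_average(u64 total, u32 nr_samples)
{
	u64 avg = total;	/* numerator must be u64, not s64, for do_div() */

	if (nr_samples)
		do_div(avg, nr_samples);	/* avg /= nr_samples, in place */
	return avg;
}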
Fixes: 0166b197c2ed ("GFS2: Average in only non-zero round-trip times ...") Signed-off-by: Ben Hutchings Signed-off-by: Bob Peterson Acked-by: Andreas Gruenbacher diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index edb15ee..37d4db1 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1680,17 +1680,17 @@ static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock *gl = iter_ptr; - seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n", + seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, - (long long)gl->gl_stats.stats[GFS2_LKS_SRTT], - (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], - (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], - (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], - (long long)gl->gl_stats.stats[GFS2_LKS_SIRT], - (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], - (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], - (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); return 0; } @@ -1727,7 +1727,7 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) loff_t pos = *(loff_t *)iter_ptr; unsigned index = pos >> 3; unsigned subindex = pos & 0x07; - s64 value; + u64 value; int i; if (index == 0 && subindex != 0) @@ -1743,7 +1743,7 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) } else { value = lkstats->lkstats[index - 1].stats[subindex]; } - seq_printf(seq, " %15lld", (long long)value); + seq_printf(seq, " %15llu", (long long)value); } seq_putc(seq, '\n'); return 0; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e300f74..121ed08 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -244,7 +244,7 @@ enum { }; struct gfs2_lkstats { - s64 stats[GFS2_NR_LKSTATS]; + u64 stats[GFS2_NR_LKSTATS]; }; enum { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c92ae7fd..475985d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1862,11 +1862,11 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) const struct gfs2_glock *gl = rgd->rd_gl; const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_lkstats *st; - s64 r_dcount, l_dcount; - s64 l_srttb, a_srttb = 0; + u64 r_dcount, l_dcount; + u64 l_srttb, a_srttb = 0; s64 srttb_diff; - s64 sqr_diff; - s64 var; + u64 sqr_diff; + u64 var; int cpu, nonzero = 0; preempt_disable(); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index fff47d0..49ac55d 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -267,14 +267,14 @@ TRACE_EVENT(gfs2_glock_lock_time, __field( int, status ) __field( char, flags ) __field( s64, tdiff ) - __field( s64, srtt ) - __field( s64, srttvar ) - __field( s64, srttb ) - __field( s64, srttvarb ) - __field( s64, sirt ) - __field( s64, sirtvar ) - __field( s64, dcount ) - __field( s64, qcount ) + __field( u64, srtt ) + __field( u64, srttvar ) + __field( u64, srttb ) + __field( u64, srttvarb ) + __field( u64, sirt ) + __field( u64, sirtvar ) + __field( u64, dcount ) + __field( u64, 
qcount ) ), TP_fast_assign( -- cgit v0.10.2 From c9ea8c8b74b5fb2584879e4338770ed252d8a489 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Aug 2015 13:05:08 -0500 Subject: gfs2: Fix a typo in a comment Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c962cfc..284c154 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -31,7 +31,7 @@ extern struct workqueue_struct *gfs2_control_wq; * * @delta is the difference between the current rtt sample and the * running average srtt. We add 1/8 of that to the srtt in order to - * update the current srtt estimate. The varience estimate is a bit + * update the current srtt estimate. The variance estimate is a bit * more complicated. We subtract the abs value of the @delta from * the current variance estimate and add 1/4 of that to the running * total. -- cgit v0.10.2 From 8f7e0a806db0a3ba33234af3c39d68ed8c144071 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Aug 2015 13:02:54 -0500 Subject: gfs2: A minor "sbstats" cleanup It seems cleaner to avoid the temporary value here. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 37d4db1..9bd1244 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1727,7 +1727,6 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) loff_t pos = *(loff_t *)iter_ptr; unsigned index = pos >> 3; unsigned subindex = pos & 0x07; - u64 value; int i; if (index == 0 && subindex != 0) @@ -1738,12 +1737,12 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) for_each_possible_cpu(i) { const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); - if (index == 0) { - value = i; - } else { - value = lkstats->lkstats[index - 1].stats[subindex]; - } - seq_printf(seq, " %15llu", (long long)value); + + if (index == 0) + seq_printf(seq, " %15u", i); + else + seq_printf(seq, " %15llu", (unsigned long long)lkstats-> + lkstats[index - 1].stats[subindex]); } seq_putc(seq, '\n'); return 0; -- cgit v0.10.2 From 66c117d7fa2ae429911e60d84bf31a90b2b96189 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Sep 2015 12:34:55 +0200 Subject: x86/alternatives: Make optimize_nops() interrupt safe and synced Richard reported the following crash: [ 0.036000] BUG: unable to handle kernel paging request at 55501e06 [ 0.036000] IP: [] common_interrupt+0xb/0x38 [ 0.036000] Call Trace: [ 0.036000] [] ? add_nops+0x90/0xa0 [ 0.036000] [] apply_alternatives+0x274/0x630 Chuck decoded: " 0: 8d 90 90 83 04 24 lea 0x24048390(%eax),%edx 6: 80 fc 0f cmp $0xf,%ah 9: a8 0f test $0xf,%al >> b: a0 06 1e 50 55 mov 0x55501e06,%al 10: 57 push %edi 11: 56 push %esi Interrupt 0x30 occurred while the alternatives code was replacing the initial 0x90,0x90,0x90 NOPs (from the ASM_CLAC macro) with the optimized version, 0x8d,0x76,0x00. Only the first byte has been replaced so far, and it makes a mess out of the insn decoding." optimize_nops() is buggy in two aspects: - It's not disabling interrupts across the modification - It's lacking a sync_core() call Add both. Fixes: 4fd4b6e5537c 'x86/alternatives: Use optimized NOPs for padding' Reported-and-tested-by: "Richard W.M. Jones" Signed-off-by: Thomas Gleixner Cc: Richard W.M. 
Jones Cc: Chuck Ebbert Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509031232340.15006@nanos Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c42827e..25f9093 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -338,10 +338,15 @@ done: static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); -- cgit v0.10.2 From dc3a04d551b5d21f1badbb39bfe8e5bc1289b184 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Sep 2015 17:11:22 -0700 Subject: security/device_cgroup: Fix RCU_LOCKDEP_WARN() condition f78f5b90c4ff ("rcu: Rename rcu_lockdep_assert() to RCU_LOCKDEP_WARN()") introduced a bug by incorrectly inverting the condition when moving from rcu_lockdep_assert() to RCU_LOCKDEP_WARN(). This commit therefore fixes the inversion. Reported-by: Felipe Balbi Reported-by: Tejun Heo Signed-off-by: Paul E. McKenney Acked-by: Serge Hallyn Tested-by: Josh Boyer diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 73455089..03c1652 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -401,7 +401,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, bool match = false; RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - lockdep_is_held(&devcgroup_mutex), + !lockdep_is_held(&devcgroup_mutex), "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { -- cgit v0.10.2 From 26492f195eed08b95ad5acdfbe625062ad7d86c6 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 3 Sep 2015 10:50:57 -0700 Subject: Input: sparcspkr - fix module autoload for OF platform drivers These platform drivers have a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index 54116e5..6f997aa 100644 --- a/drivers/input/misc/sparcspkr.c +++ b/drivers/input/misc/sparcspkr.c @@ -253,6 +253,7 @@ static const struct of_device_id bbc_beep_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, bbc_beep_match); static struct platform_driver bbc_beep_driver = { .driver = { @@ -332,6 +333,7 @@ static const struct of_device_id grover_beep_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, grover_beep_match); static struct platform_driver grover_beep_driver = { .driver = { -- cgit v0.10.2 From 22c15e5e008297c90daa8657fea1a3f0e0675454 Mon Sep 17 00:00:00 2001 From: James Chen Date: Thu, 3 Sep 2015 22:12:38 -0700 Subject: Input: elants_i2c - extend the calibration timeout to 12 seconds The 23 inch device found in Chrome project buddy requires 9.2~10.5 seconds to complete calibration. Let's increase calibration timeout to 12 seconds to give the device enough time. 
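For context, a timeout constant like this typically bounds a completion wait; a minimal sketch (the completion and the elan_wait_calibration() helper are assumptions, not code from this driver):

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

#define ELAN_CALI_TIMEOUT_MSEC	12000

static int elan_wait_calibration(struct completion *cali_done)
{
	unsigned long left;

	left = wait_for_completion_timeout(cali_done,
			msecs_to_jiffies(ELAN_CALI_TIMEOUT_MSEC));

	return left ? 0 : -ETIMEDOUT;	/* no jiffies left: the 12 s budget expired */
}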
Signed-off-by: James Chen Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index ddac134..17cc20e 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -102,7 +102,7 @@ #define ELAN_FW_PAGESIZE 132 /* calibration timeout definition */ -#define ELAN_CALI_TIMEOUT_MSEC 10000 +#define ELAN_CALI_TIMEOUT_MSEC 12000 #define ELAN_POWERON_DELAY_USEC 500 #define ELAN_RESET_DELAY_MSEC 20 -- cgit v0.10.2 From eb38f3a4f6e86f8bb10a3217ebd85ecc5d763aae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 3 Sep 2015 22:20:00 -0700 Subject: Input: evdev - do not report errors from flush() We've got bug reports showing the old systemd-logind (at least systemd-210) aborting unexpectedly, and this turned out to be because of an invalid error code from a close() call on an evdev device. close() is supposed to return only either EINTR or EBADFD, while the device returned ENODEV. logind was overreacting to it and decided to kill itself when an unexpected error code was received. What a tragedy. The bad error code comes from the flush fops, and actually evdev_flush() returns ENODEV when the device is disconnected or the client's access to it has been revoked. But in these cases the fact that the flush did not actually happen is not an error, but rather normal behavior. For non-disconnected devices the result of the flush is also not that interesting, as there is no potential for data loss and even if it fails the application has no way of handling the error. Because of that we are better off always returning success from evdev_flush(). Also, returning EINTR from flush()/close() is discouraged (as it is not clear how the application should handle this error), so let's stop taking evdev->mutex interruptibly. Bugzilla: http://bugzilla.suse.com/show_bug.cgi?id=939834 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 9d35499..08d4964 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -290,19 +290,14 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - int retval; - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; + mutex_lock(&evdev->mutex); - if (!evdev->exist || client->revoked) - retval = -ENODEV; - else - retval = input_flush_device(&evdev->handle, file); + if (evdev->exist && !client->revoked) + input_flush_device(&evdev->handle, file); mutex_unlock(&evdev->mutex); - return retval; + return 0; } static void evdev_free(struct device *dev) -- cgit v0.10.2 From 04bdfa8ab5bab929cc57f73952c503a88372601d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 4 Sep 2015 16:24:38 +0200 Subject: arm/arm64: KVM: vgic: Move active state handling to flush_hwstate We currently set the physical active state only when we *inject* a new pending virtual interrupt, but this is actually not correct, because we could have been preempted and run something else on the system that cleared the active state in the meantime. This causes us to run the VM with the timer set to fire, but without setting the physical active state. The solution is to always check the LR configurations, and if we have a mapped interrupt in the LR in either the pending or active state (virtual), then set the physical active state.
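The genirq call at the heart of the fix can be sketched in isolation; irq_set_irqchip_state() is the real API the patch uses, while the forward_active_state() wrapper below is hypothetical:

#include <linux/bug.h>
#include <linux/interrupt.h>

static void forward_active_state(unsigned int host_irq, bool active)
{
	/* Mirror the virtual interrupt's state into the physical GIC. */
	int ret = irq_set_irqchip_state(host_irq, IRQCHIP_STATE_ACTIVE,
					active);

	WARN_ON(ret);	/* should only fail for an invalid host_irq */
}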
Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9eb489a..6bd1c9b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1144,26 +1144,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, struct irq_phys_map *map; map = vgic_irq_map_search(vcpu, irq); - /* - * If we have a mapping, and the virtual interrupt is - * being injected, then we must set the state to - * active in the physical world. Otherwise the - * physical interrupt will fire and the guest will - * exit before processing the virtual interrupt. - */ if (map) { - int ret; - - BUG_ON(!map->active); vlr.hwirq = map->phys_irq; vlr.state |= LR_HW; vlr.state &= ~LR_EOI_INT; - ret = irq_set_irqchip_state(map->irq, - IRQCHIP_STATE_ACTIVE, - true); - WARN_ON(ret); - /* * Make sure we're not going to sample this * again, as a HW-backed interrupt cannot be @@ -1255,7 +1240,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; unsigned long *pa_percpu, *pa_shared; - int i, vcpu_id; + int i, vcpu_id, lr, ret; int overflow = 0; int nr_shared = vgic_nr_shared_irqs(dist); @@ -1310,6 +1295,31 @@ epilog: */ clear_bit(vcpu_id, dist->irq_pending_on_cpu); } + + for (lr = 0; lr < vgic->nr_lr; lr++) { + struct vgic_lr vlr; + + if (!test_bit(lr, vgic_cpu->lr_used)) + continue; + + vlr = vgic_get_lr(vcpu, lr); + + /* + * If we have a mapping, and the virtual interrupt is + * presented to the guest (as pending or active), then we must + * set the state to active in the physical world. See + * Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt. + */ + if (vlr.state & LR_HW) { + struct irq_phys_map *map; + map = vgic_irq_map_search(vcpu, vlr.irq); + + ret = irq_set_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + true); + WARN_ON(ret); + } + } } static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 4ad9e16af36bbe8657aabe494ff912acbc213ce4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 4 Sep 2015 16:24:39 +0200 Subject: arm/arm64: KVM: arch timer: Reset CNTV_CTL to 0 Provide a better quality of implementation and be architecture compliant on ARMv7 for the architected timer by resetting the CNTV_CTL to 0 on reset of the timer. This change alone fixes the UEFI reset issue reported by Laszlo back in February. Cc: Laszlo Ersek Cc: Ard Biesheuvel Cc: Drew Jones Cc: Wei Huang Cc: Peter Maydell Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 76e38d2..48c6e1a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -200,6 +200,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, timer->irq = irq; /* + * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 + * and to 0 for ARMv7. We provide an implementation that always + * resets the timer to be disabled and unmasked and is compliant with + * the ARMv7 architecture. + */ + timer->cntv_ctl = 0; + + /* * Tell the VGIC that the virtual interrupt is tied to a * physical interrupt. We do that once per VCPU. 
*/ -- cgit v0.10.2 From 857d1a973077245f03b351e2539529c86267bfe4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Aug 2015 14:42:05 +0100 Subject: arm64: KVM: set {v,}TCR_EL2 RES1 bits Currently we don't set the RES1 bits of TCR_EL2 and VTCR_EL2 when configuring them, which could lead to unexpected behaviour when an architectural meaning is defined for those bits. Set the RES1 bits to avoid issues. Cc: Catalin Marinas Cc: Christoffer Dall Cc: Marc Zyngier Cc: Suzuki Poulose Cc: Will Deacon Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7605e09..cbc5e1a 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -95,6 +95,7 @@ SCTLR_EL2_SA | SCTLR_EL2_I) /* TCR_EL2 Registers bits */ +#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS (7 << 16) #define TCR_EL2_PS_40B (2 << 16) @@ -106,9 +107,10 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) +#define TCR_EL2_FLAGS (TCR_EL2_RES1 | TCR_EL2_PS_40B) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_RES1 (1 << 31) #define VTCR_EL2_PS_MASK (7 << 16) #define VTCR_EL2_TG0_MASK (1 << 14) #define VTCR_EL2_TG0_4K (0 << 14) @@ -147,7 +149,8 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ + VTCR_EL2_RES1) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -158,7 +161,8 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ + VTCR_EL2_RES1) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif -- cgit v0.10.2 From 0c0672922dcc70ffba11d96385e98e42fb3ae08d Mon Sep 17 00:00:00 2001 From: Alexander Spyridakis Date: Fri, 4 Sep 2015 17:06:24 +0200 Subject: arm/arm64: KVM: Fix PSCI affinity info return value for non valid cores If a guest requests the affinity info for a non-existing vCPU we need to properly return an error, instead of erroneously reporting an off state. 
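From the guest's point of view, the fixed contract looks like this (a sketch; the constants are the real uapi values also used in the diff below, while affinity_info_str() is a hypothetical helper):

#include <uapi/linux/psci.h>

static const char *affinity_info_str(long ret)
{
	switch (ret) {
	case PSCI_0_2_AFFINITY_LEVEL_ON:
		return "at least one matching vCPU is on";
	case PSCI_0_2_AFFINITY_LEVEL_OFF:
		return "all matching vCPUs are off";
	case PSCI_RET_INVALID_PARAMS:	/* new: no vCPU matches at all */
		return "no vCPU matches the requested affinity";
	default:
		return "unexpected return value";
	}
}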
Signed-off-by: Alexander Spyridakis Signed-off-by: Alvise Rigo Acked-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 4b94b51..ad6f642 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -126,7 +126,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) { - int i; + int i, matching_cpus = 0; unsigned long mpidr; unsigned long target_affinity; unsigned long target_affinity_mask; @@ -151,12 +151,16 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) */ kvm_for_each_vcpu(i, tmp, kvm) { mpidr = kvm_vcpu_get_mpidr_aff(tmp); - if (((mpidr & target_affinity_mask) == target_affinity) && - !tmp->arch.pause) { - return PSCI_0_2_AFFINITY_LEVEL_ON; + if ((mpidr & target_affinity_mask) == target_affinity) { + matching_cpus++; + if (!tmp->arch.pause) + return PSCI_0_2_AFFINITY_LEVEL_ON; } } + if (!matching_cpus) + return PSCI_RET_INVALID_PARAMS; + return PSCI_0_2_AFFINITY_LEVEL_OFF; } -- cgit v0.10.2 From 6a4756f6951f97a9a601ba24632c5906750ef00d Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 1 May 2015 15:58:19 +0200 Subject: cris: arch-v32: gpio: Use kzalloc instead of kmalloc/memset Turn a kmalloc/memset into an equivalent kzalloc. Doing so also moves the zeroing of the memory outside of the mutex. Signed-off-by: Christophe Jaillet Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/drivers/mach-fs/gpio.c b/arch/cris/arch-v32/drivers/mach-fs/gpio.c index 009f4ee..72968fb 100644 --- a/arch/cris/arch-v32/drivers/mach-fs/gpio.c +++ b/arch/cris/arch-v32/drivers/mach-fs/gpio.c @@ -425,12 +425,11 @@ gpio_open(struct inode *inode, struct file *filp) if (p > GPIO_MINOR_LAST) return -EINVAL; - priv = kmalloc(sizeof(struct gpio_private), GFP_KERNEL); + priv = kzalloc(sizeof(struct gpio_private), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_lock(&gpio_mutex); - memset(priv, 0, sizeof(*priv)); priv->minor = p; -- cgit v0.10.2 From 98a725a57a8e92bfbe962a8c69975ce0f9ff11fe Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 14 May 2015 14:36:55 +0200 Subject: CRISv32: add unreachable() to BUG() Add an unreachable() in the BUG() implementations, to get rid of several warnings similar to the one below: kernel/sched/core.c: In function 'pick_next_task': kernel/sched/core.c:2690:1: warning: control reaches end of non-void function [-Wreturn-type] Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/arch-v32/arch/bug.h b/arch/cris/include/arch-v32/arch/bug.h index 0f211e1..fb59faa 100644 --- a/arch/cris/include/arch-v32/arch/bug.h +++ b/arch/cris/include/arch-v32/arch/bug.h @@ -10,6 +10,7 @@ * All other stuff is done out-of-band with exception handlers.
*/ #define BUG() \ +do { \ __asm__ __volatile__ ("0: break 14\n\t" \ ".section .fixup,\"ax\"\n" \ "1:\n\t" \ @@ -21,9 +22,15 @@ ".section __ex_table,\"a\"\n\t" \ ".dword 0b, 1b\n\t" \ ".previous\n\t" \ - : : "ri" (__FILE__), "i" (__LINE__)) + : : "ri" (__FILE__), "i" (__LINE__)); \ + unreachable(); \ +} while (0) #else -#define BUG() __asm__ __volatile__ ("break 14\n\t") +#define BUG() \ +do { \ + __asm__ __volatile__ ("break 14\n\t"); \ + unreachable(); \ +} while (0) #endif #define HAVE_ARCH_BUG -- cgit v0.10.2 From f59625a8f770741d590da09be4f0fcfc75b31e46 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 14 May 2015 14:36:56 +0200 Subject: CRISv32: allow CONFIG_DEBUG_BUGVERBOSE Support for verbose BUG reporting already exists, but the HAVE flag that allows the option to be enabled is missing. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 0314e32..b202b82 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -58,6 +58,7 @@ config CRIS select CLKSRC_MMIO if ETRAX_ARCH_V32 select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32 select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32 + select HAVE_DEBUG_BUGVERBOSE if ETRAX_ARCH_V32 config HZ int -- cgit v0.10.2 From ee59843cd2b4331db49671c7f045ee2ff9d535b6 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 6 May 2015 21:48:12 +0800 Subject: CRIS: Wire up missing syscalls The related warnings: CALL scripts/checksyscalls.sh :1229:2: warning: #warning syscall sched_setattr not implemented [-Wcpp] :1232:2: warning: #warning syscall sched_getattr not implemented [-Wcpp] :1235:2: warning: #warning syscall renameat2 not implemented [-Wcpp] :1238:2: warning: #warning syscall seccomp not implemented [-Wcpp] :1241:2: warning: #warning syscall getrandom not implemented [-Wcpp] :1244:2: warning: #warning syscall memfd_create not implemented [-Wcpp] :1247:2: warning: #warning syscall bpf not implemented [-Wcpp] :1250:2: warning: #warning syscall execveat not implemented [-Wcpp] Signed-off-by: Chen Gang Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index 81570fc..b562252 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -955,6 +955,14 @@ sys_call_table: .long sys_process_vm_writev .long sys_kcmp /* 350 */ .long sys_finit_module + .long sys_sched_setattr + .long sys_sched_getattr + .long sys_renameat2 + .long sys_seccomp /* 355 */ + .long sys_getrandom + .long sys_memfd_create + .long sys_bpf + .long sys_execveat /* * NOTE!! This doesn't have to be exact - we just have diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 026a0b2..1c5595a 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -875,6 +875,14 @@ sys_call_table: .long sys_process_vm_writev .long sys_kcmp /* 350 */ .long sys_finit_module + .long sys_sched_setattr + .long sys_sched_getattr + .long sys_renameat2 + .long sys_seccomp /* 355 */ + .long sys_getrandom + .long sys_memfd_create + .long sys_bpf + .long sys_execveat /* * NOTE!! 
This doesn't have to be exact - we just have diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index 0f40fed..9c23535 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 360 +#define NR_syscalls 365 #include diff --git a/arch/cris/include/uapi/asm/unistd.h b/arch/cris/include/uapi/asm/unistd.h index f3287fa..062b648 100644 --- a/arch/cris/include/uapi/asm/unistd.h +++ b/arch/cris/include/uapi/asm/unistd.h @@ -356,5 +356,13 @@ #define __NR_process_vm_writev 349 #define __NR_kcmp 350 #define __NR_finit_module 351 +#define __NR_sched_setattr 352 +#define __NR_sched_getattr 353 +#define __NR_renameat2 354 +#define __NR_seccomp 355 +#define __NR_getrandom 356 +#define __NR_memfd_create 357 +#define __NR_bpf 358 +#define __NR_execveat 359 #endif /* _UAPI_ASM_CRIS_UNISTD_H_ */ -- cgit v0.10.2 From 94230178678cb7e0f82ff2c161ac84742f79a2e7 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 3 Jun 2015 12:25:51 +0200 Subject: CRISv32: ETRAX FS: Squash warnings in pinmux driver Squash the following warnings: arch/cris/arch-v32/mach-fs/pinmux.c: In function 'crisv32_pinmux_alloc_fixed': arch/cris/arch-v32/mach-fs/pinmux.c:104:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement] arch/cris/arch-v32/mach-fs/pinmux.c: In function 'crisv32_pinmux_dealloc_fixed': arch/cris/arch-v32/mach-fs/pinmux.c:238:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement] arch/cris/arch-v32/mach-fs/pinmux.c: In function '__crisv32_pinmux_alloc': arch/cris/arch-v32/mach-fs/pinmux.c:49:1: warning: control reaches end of non-void function [-Wreturn-type] Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c index 05a0470..d8a3a3c 100644 --- a/arch/cris/arch-v32/mach-fs/pinmux.c +++ b/arch/cris/arch-v32/mach-fs/pinmux.c @@ -46,6 +46,8 @@ static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin, pins[port][i] = mode; crisv32_pinmux_set(port); + + return 0; } static int crisv32_pinmux_init(void) @@ -93,6 +95,7 @@ int crisv32_pinmux_alloc_fixed(enum fixed_function function) int ret = -EINVAL; char saved[sizeof pins]; unsigned long flags; + reg_pinmux_rw_hwprot hwprot; spin_lock_irqsave(&pinmux_lock, flags); @@ -101,7 +104,7 @@ int crisv32_pinmux_alloc_fixed(enum fixed_function function) crisv32_pinmux_init(); /* Must be done before we read rw_hwprot */ - reg_pinmux_rw_hwprot hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); + hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); switch (function) { case pinmux_ser1: @@ -227,6 +230,7 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function) int ret = -EINVAL; char saved[sizeof pins]; unsigned long flags; + reg_pinmux_rw_hwprot hwprot; spin_lock_irqsave(&pinmux_lock, flags); @@ -235,7 +239,7 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function) crisv32_pinmux_init(); /* Must be done before we read rw_hwprot */ - reg_pinmux_rw_hwprot hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); + hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); switch (function) { case pinmux_ser1: -- cgit v0.10.2 From 939b83fb03f275016cd72a0f81f49bf8e63c2a14 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 3 Jun 2015 12:42:01 +0200 Subject: CRISv32: Add GPIO driver to the default configs Fix a number of small issues visible when GPIO is enabled: - Correct missing default for !ETRAXFS in Kconfig - Remove information on number of bits for some
Kconfigs related to the GPIO, they are different in ETRAX FS and ARTPEC-3 - Fix compile warning in ARTPEC-3 GPIO driver Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 4fc16b4..e6c523c 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -202,7 +202,7 @@ config ETRAX_PA_CHANGEABLE_DIR default "0x00" if ETRAXFS default "0x00000000" if !ETRAXFS help - This is a bitmask (8 bits) with information of what bits in PA that a + This is a bitmask with information of what bits in PA that a user can change direction on using ioctl's. Bit set = changeable. You probably want 0 here, but it depends on your hardware. @@ -213,7 +213,7 @@ config ETRAX_PA_CHANGEABLE_BITS default "0x00" if ETRAXFS default "0x00000000" if !ETRAXFS help - This is a bitmask (8 bits) with information of what bits in PA + This is a bitmask with information of what bits in PA that a user can change the value on using ioctl's. Bit set = changeable. @@ -223,7 +223,7 @@ config ETRAX_PB_CHANGEABLE_DIR default "0x00000" if ETRAXFS default "0x00000000" if !ETRAXFS help - This is a bitmask (18 bits) with information of what bits in PB + This is a bitmask with information of what bits in PB that a user can change direction on using ioctl's. Bit set = changeable. You probably want 0 here, but it depends on your hardware. @@ -234,7 +234,7 @@ config ETRAX_PB_CHANGEABLE_BITS default "0x00000" if ETRAXFS default "0x00000000" if !ETRAXFS help - This is a bitmask (18 bits) with information of what bits in PB + This is a bitmask with information of what bits in PB that a user can change the value on using ioctl's. Bit set = changeable. @@ -244,7 +244,7 @@ config ETRAX_PC_CHANGEABLE_DIR default "0x00000" if ETRAXFS default "0x00000000" if !ETRAXFS help - This is a bitmask (18 bits) with information of what bits in PC + This is a bitmask with information of what bits in PC that a user can change direction on using ioctl's. Bit set = changeable. You probably want 0 here, but it depends on your hardware. @@ -253,9 +253,9 @@ config ETRAX_PC_CHANGEABLE_BITS hex "PC user changeable bits mask" depends on ETRAX_GPIO default "0x00000" if ETRAXFS - default "0x00000000" if ETRAXFS + default "0x00000000" if !ETRAXFS help - This is a bitmask (18 bits) with information of what bits in PC + This is a bitmask with information of what bits in PC that a user can change the value on using ioctl's. Bit set = changeable. @@ -264,7 +264,7 @@ config ETRAX_PD_CHANGEABLE_DIR depends on ETRAX_GPIO && ETRAXFS default "0x00000" help - This is a bitmask (18 bits) with information of what bits in PD + This is a bitmask with information of what bits in PD that a user can change direction on using ioctl's. Bit set = changeable. You probably want 0x00000 here, but it depends on your hardware. 
diff --git a/arch/cris/arch-v32/drivers/mach-a3/gpio.c b/arch/cris/arch-v32/drivers/mach-a3/gpio.c index 74f9fe8..c92e1da 100644 --- a/arch/cris/arch-v32/drivers/mach-a3/gpio.c +++ b/arch/cris/arch-v32/drivers/mach-a3/gpio.c @@ -957,7 +957,7 @@ static void __init virtual_gpio_init(void) static int __init gpio_init(void) { - int res; + int res, res2; printk(KERN_INFO "ETRAX FS GPIO driver v2.7, (c) 2003-2008 " "Axis Communications AB\n"); @@ -977,7 +977,7 @@ static int __init gpio_init(void) CRIS_LED_DISK_READ(0); CRIS_LED_DISK_WRITE(0); - int res2 = request_irq(GIO_INTR_VECT, gpio_interrupt, + res2 = request_irq(GIO_INTR_VECT, gpio_interrupt, IRQF_SHARED, "gpio", &alarmlist); if (res2) { printk(KERN_ERR "err: irq for gpio\n"); diff --git a/arch/cris/configs/artpec_3_defconfig b/arch/cris/configs/artpec_3_defconfig index 71854d4..70e497e 100644 --- a/arch/cris/configs/artpec_3_defconfig +++ b/arch/cris/configs/artpec_3_defconfig @@ -12,10 +12,6 @@ CONFIG_ETRAX_FAST_TIMER=y CONFIG_CRIS_MACH_ARTPEC3=y CONFIG_ETRAX_DRAM_SIZE=32 CONFIG_ETRAX_FLASH1_SIZE=4 -CONFIG_ETRAX_DEF_GIO_PA_OE=1c -CONFIG_ETRAX_DEF_GIO_PA_OUT=00 -CONFIG_ETRAX_DEF_GIO_PB_OE=00000 -CONFIG_ETRAX_DEF_GIO_PB_OUT=00000 CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -42,3 +38,4 @@ CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y +CONFIG_ETRAX_GPIO=y diff --git a/arch/cris/configs/etraxfs_defconfig b/arch/cris/configs/etraxfs_defconfig index 87c7227..9123268 100644 --- a/arch/cris/configs/etraxfs_defconfig +++ b/arch/cris/configs/etraxfs_defconfig @@ -38,3 +38,4 @@ CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y +CONFIG_ETRAX_GPIO=y -- cgit v0.10.2 From 4b86715748da506b176b5ec50c041b335c9d1769 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 3 Jun 2015 13:04:23 +0200 Subject: CRISv32: Squash compile warnings for axisflashmap Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index 28dd771..5387424 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -313,6 +313,7 @@ static int __init init_axis_flash(void) size_t len; int ram_rootfs_partition = -1; /* -1 => no RAM rootfs partition */ int part; + struct mtd_partition *partition; /* We need a root fs. If it resides in RAM, we need to use an * MTDRAM device, so it must be enabled in the kernel config, @@ -329,7 +330,7 @@ static int __init init_axis_flash(void) main_mtd = flash_probe(); if (main_mtd) - printk(KERN_INFO "%s: 0x%08x bytes of NOR flash memory.\n", + printk(KERN_INFO "%s: 0x%08llx bytes of NOR flash memory.\n", main_mtd->name, main_mtd->size); #ifdef CONFIG_ETRAX_NANDFLASH @@ -388,10 +389,10 @@ static int __init init_axis_flash(void) #endif if (main_mtd) { + loff_t ptable_sector = CONFIG_ETRAX_PTABLE_SECTOR; main_mtd->owner = THIS_MODULE; axisflash_mtd = main_mtd; - loff_t ptable_sector = CONFIG_ETRAX_PTABLE_SECTOR; /* First partition (rescue) is always set to the default. */ pidx++; @@ -517,7 +518,7 @@ static int __init init_axis_flash(void) /* Decide whether to use default partition table. 
*/ /* Only use default table if we actually have a device (main_mtd) */ - struct mtd_partition *partition = &axis_partitions[0]; + partition = &axis_partitions[0]; if (main_mtd && !ptable_ok) { memcpy(axis_partitions, axis_default_partitions, sizeof(axis_default_partitions)); @@ -580,7 +581,7 @@ static int __init init_axis_flash(void) printk(KERN_INFO "axisflashmap: Adding RAM partition " "for rootfs image.\n"); err = mtdram_init_device(mtd_ram, - (void *)partition[part].offset, + (void *)(u_int32_t)partition[part].offset, partition[part].size, partition[part].name); if (err) -- cgit v0.10.2 From 835e4177284e1574033dea47d4bb8e55c21b6345 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 27 Mar 2015 22:32:27 +0100 Subject: CRIS: UAPI: fix ptrace.h The exported ptrace.h header on CRIS references an "arch" directory which does not exist. Fix this by having the variants in the same directory and including them conditionally, similar to other architectures. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index 3a36ae6..150d1d7 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -19,7 +19,6 @@ #include #include #include -#include #include extern unsigned long cris_signal_return_page; diff --git a/arch/cris/include/arch-v10/arch/ptrace.h b/arch/cris/include/arch-v10/arch/ptrace.h deleted file mode 100644 index 1a23273..0000000 --- a/arch/cris/include/arch-v10/arch/ptrace.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef _CRIS_ARCH_PTRACE_H -#define _CRIS_ARCH_PTRACE_H - -/* Frame types */ - -#define CRIS_FRAME_NORMAL 0 /* normal frame without SBFS stacking */ -#define CRIS_FRAME_BUSFAULT 1 /* frame stacked using SBFS, need RBF return - path */ - -/* Register numbers in the ptrace system call interface */ - -#define PT_FRAMETYPE 0 -#define PT_ORIG_R10 1 -#define PT_R13 2 -#define PT_R12 3 -#define PT_R11 4 -#define PT_R10 5 -#define PT_R9 6 -#define PT_R8 7 -#define PT_R7 8 -#define PT_R6 9 -#define PT_R5 10 -#define PT_R4 11 -#define PT_R3 12 -#define PT_R2 13 -#define PT_R1 14 -#define PT_R0 15 -#define PT_MOF 16 -#define PT_DCCR 17 -#define PT_SRP 18 -#define PT_IRP 19 /* This is actually the debugged process' PC */ -#define PT_CSRINSTR 20 /* CPU Status record remnants - - valid if frametype == busfault */ -#define PT_CSRADDR 21 -#define PT_CSRDATA 22 -#define PT_USP 23 /* special case - USP is not in the pt_regs */ -#define PT_MAX 23 - -/* Condition code bit numbers. The same numbers apply to CCR of course, - but we use DCCR everywhere else, so let's try and be consistent. 
*/ -#define C_DCCR_BITNR 0 -#define V_DCCR_BITNR 1 -#define Z_DCCR_BITNR 2 -#define N_DCCR_BITNR 3 -#define X_DCCR_BITNR 4 -#define I_DCCR_BITNR 5 -#define B_DCCR_BITNR 6 -#define M_DCCR_BITNR 7 -#define U_DCCR_BITNR 8 -#define P_DCCR_BITNR 9 -#define F_DCCR_BITNR 10 - -/* pt_regs not only specifices the format in the user-struct during - * ptrace but is also the frame format used in the kernel prologue/epilogues - * themselves - */ - -struct pt_regs { - unsigned long frametype; /* type of stackframe */ - unsigned long orig_r10; - /* pushed by movem r13, [sp] in SAVE_ALL, movem pushes backwards */ - unsigned long r13; - unsigned long r12; - unsigned long r11; - unsigned long r10; - unsigned long r9; - unsigned long r8; - unsigned long r7; - unsigned long r6; - unsigned long r5; - unsigned long r4; - unsigned long r3; - unsigned long r2; - unsigned long r1; - unsigned long r0; - unsigned long mof; - unsigned long dccr; - unsigned long srp; - unsigned long irp; /* This is actually the debugged process' PC */ - unsigned long csrinstr; - unsigned long csraddr; - unsigned long csrdata; -}; - -/* switch_stack is the extra stuff pushed onto the stack in _resume (entry.S) - * when doing a context-switch. it is used (apart from in resume) when a new - * thread is made and we need to make _resume (which is starting it for the - * first time) realise what is going on. - * - * Actually, the use is very close to the thread struct (TSS) in that both the - * switch_stack and the TSS are used to keep thread stuff when switching in - * _resume. - */ - -struct switch_stack { - unsigned long r9; - unsigned long r8; - unsigned long r7; - unsigned long r6; - unsigned long r5; - unsigned long r4; - unsigned long r3; - unsigned long r2; - unsigned long r1; - unsigned long r0; - unsigned long return_ip; /* ip that _resume will return to */ -}; - -#ifdef __KERNEL__ - -/* bit 8 is user-mode flag */ -#define user_mode(regs) (((regs)->dccr & 0x100) != 0) -#define instruction_pointer(regs) ((regs)->irp) -#define profile_pc(regs) instruction_pointer(regs) - -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/cris/include/arch-v32/arch/irqflags.h b/arch/cris/include/arch-v32/arch/irqflags.h index 041851f..5f6fddf 100644 --- a/arch/cris/include/arch-v32/arch/irqflags.h +++ b/arch/cris/include/arch-v32/arch/irqflags.h @@ -2,7 +2,7 @@ #define __ASM_CRIS_ARCH_IRQFLAGS_H #include -#include +#include static inline unsigned long arch_local_save_flags(void) { diff --git a/arch/cris/include/arch-v32/arch/ptrace.h b/arch/cris/include/arch-v32/arch/ptrace.h deleted file mode 100644 index 19773d3..0000000 --- a/arch/cris/include/arch-v32/arch/ptrace.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef _CRIS_ARCH_PTRACE_H -#define _CRIS_ARCH_PTRACE_H - -/* Register numbers in the ptrace system call interface */ - -#define PT_ORIG_R10 0 -#define PT_R0 1 -#define PT_R1 2 -#define PT_R2 3 -#define PT_R3 4 -#define PT_R4 5 -#define PT_R5 6 -#define PT_R6 7 -#define PT_R7 8 -#define PT_R8 9 -#define PT_R9 10 -#define PT_R10 11 -#define PT_R11 12 -#define PT_R12 13 -#define PT_R13 14 -#define PT_ACR 15 -#define PT_SRS 16 -#define PT_MOF 17 -#define PT_SPC 18 -#define PT_CCS 19 -#define PT_SRP 20 -#define PT_ERP 21 /* This is actually the debugged process' PC */ -#define PT_EXS 22 -#define PT_EDA 23 -#define PT_USP 24 /* special case - USP is not in the pt_regs */ -#define PT_PPC 25 /* special case - pseudo PC */ -#define PT_BP 26 /* Base number for BP registers. */ -#define PT_BP_CTRL 26 /* BP control register. 
*/ -#define PT_MAX 40 - -/* Condition code bit numbers. */ -#define C_CCS_BITNR 0 -#define V_CCS_BITNR 1 -#define Z_CCS_BITNR 2 -#define N_CCS_BITNR 3 -#define X_CCS_BITNR 4 -#define I_CCS_BITNR 5 -#define U_CCS_BITNR 6 -#define P_CCS_BITNR 7 -#define R_CCS_BITNR 8 -#define S_CCS_BITNR 9 -#define M_CCS_BITNR 30 -#define Q_CCS_BITNR 31 -#define CCS_SHIFT 10 /* Shift count for each level in CCS */ - -/* pt_regs not only specifices the format in the user-struct during - * ptrace but is also the frame format used in the kernel prologue/epilogues - * themselves - */ - -struct pt_regs { - unsigned long orig_r10; - /* pushed by movem r13, [sp] in SAVE_ALL. */ - unsigned long r0; - unsigned long r1; - unsigned long r2; - unsigned long r3; - unsigned long r4; - unsigned long r5; - unsigned long r6; - unsigned long r7; - unsigned long r8; - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long acr; - unsigned long srs; - unsigned long mof; - unsigned long spc; - unsigned long ccs; - unsigned long srp; - unsigned long erp; /* This is actually the debugged process' PC */ - /* For debugging purposes; saved only when needed. */ - unsigned long exs; - unsigned long eda; -}; - -/* switch_stack is the extra stuff pushed onto the stack in _resume (entry.S) - * when doing a context-switch. it is used (apart from in resume) when a new - * thread is made and we need to make _resume (which is starting it for the - * first time) realise what is going on. - * - * Actually, the use is very close to the thread struct (TSS) in that both the - * switch_stack and the TSS are used to keep thread stuff when switching in - * _resume. - */ - -struct switch_stack { - unsigned long r0; - unsigned long r1; - unsigned long r2; - unsigned long r3; - unsigned long r4; - unsigned long r5; - unsigned long r6; - unsigned long r7; - unsigned long r8; - unsigned long r9; - unsigned long return_ip; /* ip that _resume will return to */ -}; - -#ifdef __KERNEL__ - -#define arch_has_single_step() (1) -#define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0) -#define instruction_pointer(regs) ((regs)->erp) -#define profile_pc(regs) instruction_pointer(regs) - -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 01f66b8..8443dc8 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -19,6 +19,8 @@ header-y += param.h header-y += poll.h header-y += posix_types.h header-y += ptrace.h +header-y += ptrace_v10.h +header-y += ptrace_v32.h header-y += resource.h header-y += rs485.h header-y += sembuf.h diff --git a/arch/cris/include/uapi/asm/ptrace.h b/arch/cris/include/uapi/asm/ptrace.h index c689c9b..bd8946f 100644 --- a/arch/cris/include/uapi/asm/ptrace.h +++ b/arch/cris/include/uapi/asm/ptrace.h @@ -1 +1,5 @@ -#include +#ifdef __arch_v32 +#include +#else +#include +#endif diff --git a/arch/cris/include/uapi/asm/ptrace_v10.h b/arch/cris/include/uapi/asm/ptrace_v10.h new file mode 100644 index 0000000..1a23273 --- /dev/null +++ b/arch/cris/include/uapi/asm/ptrace_v10.h @@ -0,0 +1,118 @@ +#ifndef _CRIS_ARCH_PTRACE_H +#define _CRIS_ARCH_PTRACE_H + +/* Frame types */ + +#define CRIS_FRAME_NORMAL 0 /* normal frame without SBFS stacking */ +#define CRIS_FRAME_BUSFAULT 1 /* frame stacked using SBFS, need RBF return + path */ + +/* Register numbers in the ptrace system call interface */ + +#define PT_FRAMETYPE 0 +#define PT_ORIG_R10 1 +#define PT_R13 2 +#define 
PT_R12 3 +#define PT_R11 4 +#define PT_R10 5 +#define PT_R9 6 +#define PT_R8 7 +#define PT_R7 8 +#define PT_R6 9 +#define PT_R5 10 +#define PT_R4 11 +#define PT_R3 12 +#define PT_R2 13 +#define PT_R1 14 +#define PT_R0 15 +#define PT_MOF 16 +#define PT_DCCR 17 +#define PT_SRP 18 +#define PT_IRP 19 /* This is actually the debugged process' PC */ +#define PT_CSRINSTR 20 /* CPU Status record remnants - + valid if frametype == busfault */ +#define PT_CSRADDR 21 +#define PT_CSRDATA 22 +#define PT_USP 23 /* special case - USP is not in the pt_regs */ +#define PT_MAX 23 + +/* Condition code bit numbers. The same numbers apply to CCR of course, + but we use DCCR everywhere else, so let's try and be consistent. */ +#define C_DCCR_BITNR 0 +#define V_DCCR_BITNR 1 +#define Z_DCCR_BITNR 2 +#define N_DCCR_BITNR 3 +#define X_DCCR_BITNR 4 +#define I_DCCR_BITNR 5 +#define B_DCCR_BITNR 6 +#define M_DCCR_BITNR 7 +#define U_DCCR_BITNR 8 +#define P_DCCR_BITNR 9 +#define F_DCCR_BITNR 10 + +/* pt_regs not only specifices the format in the user-struct during + * ptrace but is also the frame format used in the kernel prologue/epilogues + * themselves + */ + +struct pt_regs { + unsigned long frametype; /* type of stackframe */ + unsigned long orig_r10; + /* pushed by movem r13, [sp] in SAVE_ALL, movem pushes backwards */ + unsigned long r13; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; + unsigned long mof; + unsigned long dccr; + unsigned long srp; + unsigned long irp; /* This is actually the debugged process' PC */ + unsigned long csrinstr; + unsigned long csraddr; + unsigned long csrdata; +}; + +/* switch_stack is the extra stuff pushed onto the stack in _resume (entry.S) + * when doing a context-switch. it is used (apart from in resume) when a new + * thread is made and we need to make _resume (which is starting it for the + * first time) realise what is going on. + * + * Actually, the use is very close to the thread struct (TSS) in that both the + * switch_stack and the TSS are used to keep thread stuff when switching in + * _resume. 
+ */ + +struct switch_stack { + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; + unsigned long return_ip; /* ip that _resume will return to */ +}; + +#ifdef __KERNEL__ + +/* bit 8 is user-mode flag */ +#define user_mode(regs) (((regs)->dccr & 0x100) != 0) +#define instruction_pointer(regs) ((regs)->irp) +#define profile_pc(regs) instruction_pointer(regs) + +#endif /* __KERNEL__ */ + +#endif diff --git a/arch/cris/include/uapi/asm/ptrace_v32.h b/arch/cris/include/uapi/asm/ptrace_v32.h new file mode 100644 index 0000000..19773d3 --- /dev/null +++ b/arch/cris/include/uapi/asm/ptrace_v32.h @@ -0,0 +1,118 @@ +#ifndef _CRIS_ARCH_PTRACE_H +#define _CRIS_ARCH_PTRACE_H + +/* Register numbers in the ptrace system call interface */ + +#define PT_ORIG_R10 0 +#define PT_R0 1 +#define PT_R1 2 +#define PT_R2 3 +#define PT_R3 4 +#define PT_R4 5 +#define PT_R5 6 +#define PT_R6 7 +#define PT_R7 8 +#define PT_R8 9 +#define PT_R9 10 +#define PT_R10 11 +#define PT_R11 12 +#define PT_R12 13 +#define PT_R13 14 +#define PT_ACR 15 +#define PT_SRS 16 +#define PT_MOF 17 +#define PT_SPC 18 +#define PT_CCS 19 +#define PT_SRP 20 +#define PT_ERP 21 /* This is actually the debugged process' PC */ +#define PT_EXS 22 +#define PT_EDA 23 +#define PT_USP 24 /* special case - USP is not in the pt_regs */ +#define PT_PPC 25 /* special case - pseudo PC */ +#define PT_BP 26 /* Base number for BP registers. */ +#define PT_BP_CTRL 26 /* BP control register. */ +#define PT_MAX 40 + +/* Condition code bit numbers. */ +#define C_CCS_BITNR 0 +#define V_CCS_BITNR 1 +#define Z_CCS_BITNR 2 +#define N_CCS_BITNR 3 +#define X_CCS_BITNR 4 +#define I_CCS_BITNR 5 +#define U_CCS_BITNR 6 +#define P_CCS_BITNR 7 +#define R_CCS_BITNR 8 +#define S_CCS_BITNR 9 +#define M_CCS_BITNR 30 +#define Q_CCS_BITNR 31 +#define CCS_SHIFT 10 /* Shift count for each level in CCS */ + +/* pt_regs not only specifices the format in the user-struct during + * ptrace but is also the frame format used in the kernel prologue/epilogues + * themselves + */ + +struct pt_regs { + unsigned long orig_r10; + /* pushed by movem r13, [sp] in SAVE_ALL. */ + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long acr; + unsigned long srs; + unsigned long mof; + unsigned long spc; + unsigned long ccs; + unsigned long srp; + unsigned long erp; /* This is actually the debugged process' PC */ + /* For debugging purposes; saved only when needed. */ + unsigned long exs; + unsigned long eda; +}; + +/* switch_stack is the extra stuff pushed onto the stack in _resume (entry.S) + * when doing a context-switch. it is used (apart from in resume) when a new + * thread is made and we need to make _resume (which is starting it for the + * first time) realise what is going on. + * + * Actually, the use is very close to the thread struct (TSS) in that both the + * switch_stack and the TSS are used to keep thread stuff when switching in + * _resume. 
+ */ + +struct switch_stack { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long return_ip; /* ip that _resume will return to */ +}; + +#ifdef __KERNEL__ + +#define arch_has_single_step() (1) +#define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0) +#define instruction_pointer(regs) ((regs)->erp) +#define profile_pc(regs) instruction_pointer(regs) + +#endif /* __KERNEL__ */ + +#endif -- cgit v0.10.2 From 3a79a075375cebb672ddaea7896378afa0203e87 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 14 May 2015 17:00:58 +0200 Subject: CRIS: don't make asm/elf.h depend on asm/user.h We're going to export asm/elf.h; remove its dependencies on the non-exported asm/user.h and the unused asm/system.h include. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/arch-v10/arch/elf.h b/arch/cris/include/arch-v10/arch/elf.h index 1eb638a..3ea65ce 100644 --- a/arch/cris/include/arch-v10/arch/elf.h +++ b/arch/cris/include/arch-v10/arch/elf.h @@ -1,10 +1,11 @@ #ifndef __ASMCRIS_ARCH_ELF_H #define __ASMCRIS_ARCH_ELF_H -#include - #define ELF_MACH EF_CRIS_VARIANT_ANY_V0_V10 +/* Matches struct user_regs_struct */ +#define ELF_NGREG 35 + /* * This is used to ensure we don't load something for the wrong architecture. */ diff --git a/arch/cris/include/arch-v32/arch/elf.h b/arch/cris/include/arch-v32/arch/elf.h index c46d582..f09fe49 100644 --- a/arch/cris/include/arch-v32/arch/elf.h +++ b/arch/cris/include/arch-v32/arch/elf.h @@ -1,10 +1,11 @@ #ifndef _ASM_CRIS_ELF_H #define _ASM_CRIS_ELF_H -#include - #define ELF_CORE_EFLAGS EF_CRIS_VARIANT_V32 +/* Matches struct user_regs_struct */ +#define ELF_NGREG 32 + /* * This is used to ensure we don't load something for the wrong architecture. */ diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h index c2a394f..986adba 100644 --- a/arch/cris/include/asm/elf.h +++ b/arch/cris/include/asm/elf.h @@ -5,7 +5,7 @@ * ELF register definitions.. */ -#include +#include #define R_CRIS_NONE 0 #define R_CRIS_8 1 @@ -32,7 +32,6 @@ typedef unsigned long elf_greg_t; /* Note that NGREG is defined to ELF_NGREG in include/linux/elfcore.h, and is thus exposed to user-space. */ -#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; /* A placeholder; CRIS does not have any fp regs. */ @@ -45,8 +44,6 @@ typedef unsigned long elf_fpregset_t; #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_CRIS -#include - /* The master for these definitions is {binutils}/include/elf/cris.h: */ /* User symbols in this file have a leading underscore. */ #define EF_CRIS_UNDERSCORE 0x00000001 -- cgit v0.10.2 From 2493d3e28a22087116158249d79410fd355d1e83 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 27 Mar 2015 22:35:11 +0100 Subject: CRIS: UAPI: fix elf.h export CRIS userspace (uClibc for one) expects asm/elf.h to be exported but this header appears to have gone missing at some point. Move it to uapi/ and export it. 
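The exported header can follow the same variant-dispatch shape as the uapi ptrace.h wrapper above (a sketch of the pattern only; the guard and the elf_v10.h/elf_v32.h file names are assumptions, not taken from this series):

#ifndef _UAPI_ASM_CRIS_ELF_H
#define _UAPI_ASM_CRIS_ELF_H

#ifdef __arch_v32
#include <asm/elf_v32.h>	/* CRISv32 register layout */
#else
#include <asm/elf_v10.h>	/* CRISv10 register layout */
#endif

#endif /* _UAPI_ASM_CRIS_ELF_H */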
Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/arch-v10/arch/elf.h b/arch/cris/include/arch-v10/arch/elf.h deleted file mode 100644 index 3ea65ce..0000000 --- a/arch/cris/include/arch-v10/arch/elf.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef __ASMCRIS_ARCH_ELF_H -#define __ASMCRIS_ARCH_ELF_H - -#define ELF_MACH EF_CRIS_VARIANT_ANY_V0_V10 - -/* Matches struct user_regs_struct */ -#define ELF_NGREG 35 - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) \ - ((x)->e_machine == EM_CRIS \ - && ((((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_ANY_V0_V10 \ - || (((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_COMMON_V10_V32)))) - -/* - * ELF register definitions.. - */ - -#include - -/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program - starts (a register; assume first param register for CRIS) - contains a pointer to a function which might be - registered using `atexit'. This provides a mean for the - dynamic linker to call DT_FINI functions for shared libraries - that have been loaded before the code runs. - - A value of 0 tells we have no such handler. */ - -/* Explicitly set registers to 0 to increase determinism. */ -#define ELF_PLAT_INIT(_r, load_addr) do { \ - (_r)->r13 = 0; (_r)->r12 = 0; (_r)->r11 = 0; (_r)->r10 = 0; \ - (_r)->r9 = 0; (_r)->r8 = 0; (_r)->r7 = 0; (_r)->r6 = 0; \ - (_r)->r5 = 0; (_r)->r4 = 0; (_r)->r3 = 0; (_r)->r2 = 0; \ - (_r)->r1 = 0; (_r)->r0 = 0; (_r)->mof = 0; (_r)->srp = 0; \ -} while (0) - -/* The additional layer below is because the stack pointer is missing in - the pt_regs struct, but needed in a core dump. pr_reg is a elf_gregset_t, - and should be filled in according to the layout of the user_regs_struct - struct; regs is a pt_regs struct. We dump all registers, though several are - obviously unnecessary. That way there's less need for intelligence at - the receiving end (i.e. gdb). 
*/ -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ - pr_reg[0] = regs->r0; \ - pr_reg[1] = regs->r1; \ - pr_reg[2] = regs->r2; \ - pr_reg[3] = regs->r3; \ - pr_reg[4] = regs->r4; \ - pr_reg[5] = regs->r5; \ - pr_reg[6] = regs->r6; \ - pr_reg[7] = regs->r7; \ - pr_reg[8] = regs->r8; \ - pr_reg[9] = regs->r9; \ - pr_reg[10] = regs->r10; \ - pr_reg[11] = regs->r11; \ - pr_reg[12] = regs->r12; \ - pr_reg[13] = regs->r13; \ - pr_reg[14] = rdusp(); /* sp */ \ - pr_reg[15] = regs->irp; /* pc */ \ - pr_reg[16] = 0; /* p0 */ \ - pr_reg[17] = rdvr(); /* vr */ \ - pr_reg[18] = 0; /* p2 */ \ - pr_reg[19] = 0; /* p3 */ \ - pr_reg[20] = 0; /* p4 */ \ - pr_reg[21] = (regs->dccr & 0xffff); /* ccr */ \ - pr_reg[22] = 0; /* p6 */ \ - pr_reg[23] = regs->mof; /* mof */ \ - pr_reg[24] = 0; /* p8 */ \ - pr_reg[25] = 0; /* ibr */ \ - pr_reg[26] = 0; /* irp */ \ - pr_reg[27] = regs->srp; /* srp */ \ - pr_reg[28] = 0; /* bar */ \ - pr_reg[29] = regs->dccr; /* dccr */ \ - pr_reg[30] = 0; /* brp */ \ - pr_reg[31] = rdusp(); /* usp */ \ - pr_reg[32] = 0; /* csrinstr */ \ - pr_reg[33] = 0; /* csraddr */ \ - pr_reg[34] = 0; /* csrdata */ - - -#endif diff --git a/arch/cris/include/arch-v32/arch/elf.h b/arch/cris/include/arch-v32/arch/elf.h deleted file mode 100644 index f09fe49..0000000 --- a/arch/cris/include/arch-v32/arch/elf.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _ASM_CRIS_ELF_H -#define _ASM_CRIS_ELF_H - -#define ELF_CORE_EFLAGS EF_CRIS_VARIANT_V32 - -/* Matches struct user_regs_struct */ -#define ELF_NGREG 32 - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) \ - ((x)->e_machine == EM_CRIS \ - && ((((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_V32 \ - || (((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_COMMON_V10_V32)))) - -/* CRISv32 ELF register definitions. */ - -#include - -/* Explicitly zero out registers to increase determinism. */ -#define ELF_PLAT_INIT(_r, load_addr) do { \ - (_r)->r13 = 0; (_r)->r12 = 0; (_r)->r11 = 0; (_r)->r10 = 0; \ - (_r)->r9 = 0; (_r)->r8 = 0; (_r)->r7 = 0; (_r)->r6 = 0; \ - (_r)->r5 = 0; (_r)->r4 = 0; (_r)->r3 = 0; (_r)->r2 = 0; \ - (_r)->r1 = 0; (_r)->r0 = 0; (_r)->mof = 0; (_r)->srp = 0; \ - (_r)->acr = 0; \ -} while (0) - -/* - * An executable for which elf_read_implies_exec() returns TRUE will - * have the READ_IMPLIES_EXEC personality flag set automatically. - */ -#define elf_read_implies_exec_binary(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) - -/* - * This is basically a pt_regs with the additional definition - * of the stack pointer since it's needed in a core dump. - * pr_regs is a elf_gregset_t and should be filled according - * to the layout of user_regs_struct. 
- */ -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ - pr_reg[0] = regs->r0; \ - pr_reg[1] = regs->r1; \ - pr_reg[2] = regs->r2; \ - pr_reg[3] = regs->r3; \ - pr_reg[4] = regs->r4; \ - pr_reg[5] = regs->r5; \ - pr_reg[6] = regs->r6; \ - pr_reg[7] = regs->r7; \ - pr_reg[8] = regs->r8; \ - pr_reg[9] = regs->r9; \ - pr_reg[10] = regs->r10; \ - pr_reg[11] = regs->r11; \ - pr_reg[12] = regs->r12; \ - pr_reg[13] = regs->r13; \ - pr_reg[14] = rdusp(); /* SP */ \ - pr_reg[15] = regs->acr; /* ACR */ \ - pr_reg[16] = 0; /* BZ */ \ - pr_reg[17] = rdvr(); /* VR */ \ - pr_reg[18] = 0; /* PID */ \ - pr_reg[19] = regs->srs; /* SRS */ \ - pr_reg[20] = 0; /* WZ */ \ - pr_reg[21] = regs->exs; /* EXS */ \ - pr_reg[22] = regs->eda; /* EDA */ \ - pr_reg[23] = regs->mof; /* MOF */ \ - pr_reg[24] = 0; /* DZ */ \ - pr_reg[25] = 0; /* EBP */ \ - pr_reg[26] = regs->erp; /* ERP */ \ - pr_reg[27] = regs->srp; /* SRP */ \ - pr_reg[28] = 0; /* NRP */ \ - pr_reg[29] = regs->ccs; /* CCS */ \ - pr_reg[30] = rdusp(); /* USP */ \ - pr_reg[31] = regs->spc; /* SPC */ \ - -#endif /* _ASM_CRIS_ELF_H */ diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h deleted file mode 100644 index 986adba..0000000 --- a/arch/cris/include/asm/elf.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef __ASMCRIS_ELF_H -#define __ASMCRIS_ELF_H - -/* - * ELF register definitions.. - */ - -#include - -#define R_CRIS_NONE 0 -#define R_CRIS_8 1 -#define R_CRIS_16 2 -#define R_CRIS_32 3 -#define R_CRIS_8_PCREL 4 -#define R_CRIS_16_PCREL 5 -#define R_CRIS_32_PCREL 6 -#define R_CRIS_GNU_VTINHERIT 7 -#define R_CRIS_GNU_VTENTRY 8 -#define R_CRIS_COPY 9 -#define R_CRIS_GLOB_DAT 10 -#define R_CRIS_JUMP_SLOT 11 -#define R_CRIS_RELATIVE 12 -#define R_CRIS_16_GOT 13 -#define R_CRIS_32_GOT 14 -#define R_CRIS_16_GOTPLT 15 -#define R_CRIS_32_GOTPLT 16 -#define R_CRIS_32_GOTREL 17 -#define R_CRIS_32_PLT_GOTREL 18 -#define R_CRIS_32_PLT_PCREL 19 - -typedef unsigned long elf_greg_t; - -/* Note that NGREG is defined to ELF_NGREG in include/linux/elfcore.h, and is - thus exposed to user-space. */ -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; - -/* A placeholder; CRIS does not have any fp regs. */ -typedef unsigned long elf_fpregset_t; - -/* - * These are used to set parameters in the core dumps. - */ -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_CRIS - -/* The master for these definitions is {binutils}/include/elf/cris.h: */ -/* User symbols in this file have a leading underscore. */ -#define EF_CRIS_UNDERSCORE 0x00000001 - -/* This is a mask for different incompatible machine variants. */ -#define EF_CRIS_VARIANT_MASK 0x0000000e - -/* Variant 0; may contain v0..10 object. */ -#define EF_CRIS_VARIANT_ANY_V0_V10 0x00000000 - -/* Variant 1; contains v32 object. */ -#define EF_CRIS_VARIANT_V32 0x00000002 - -/* Variant 2; contains object compatible with v32 and v10. */ -#define EF_CRIS_VARIANT_COMMON_V10_V32 0x00000004 -/* End of excerpt from {binutils}/include/elf/cris.h. */ - -#define ELF_EXEC_PAGESIZE 8192 - -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) - -/* This yields a mask that user programs can use to figure out what - instruction set this CPU supports. 
This could be done in user space, - but it's not easy, and we've already done it here. */ - -#define ELF_HWCAP (0) - -/* This yields a string that ld.so will use to load implementation - specific libraries for optimization. This is more specific in - intent than poking at uname or /proc/cpuinfo. -*/ - -#define ELF_PLATFORM (NULL) - -#endif diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 8443dc8..d5564a0 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -6,6 +6,9 @@ header-y += ../arch-v32/arch/ header-y += auxvec.h header-y += bitsperlong.h header-y += byteorder.h +header-y += elf.h +header-y += elf_v10.h +header-y += elf_v32.h header-y += errno.h header-y += ethernet.h header-y += etraxgpio.h diff --git a/arch/cris/include/uapi/asm/elf.h b/arch/cris/include/uapi/asm/elf.h new file mode 100644 index 0000000..a5df05b --- /dev/null +++ b/arch/cris/include/uapi/asm/elf.h @@ -0,0 +1,90 @@ +#ifndef __ASMCRIS_ELF_H +#define __ASMCRIS_ELF_H + +/* + * ELF register definitions.. + */ + +#ifdef __arch_v32 +#include +#else +#include +#endif + +#define R_CRIS_NONE 0 +#define R_CRIS_8 1 +#define R_CRIS_16 2 +#define R_CRIS_32 3 +#define R_CRIS_8_PCREL 4 +#define R_CRIS_16_PCREL 5 +#define R_CRIS_32_PCREL 6 +#define R_CRIS_GNU_VTINHERIT 7 +#define R_CRIS_GNU_VTENTRY 8 +#define R_CRIS_COPY 9 +#define R_CRIS_GLOB_DAT 10 +#define R_CRIS_JUMP_SLOT 11 +#define R_CRIS_RELATIVE 12 +#define R_CRIS_16_GOT 13 +#define R_CRIS_32_GOT 14 +#define R_CRIS_16_GOTPLT 15 +#define R_CRIS_32_GOTPLT 16 +#define R_CRIS_32_GOTREL 17 +#define R_CRIS_32_PLT_GOTREL 18 +#define R_CRIS_32_PLT_PCREL 19 + +typedef unsigned long elf_greg_t; + +/* Note that NGREG is defined to ELF_NGREG in include/linux/elfcore.h, and is + thus exposed to user-space. */ +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +/* A placeholder; CRIS does not have any fp regs. */ +typedef unsigned long elf_fpregset_t; + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_CRIS + +/* The master for these definitions is {binutils}/include/elf/cris.h: */ +/* User symbols in this file have a leading underscore. */ +#define EF_CRIS_UNDERSCORE 0x00000001 + +/* This is a mask for different incompatible machine variants. */ +#define EF_CRIS_VARIANT_MASK 0x0000000e + +/* Variant 0; may contain v0..10 object. */ +#define EF_CRIS_VARIANT_ANY_V0_V10 0x00000000 + +/* Variant 1; contains v32 object. */ +#define EF_CRIS_VARIANT_V32 0x00000002 + +/* Variant 2; contains object compatible with v32 and v10. */ +#define EF_CRIS_VARIANT_COMMON_V10_V32 0x00000004 +/* End of excerpt from {binutils}/include/elf/cris.h. */ + +#define ELF_EXEC_PAGESIZE 8192 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (0) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. 
+*/ + +#define ELF_PLATFORM (NULL) + +#endif diff --git a/arch/cris/include/uapi/asm/elf_v10.h b/arch/cris/include/uapi/asm/elf_v10.h new file mode 100644 index 0000000..3ea65ce --- /dev/null +++ b/arch/cris/include/uapi/asm/elf_v10.h @@ -0,0 +1,84 @@ +#ifndef __ASMCRIS_ARCH_ELF_H +#define __ASMCRIS_ARCH_ELF_H + +#define ELF_MACH EF_CRIS_VARIANT_ANY_V0_V10 + +/* Matches struct user_regs_struct */ +#define ELF_NGREG 35 + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_CRIS \ + && ((((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_ANY_V0_V10 \ + || (((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_COMMON_V10_V32)))) + +/* + * ELF register definitions.. + */ + +#include + +/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program + starts (a register; assume first param register for CRIS) + contains a pointer to a function which might be + registered using `atexit'. This provides a mean for the + dynamic linker to call DT_FINI functions for shared libraries + that have been loaded before the code runs. + + A value of 0 tells we have no such handler. */ + +/* Explicitly set registers to 0 to increase determinism. */ +#define ELF_PLAT_INIT(_r, load_addr) do { \ + (_r)->r13 = 0; (_r)->r12 = 0; (_r)->r11 = 0; (_r)->r10 = 0; \ + (_r)->r9 = 0; (_r)->r8 = 0; (_r)->r7 = 0; (_r)->r6 = 0; \ + (_r)->r5 = 0; (_r)->r4 = 0; (_r)->r3 = 0; (_r)->r2 = 0; \ + (_r)->r1 = 0; (_r)->r0 = 0; (_r)->mof = 0; (_r)->srp = 0; \ +} while (0) + +/* The additional layer below is because the stack pointer is missing in + the pt_regs struct, but needed in a core dump. pr_reg is a elf_gregset_t, + and should be filled in according to the layout of the user_regs_struct + struct; regs is a pt_regs struct. We dump all registers, though several are + obviously unnecessary. That way there's less need for intelligence at + the receiving end (i.e. gdb). */ +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = regs->r0; \ + pr_reg[1] = regs->r1; \ + pr_reg[2] = regs->r2; \ + pr_reg[3] = regs->r3; \ + pr_reg[4] = regs->r4; \ + pr_reg[5] = regs->r5; \ + pr_reg[6] = regs->r6; \ + pr_reg[7] = regs->r7; \ + pr_reg[8] = regs->r8; \ + pr_reg[9] = regs->r9; \ + pr_reg[10] = regs->r10; \ + pr_reg[11] = regs->r11; \ + pr_reg[12] = regs->r12; \ + pr_reg[13] = regs->r13; \ + pr_reg[14] = rdusp(); /* sp */ \ + pr_reg[15] = regs->irp; /* pc */ \ + pr_reg[16] = 0; /* p0 */ \ + pr_reg[17] = rdvr(); /* vr */ \ + pr_reg[18] = 0; /* p2 */ \ + pr_reg[19] = 0; /* p3 */ \ + pr_reg[20] = 0; /* p4 */ \ + pr_reg[21] = (regs->dccr & 0xffff); /* ccr */ \ + pr_reg[22] = 0; /* p6 */ \ + pr_reg[23] = regs->mof; /* mof */ \ + pr_reg[24] = 0; /* p8 */ \ + pr_reg[25] = 0; /* ibr */ \ + pr_reg[26] = 0; /* irp */ \ + pr_reg[27] = regs->srp; /* srp */ \ + pr_reg[28] = 0; /* bar */ \ + pr_reg[29] = regs->dccr; /* dccr */ \ + pr_reg[30] = 0; /* brp */ \ + pr_reg[31] = rdusp(); /* usp */ \ + pr_reg[32] = 0; /* csrinstr */ \ + pr_reg[33] = 0; /* csraddr */ \ + pr_reg[34] = 0; /* csrdata */ + + +#endif diff --git a/arch/cris/include/uapi/asm/elf_v32.h b/arch/cris/include/uapi/asm/elf_v32.h new file mode 100644 index 0000000..f09fe49 --- /dev/null +++ b/arch/cris/include/uapi/asm/elf_v32.h @@ -0,0 +1,76 @@ +#ifndef _ASM_CRIS_ELF_H +#define _ASM_CRIS_ELF_H + +#define ELF_CORE_EFLAGS EF_CRIS_VARIANT_V32 + +/* Matches struct user_regs_struct */ +#define ELF_NGREG 32 + +/* + * This is used to ensure we don't load something for the wrong architecture. 
+ */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_CRIS \ + && ((((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_V32 \ + || (((x)->e_flags & EF_CRIS_VARIANT_MASK) == EF_CRIS_VARIANT_COMMON_V10_V32)))) + +/* CRISv32 ELF register definitions. */ + +#include + +/* Explicitly zero out registers to increase determinism. */ +#define ELF_PLAT_INIT(_r, load_addr) do { \ + (_r)->r13 = 0; (_r)->r12 = 0; (_r)->r11 = 0; (_r)->r10 = 0; \ + (_r)->r9 = 0; (_r)->r8 = 0; (_r)->r7 = 0; (_r)->r6 = 0; \ + (_r)->r5 = 0; (_r)->r4 = 0; (_r)->r3 = 0; (_r)->r2 = 0; \ + (_r)->r1 = 0; (_r)->r0 = 0; (_r)->mof = 0; (_r)->srp = 0; \ + (_r)->acr = 0; \ +} while (0) + +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + */ +#define elf_read_implies_exec_binary(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) + +/* + * This is basically a pt_regs with the additional definition + * of the stack pointer since it's needed in a core dump. + * pr_regs is a elf_gregset_t and should be filled according + * to the layout of user_regs_struct. + */ +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = regs->r0; \ + pr_reg[1] = regs->r1; \ + pr_reg[2] = regs->r2; \ + pr_reg[3] = regs->r3; \ + pr_reg[4] = regs->r4; \ + pr_reg[5] = regs->r5; \ + pr_reg[6] = regs->r6; \ + pr_reg[7] = regs->r7; \ + pr_reg[8] = regs->r8; \ + pr_reg[9] = regs->r9; \ + pr_reg[10] = regs->r10; \ + pr_reg[11] = regs->r11; \ + pr_reg[12] = regs->r12; \ + pr_reg[13] = regs->r13; \ + pr_reg[14] = rdusp(); /* SP */ \ + pr_reg[15] = regs->acr; /* ACR */ \ + pr_reg[16] = 0; /* BZ */ \ + pr_reg[17] = rdvr(); /* VR */ \ + pr_reg[18] = 0; /* PID */ \ + pr_reg[19] = regs->srs; /* SRS */ \ + pr_reg[20] = 0; /* WZ */ \ + pr_reg[21] = regs->exs; /* EXS */ \ + pr_reg[22] = regs->eda; /* EDA */ \ + pr_reg[23] = regs->mof; /* MOF */ \ + pr_reg[24] = 0; /* DZ */ \ + pr_reg[25] = 0; /* EBP */ \ + pr_reg[26] = regs->erp; /* ERP */ \ + pr_reg[27] = regs->srp; /* SRP */ \ + pr_reg[28] = 0; /* NRP */ \ + pr_reg[29] = regs->ccs; /* CCS */ \ + pr_reg[30] = rdusp(); /* USP */ \ + pr_reg[31] = regs->spc; /* SPC */ \ + +#endif /* _ASM_CRIS_ELF_H */ -- cgit v0.10.2 From 0c02fa2f4ef47334387aeb3aef21ccdab6513459 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:31:10 +0100 Subject: CRIS: UAPI: use generic headers via Kbuild Use Kbuild magic to include the generic headers. 
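To make the "magic" concrete: for every name listed under generic-y, Kbuild emits a one-line wrapper header into the generated include directory, so that <asm/errno.h> and friends simply forward to the asm-generic copies. A sketch of the wrapper generated for the errno.h entry (the path follows the usual Kbuild convention):

/* arch/cris/include/generated/asm/errno.h, generated by Kbuild */
#include <asm-generic/errno.h>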
Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index ad2244f..1e19a4f 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,14 +1,19 @@ generic-y += atomic.h generic-y += barrier.h +generic-y += bitsperlong.h generic-y += clkdev.h generic-y += cmpxchg.h generic-y += cputime.h generic-y += device.h generic-y += div64.h +generic-y += errno.h generic-y += exec.h generic-y += emergency-restart.h +generic-y += fcntl.h generic-y += futex.h generic-y += hardirq.h +generic-y += ioctl.h +generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h @@ -19,10 +24,15 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mman.h generic-y += module.h generic-y += percpu.h +generic-y += poll.h generic-y += preempt.h +generic-y += resource.h generic-y += sections.h +generic-y += siginfo.h +generic-y += statfs.h generic-y += topology.h generic-y += trace_clock.h generic-y += vga.h diff --git a/arch/cris/include/uapi/asm/bitsperlong.h b/arch/cris/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 6dc0bb0..0000000 --- a/arch/cris/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/errno.h b/arch/cris/include/uapi/asm/errno.h deleted file mode 100644 index 2bf5eb5..0000000 --- a/arch/cris/include/uapi/asm/errno.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _CRIS_ERRNO_H -#define _CRIS_ERRNO_H - -#include - -#endif diff --git a/arch/cris/include/uapi/asm/fcntl.h b/arch/cris/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12d..0000000 --- a/arch/cris/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/ioctl.h b/arch/cris/include/uapi/asm/ioctl.h deleted file mode 100644 index b279fe0..0000000 --- a/arch/cris/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/ipcbuf.h b/arch/cris/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51..0000000 --- a/arch/cris/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/kvm_para.h b/arch/cris/include/uapi/asm/kvm_para.h deleted file mode 100644 index 14fab8f..0000000 --- a/arch/cris/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/mman.h b/arch/cris/include/uapi/asm/mman.h deleted file mode 100644 index 8eebf89..0000000 --- a/arch/cris/include/uapi/asm/mman.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/poll.h b/arch/cris/include/uapi/asm/poll.h deleted file mode 100644 index c98509d..0000000 --- a/arch/cris/include/uapi/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/cris/include/uapi/asm/resource.h b/arch/cris/include/uapi/asm/resource.h deleted file mode 100644 index b5d2944..0000000 --- a/arch/cris/include/uapi/asm/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _CRIS_RESOURCE_H -#define _CRIS_RESOURCE_H - -#include - -#endif diff --git a/arch/cris/include/uapi/asm/siginfo.h b/arch/cris/include/uapi/asm/siginfo.h deleted file mode 100644 index c1cd6d1..0000000 --- a/arch/cris/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _CRIS_SIGINFO_H -#define _CRIS_SIGINFO_H - -#include - -#endif diff --git a/arch/cris/include/uapi/asm/statfs.h b/arch/cris/include/uapi/asm/statfs.h deleted file mode 100644 index 
fdaf921..0000000 --- a/arch/cris/include/uapi/asm/statfs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _CRIS_STATFS_H -#define _CRIS_STATFS_H - -#include - -#endif -- cgit v0.10.2 From 889d74a4d40f60427d0482bc271bfc5660a1157b Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 22:59:42 +0100 Subject: CRIS: UAPI: use generic auxvec.h CRIS's auxvec.h is empty just like the asm-generic version. Effective diff: -#ifndef __ASMCRIS_AUXVEC_H -#define __ASMCRIS_AUXVEC_H +#ifndef __ASM_GENERIC_AUXVEC_H +#define __ASM_GENERIC_AUXVEC_H + Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 1e19a4f..1ee91be 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,4 +1,5 @@ generic-y += atomic.h +generic-y += auxvec.h generic-y += barrier.h generic-y += bitsperlong.h generic-y += clkdev.h diff --git a/arch/cris/include/uapi/asm/auxvec.h b/arch/cris/include/uapi/asm/auxvec.h deleted file mode 100644 index cb30b01..0000000 --- a/arch/cris/include/uapi/asm/auxvec.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __ASMCRIS_AUXVEC_H -#define __ASMCRIS_AUXVEC_H - -#endif -- cgit v0.10.2 From c823b970b632a86e4f01f3eae9c0860350b70c3c Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:21:25 +0100 Subject: CRIS: UAPI: use generic sockios.h CRIS' sockios.h is equivalent to the asm-generic version. Effective diff: -#ifndef __ARCH_CRIS_SOCKIOS__ -#define __ARCH_CRIS_SOCKIOS__ +#ifndef __ASM_GENERIC_SOCKIOS_H +#define __ASM_GENERIC_SOCKIOS_H Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 1ee91be..14095b9 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -33,6 +33,7 @@ generic-y += preempt.h generic-y += resource.h generic-y += sections.h generic-y += siginfo.h +generic-y += sockios.h generic-y += statfs.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/cris/include/uapi/asm/sockios.h b/arch/cris/include/uapi/asm/sockios.h deleted file mode 100644 index cfe7bfe..0000000 --- a/arch/cris/include/uapi/asm/sockios.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ARCH_CRIS_SOCKIOS__ -#define __ARCH_CRIS_SOCKIOS__ - -/* Socket-level I/O control calls. */ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ - -#endif -- cgit v0.10.2 From 53789d25a0603e5c836e460c758867d2071eb135 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:43:31 +0100 Subject: CRIS: UAPI: use generic sembuf.h CRIS's sembuf.h is equivalent to the asm-generic version. 
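The generic header guards its extra padding with __BITS_PER_LONG, which is 32 on CRIS, so the resulting layout is byte-for-byte identical to the old CRIS definition, as the diff below shows. A hypothetical build-time check of that equivalence (illustration only, not part of the patch):

#include <asm/ipcbuf.h>		/* struct ipc64_perm */
#include <asm/sembuf.h>

_Static_assert(sizeof(struct semid64_ds) ==
	       sizeof(struct ipc64_perm) +
	       2 * sizeof(__kernel_time_t) + 5 * sizeof(unsigned long),
	       "generic semid64_ds must match the old CRIS layout");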
Effective diff: -#ifndef _CRIS_SEMBUF_H -#define _CRIS_SEMBUF_H +#ifndef __ASM_GENERIC_SEMBUF_H +#define __ASM_GENERIC_SEMBUF_H +#include struct semid64_ds { struct ipc64_perm sem_perm; __kernel_time_t sem_otime; +#if __BITS_PER_LONG != 64 unsigned long __unused1; +#endif __kernel_time_t sem_ctime; +#if __BITS_PER_LONG != 64 unsigned long __unused2; +#endif unsigned long sem_nsems; unsigned long __unused3; unsigned long __unused4; Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 14095b9..2dcd670 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -32,6 +32,7 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += sections.h +generic-y += sembuf.h generic-y += siginfo.h generic-y += sockios.h generic-y += statfs.h diff --git a/arch/cris/include/uapi/asm/sembuf.h b/arch/cris/include/uapi/asm/sembuf.h deleted file mode 100644 index 7fed984..0000000 --- a/arch/cris/include/uapi/asm/sembuf.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _CRIS_SEMBUF_H -#define _CRIS_SEMBUF_H - -/* - * The semid64_ds structure for CRIS architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct semid64_ds { - struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ - __kernel_time_t sem_otime; /* last semop time */ - unsigned long __unused1; - __kernel_time_t sem_ctime; /* last change time */ - unsigned long __unused2; - unsigned long sem_nsems; /* no. of semaphores in array */ - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _CRIS_SEMBUF_H */ -- cgit v0.10.2 From 45266922510fcd1e55df483f41be8debd5df9de8 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:47:37 +0100 Subject: CRIS: UAPI: use generic socket.h CRIS' socket.h is equivalent to the asm-generic version. 
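The #ifndef SO_PASSCRED guard visible in the diff below is what lets an architecture with historically different numbering pre-define that block of constants and still reuse the generic remainder; CRIS uses the default values, so nothing changes here. A sketch of the override pattern, for a purely hypothetical architecture (the values are made up):

/* hypothetical <asm/socket.h> for an arch with non-default numbering */
#define SO_PASSCRED	0x100
#define SO_PEERCRED	0x101
#define SO_RCVLOWAT	0x102
#define SO_SNDLOWAT	0x103
#define SO_RCVTIMEO	0x104
#define SO_SNDTIMEO	0x105

#include <asm-generic/socket.h>	/* supplies every constant not set above */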
Effective diff: -#ifndef _ASM_SOCKET_H -#define _ASM_SOCKET_H - - +#ifndef __ASM_GENERIC_SOCKET_H +#define __ASM_GENERIC_SOCKET_H #include #define SO_LINGER 13 #define SO_BSDCOMPAT 14 #define SO_REUSEPORT 15 +#ifndef SO_PASSCRED #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 #define SO_SNDLOWAT 19 #define SO_RCVTIMEO 20 #define SO_SNDTIMEO 21 +#endif #define SO_SECURITY_AUTHENTICATION 22 Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 2dcd670..30ed4e2 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -34,6 +34,7 @@ generic-y += resource.h generic-y += sections.h generic-y += sembuf.h generic-y += siginfo.h +generic-y += socket.h generic-y += sockios.h generic-y += statfs.h generic-y += topology.h diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h deleted file mode 100644 index e2503d9f..0000000 --- a/arch/cris/include/uapi/asm/socket.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef _ASM_SOCKET_H -#define _ASM_SOCKET_H - -/* almost the same as asm-i386/socket.h */ - -#include - -/* For setsockoptions(2) */ -#define SOL_SOCKET 1 - -#define SO_DEBUG 1 -#define SO_REUSEADDR 2 -#define SO_TYPE 3 -#define SO_ERROR 4 -#define SO_DONTROUTE 5 -#define SO_BROADCAST 6 -#define SO_SNDBUF 7 -#define SO_RCVBUF 8 -#define SO_SNDBUFFORCE 32 -#define SO_RCVBUFFORCE 33 -#define SO_KEEPALIVE 9 -#define SO_OOBINLINE 10 -#define SO_NO_CHECK 11 -#define SO_PRIORITY 12 -#define SO_LINGER 13 -#define SO_BSDCOMPAT 14 -#define SO_REUSEPORT 15 -#define SO_PASSCRED 16 -#define SO_PEERCRED 17 -#define SO_RCVLOWAT 18 -#define SO_SNDLOWAT 19 -#define SO_RCVTIMEO 20 -#define SO_SNDTIMEO 21 - -/* Security levels - as per NRL IPv6 - don't actually do anything */ -#define SO_SECURITY_AUTHENTICATION 22 -#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 -#define SO_SECURITY_ENCRYPTION_NETWORK 24 - -#define SO_BINDTODEVICE 25 - -/* Socket filtering */ -#define SO_ATTACH_FILTER 26 -#define SO_DETACH_FILTER 27 -#define SO_GET_FILTER SO_ATTACH_FILTER - -#define SO_PEERNAME 28 -#define SO_TIMESTAMP 29 -#define SCM_TIMESTAMP SO_TIMESTAMP - -#define SO_ACCEPTCONN 30 - -#define SO_PEERSEC 31 -#define SO_PASSSEC 34 -#define SO_TIMESTAMPNS 35 -#define SCM_TIMESTAMPNS SO_TIMESTAMPNS - -#define SO_MARK 36 - -#define SO_TIMESTAMPING 37 -#define SCM_TIMESTAMPING SO_TIMESTAMPING - -#define SO_PROTOCOL 38 -#define SO_DOMAIN 39 - -#define SO_RXQ_OVFL 40 - -#define SO_WIFI_STATUS 41 -#define SCM_WIFI_STATUS SO_WIFI_STATUS -#define SO_PEEK_OFF 42 - -/* Instruct lower device to use last 4-bytes of skb data as FCS */ -#define SO_NOFCS 43 - -#define SO_LOCK_FILTER 44 - -#define SO_SELECT_ERR_QUEUE 45 - -#define SO_BUSY_POLL 46 - -#define SO_MAX_PACING_RATE 47 - -#define SO_BPF_EXTENSIONS 48 - -#define SO_INCOMING_CPU 49 - -#define SO_ATTACH_BPF 50 -#define SO_DETACH_BPF SO_DETACH_FILTER - -#endif /* _ASM_SOCKET_H */ - - -- cgit v0.10.2 From 74d94adb351161cc4027f94e878ac4e80adcfdc9 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:53:37 +0100 Subject: CRIS: UAPI: use generic msgbuf.h CRIS' msgbuf.h is equivalent to the asm-generic version. 
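As with sembuf.h, the __BITS_PER_LONG-guarded padding is a no-op on 32-bit CRIS, and the switch to __kernel_ulong_t is layout-neutral because that type is plain unsigned long on a 32-bit architecture. A hypothetical compile-time check of the latter point (illustration only):

#include <linux/posix_types.h>

_Static_assert(sizeof(__kernel_ulong_t) == sizeof(unsigned long),
	       "__kernel_ulong_t rename in msqid64_ds is layout-neutral");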
Effective diff: -#ifndef _CRIS_MSGBUF_H -#define _CRIS_MSGBUF_H - - +#ifndef __ASM_GENERIC_MSGBUF_H +#define __ASM_GENERIC_MSGBUF_H +#include <asm/bitsperlong.h> struct msqid64_ds { struct ipc64_perm msg_perm; __kernel_time_t msg_stime; +#if __BITS_PER_LONG != 64 unsigned long __unused1; +#endif __kernel_time_t msg_rtime; +#if __BITS_PER_LONG != 64 unsigned long __unused2; +#endif __kernel_time_t msg_ctime; +#if __BITS_PER_LONG != 64 unsigned long __unused3; - unsigned long msg_cbytes; - unsigned long msg_qnum; - unsigned long msg_qbytes; +#endif + __kernel_ulong_t msg_cbytes; + __kernel_ulong_t msg_qnum; + __kernel_ulong_t msg_qbytes; __kernel_pid_t msg_lspid; __kernel_pid_t msg_lrpid; - unsigned long __unused4; - unsigned long __unused5; + __kernel_ulong_t __unused4; + __kernel_ulong_t __unused5; }; #endif Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 30ed4e2..ccc0182 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -27,6 +27,7 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += module.h +generic-y += msgbuf.h generic-y += percpu.h generic-y += poll.h generic-y += preempt.h diff --git a/arch/cris/include/uapi/asm/msgbuf.h b/arch/cris/include/uapi/asm/msgbuf.h deleted file mode 100644 index ada63df..0000000 --- a/arch/cris/include/uapi/asm/msgbuf.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _CRIS_MSGBUF_H -#define _CRIS_MSGBUF_H - -/* verbatim copy of asm-i386 version */ - -/* - * The msqid64_ds structure for CRIS architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ - unsigned long __unused1; - __kernel_time_t msg_rtime; /* last msgrcv time */ - unsigned long __unused2; - __kernel_time_t msg_ctime; /* last change time */ - unsigned long __unused3; - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused4; - unsigned long __unused5; -}; - -#endif /* _CRIS_MSGBUF_H */ -- cgit v0.10.2 From 258a9ff66c30c830f6e2f06856878be4d4cc81af Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:55:46 +0100 Subject: CRIS: UAPI: use generic shmbuf.h CRIS' shmbuf.h is equivalent to the asm-generic version.
Effective diff: -#ifndef _CRIS_SHMBUF_H -#define _CRIS_SHMBUF_H +#ifndef __ASM_GENERIC_SHMBUF_H +#define __ASM_GENERIC_SHMBUF_H + +#include struct ipc64_perm shm_perm; size_t shm_segsz; __kernel_time_t shm_atime; +#if __BITS_PER_LONG != 64 unsigned long __unused1; +#endif __kernel_time_t shm_dtime; +#if __BITS_PER_LONG != 64 unsigned long __unused2; +#endif __kernel_time_t shm_ctime; +#if __BITS_PER_LONG != 64 unsigned long __unused3; +#endif __kernel_pid_t shm_cpid; __kernel_pid_t shm_lpid; - unsigned long shm_nattch; - unsigned long __unused4; - unsigned long __unused5; + __kernel_ulong_t shm_nattch; + __kernel_ulong_t __unused4; + __kernel_ulong_t __unused5; }; struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; + __kernel_ulong_t shmmax; + __kernel_ulong_t shmmin; + __kernel_ulong_t shmmni; + __kernel_ulong_t shmseg; + __kernel_ulong_t shmall; + __kernel_ulong_t __unused1; + __kernel_ulong_t __unused2; + __kernel_ulong_t __unused3; + __kernel_ulong_t __unused4; }; #endif Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index ccc0182..17b5237 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -34,6 +34,7 @@ generic-y += preempt.h generic-y += resource.h generic-y += sections.h generic-y += sembuf.h +generic-y += shmbuf.h generic-y += siginfo.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/cris/include/uapi/asm/shmbuf.h b/arch/cris/include/uapi/asm/shmbuf.h deleted file mode 100644 index 3239e3f..0000000 --- a/arch/cris/include/uapi/asm/shmbuf.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _CRIS_SHMBUF_H -#define _CRIS_SHMBUF_H - -/* - * The shmid64_ds structure for CRIS architecture (same as for i386) - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - unsigned long __unused1; - __kernel_time_t shm_dtime; /* last detach time */ - unsigned long __unused2; - __kernel_time_t shm_ctime; /* last change time */ - unsigned long __unused3; - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. of current attaches */ - unsigned long __unused4; - unsigned long __unused5; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _CRIS_SHMBUF_H */ -- cgit v0.10.2 From 20ae24739968ae7f94d0e73215d444042f91df78 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 28 Feb 2015 23:59:50 +0100 Subject: CRIS: UAPI: use generic types.h CRIS' types.h is functionally identical to the asm-generic version. 
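The only thing the old kernel-side header provided beyond the include was BITS_PER_LONG, and that now comes from <asm/bitsperlong.h>, which was made generic earlier in this series. An illustrative kernel-side check, not part of the patch:

#include <asm/bitsperlong.h>

#if BITS_PER_LONG != 32
#error CRIS is a 32-bit architecture
#endif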
Effective diff: +#ifndef _ASM_GENERIC_TYPES_H +#define _ASM_GENERIC_TYPES_H + #include <asm-generic/int-ll64.h> + +#endif Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 17b5237..b7f6819 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -41,5 +41,6 @@ generic-y += sockios.h generic-y += statfs.h generic-y += topology.h generic-y += trace_clock.h +generic-y += types.h generic-y += vga.h generic-y += xor.h diff --git a/arch/cris/include/asm/types.h b/arch/cris/include/asm/types.h deleted file mode 100644 index a3cac77..0000000 --- a/arch/cris/include/asm/types.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ETRAX_TYPES_H -#define _ETRAX_TYPES_H - -#include <uapi/asm/types.h> - -/* - * These aren't exported outside the kernel to avoid name space clashes - */ - -#define BITS_PER_LONG 32 - -#endif diff --git a/arch/cris/include/uapi/asm/types.h b/arch/cris/include/uapi/asm/types.h deleted file mode 100644 index 9ec9d4c..0000000 --- a/arch/cris/include/uapi/asm/types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/int-ll64.h> -- cgit v0.10.2 From 444e0c2881bcc70db6833d9c653c6ced36813d3b Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 19 Feb 2015 21:36:55 +0100 Subject: CRISv32: add support for irqflags tracing Add support for irqflags tracing, which is required for things like lockdep and ftrace. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index b202b82..e7ba2d4 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -36,6 +36,10 @@ config FORCE_MAX_ZONEORDER int default 6 +config TRACE_IRQFLAGS_SUPPORT + depends on ETRAX_ARCH_V32 + def_bool y + config CRIS bool default y diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 1c5595a..b17a209 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -240,6 +240,17 @@ ret_from_sys_call: .type _Rexit,@function _Rexit: +#if defined(CONFIG_TRACE_IRQFLAGS) + addoq +PT_ccs, $sp, $acr + move.d [$acr], $r0 + btstq 15, $r0 ; I1 + bpl 1f + nop + jsr trace_hardirqs_on + nop +1: +#endif + ;; This epilogue MUST match the prologues in multiple_interrupt, irq.h ;; and ptregs.h. addq 4, $sp ; Skip orig_r10. diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index dd0be5d..694850e 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -45,7 +45,11 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { unsigned long sp; - struct pt_regs *old_regs = set_irq_regs(regs); + struct pt_regs *old_regs; + + trace_hardirqs_off(); + + old_regs = set_irq_regs(regs); irq_enter(); sp = rdsp(); if (unlikely((sp & (PAGE_SIZE - 1)) < (PAGE_SIZE/8))) { -- cgit v0.10.2 From 3fffa23ee0a348aef1b597b67626d4724667143b Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 20 Feb 2015 18:57:37 +0100 Subject: CRISv32: annotate irq enable in idle loop Use a call to local_irq_enable() instead of inline asm so that the irqsoff latency tracer knows that interrupts are enabled here. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index cebd32e..c7ce784 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -23,9 +23,9 @@ extern void stop_watchdog(void); /* We use this if we don't have any better idle routine. */ void default_idle(void) { + local_irq_enable(); /* Halt until exception.
*/ - __asm__ volatile("ei \n\t" - "halt "); + __asm__ volatile("halt"); } /* -- cgit v0.10.2 From aa6f4d2b6547a9949d87c9b09a872a7015366588 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 14 May 2015 18:19:03 +0200 Subject: CRIS: add STACKTRACE_SUPPORT Add stacktrace support, which is required for lockdep and tracing. The stack tracing simply looks at all kernel text symbols found on the stack, similar to the trap stack dumping code, which can also be converted to use this. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index e7ba2d4..61f4acc 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -40,6 +40,9 @@ config TRACE_IRQFLAGS_SUPPORT depends on ETRAX_ARCH_V32 def_bool y +config STACKTRACE_SUPPORT + def_bool y + config CRIS bool default y diff --git a/arch/cris/include/asm/stacktrace.h b/arch/cris/include/asm/stacktrace.h new file mode 100644 index 0000000..2d90856 --- /dev/null +++ b/arch/cris/include/asm/stacktrace.h @@ -0,0 +1,8 @@ +#ifndef __CRIS_STACKTRACE_H +#define __CRIS_STACKTRACE_H + +void walk_stackframe(unsigned long sp, + int (*fn)(unsigned long addr, void *data), + void *data); + +#endif diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile index edef71f..5fae398 100644 --- a/arch/cris/kernel/Makefile +++ b/arch/cris/kernel/Makefile @@ -8,6 +8,7 @@ extra-y := vmlinux.lds obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o obj-y += devicetree.o +obj-y += stacktrace.o obj-$(CONFIG_MODULES) += crisksyms.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/cris/kernel/stacktrace.c b/arch/cris/kernel/stacktrace.c new file mode 100644 index 0000000..99838c7 --- /dev/null +++ b/arch/cris/kernel/stacktrace.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include + +void walk_stackframe(unsigned long sp, + int (*fn)(unsigned long addr, void *data), + void *data) +{ + unsigned long high = ALIGN(sp, THREAD_SIZE); + + for (; sp <= high - 4; sp += 4) { + unsigned long addr = *(unsigned long *) sp; + + if (!kernel_text_address(addr)) + continue; + + if (fn(addr, data)) + break; + } +} + +struct stack_trace_data { + struct stack_trace *trace; + unsigned int no_sched_functions; + unsigned int skip; +}; + +#ifdef CONFIG_STACKTRACE + +static int save_trace(unsigned long addr, void *d) +{ + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + + if (data->no_sched_functions && in_sched_functions(addr)) + return 0; + + if (data->skip) { + data->skip--; + return 0; + } + + trace->entries[trace->nr_entries++] = addr; + + return trace->nr_entries >= trace->max_entries; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + struct stack_trace_data data; + unsigned long sp; + + data.trace = trace; + data.skip = trace->skip; + + if (tsk != current) { + data.no_sched_functions = 1; + sp = tsk->thread.ksp; + } else { + data.no_sched_functions = 0; + sp = rdsp(); + } + + walk_stackframe(sp, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +#endif /* CONFIG_STACKTRACE */ -- cgit v0.10.2 From 94c5c115c1f7d347d5ec7f32a090f8643dd42525 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 14 May 2015 18:19:37 +0200 Subject: CRISv32: enable LOCKDEP_SUPPORT Now that we have stack tracing and irq flags tracing support, we can also enable 
lockdep support Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 61f4acc..8da5653 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -43,6 +43,10 @@ config TRACE_IRQFLAGS_SUPPORT config STACKTRACE_SUPPORT def_bool y +config LOCKDEP_SUPPORT + depends on ETRAX_ARCH_V32 + def_bool y + config CRIS bool default y -- cgit v0.10.2 From 7f0144e7779a8c62e3177301d4b2179432ce5460 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 20 Feb 2015 19:35:16 +0100 Subject: CRIS: fix switch_mm() lockdep splat With lockdep support implemented on CRISv32, we get the following splat. switch_mm() can be called both from the scheduler() (with interrupts disabled) and from flush_old_exec (via activate_mm()), with interrupts enabled. Fix it by disabling interrupts in activate_mm(), similar to powerpc and hexagon. t====================================================== [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] 3.19.0-08802-g20bc9f1-dirty #323 Not tainted ------------------------------------------------------ init/1 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: (mmu_context_lock){+.+...}, at: [] switch_mm+0x22/0xc6 and this task is already holding: (&rq->lock){-.-.-.}, at: [] __schedule+0x5e/0x648 which would create a new lock dependency: (&rq->lock){-.-.-.} -> (mmu_context_lock){+.+...} but this new dependency connects a HARDIRQ-irq-safe lock: (&rq->lock){-.-.-.} ... which became HARDIRQ-irq-safe at: [] scheduler_tick+0x28/0x5e [] timer_interrupt+0x4e/0x6a [] handle_irq_event_percpu+0x54/0x13c [] generic_handle_irq+0x2a/0x36 to a HARDIRQ-irq-unsafe lock: (mmu_context_lock){+.+...} ... which became HARDIRQ-irq-unsafe at: ... [] __lock_acquire+0x8f8/0x1d9c [] switch_mm+0x22/0xc6 [] flush_old_exec+0x500/0x5d4 [] load_elf_phdrs+0x7a/0x84 [] load_elf_binary+0x21c/0x13b4 [] do_execve+0x22/0x2c [] ____call_usermodehelper+0x0/0x154 [] ret_from_kernel_thread+0xe/0x14 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(mmu_context_lock); local_irq_disable(); lock(&rq->lock); lock(mmu_context_lock); lock(&rq->lock); *** DEADLOCK *** 1 lock held by init/1: #0: (&rq->lock){-.-.-.}, at: [] __schedule+0x5e/0x648 Call Trace: [] printk+0x0/0x4e [] print_shortest_lock_dependencies+0x0/0x15c [] print_stack_trace+0x0/0x88 [] __lock_is_held+0x3e/0x5e [] lock_acquire+0x8a/0xcc [] _raw_spin_lock+0x44/0x7a [] switch_mm+0x22/0xc6 [] __schedule+0x0/0x648 [] schedule+0x36/0x7c [] trace_hardirqs_on+0x0/0x1e [] do_work_pending+0x30/0xd4 [] _work_pending+0xe/0x12 Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/asm/mmu_context.h b/arch/cris/include/asm/mmu_context.h index 1d45fd6..349acfd 100644 --- a/arch/cris/include/asm/mmu_context.h +++ b/arch/cris/include/asm/mmu_context.h @@ -11,7 +11,14 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm((prev),(next),NULL) +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm(prev, next, NULL); + local_irq_restore(flags); +} /* current active pgd - this is similar to other processors pgd * registers like cr3 on the i386 -- cgit v0.10.2 From c2ffc68afc0fce16923a54b2dad4d544463b9e0b Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 22 May 2015 20:58:48 +0200 Subject: CRISv10: delete unused lib/old_checksum.c 
This file is never built. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c deleted file mode 100644 index 8f79163..0000000 --- a/arch/cris/arch-v10/lib/old_checksum.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IP/TCP/UDP checksumming routines - * - * Authors: Jorge Cwik, - * Arnt Gulbrandsen, - * Tom May, - * Lots of code moved from tcp.c and ip.c; see those files - * for more names. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#undef PROFILE_CHECKSUM - -#ifdef PROFILE_CHECKSUM -/* these are just for profiling the checksum code with an oscillioscope.. uh */ -#if 0 -#define BITOFF *((unsigned char *)0xb0000030) = 0xff -#define BITON *((unsigned char *)0xb0000030) = 0x0 -#endif -#include -#define CBITON LED_ACTIVE_SET(1) -#define CBITOFF LED_ACTIVE_SET(0) -#define BITOFF -#define BITON -#else -#define BITOFF -#define BITON -#define CBITOFF -#define CBITON -#endif - -/* - * computes a partial checksum, e.g. for TCP/UDP fragments - */ - -#include - -__wsum csum_partial(const void *p, int len, __wsum __sum) -{ - u32 sum = (__force u32)__sum; - const u16 *buff = p; - /* - * Experiments with ethernet and slip connections show that buff - * is aligned on either a 2-byte or 4-byte boundary. - */ - const void *endMarker = p + len; - const void *marker = endMarker - (len % 16); -#if 0 - if((int)buff & 0x3) - printk("unaligned buff %p\n", buff); - __delay(900); /* extra delay of 90 us to test performance hit */ -#endif - BITON; - while (buff < marker) { - sum += *buff++; - sum += *buff++; - sum += *buff++; - sum += *buff++; - sum += *buff++; - sum += *buff++; - sum += *buff++; - sum += *buff++; - } - marker = endMarker - (len % 2); - while (buff < marker) - sum += *buff++; - - if (endMarker > buff) - sum += *(const u8 *)buff; /* add extra byte separately */ - - BITOFF; - return (__force __wsum)sum; -} - -EXPORT_SYMBOL(csum_partial); -- cgit v0.10.2 From 254a0f4135482dc02701562a38e10675a48c8ddf Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 22 May 2015 20:58:49 +0200 Subject: CRISv10: delete unused lib/dmacopy.c This file is never built. Signed-off-by: Rabin Vincent Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/lib/dmacopy.c b/arch/cris/arch-v10/lib/dmacopy.c deleted file mode 100644 index 49f5b8c..0000000 --- a/arch/cris/arch-v10/lib/dmacopy.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * memcpy for large blocks, using memory-memory DMA channels 6 and 7 in Etrax - */ - -#include -#include - -#define D(x) - -void *dma_memcpy(void *pdst, - const void *psrc, - unsigned int pn) -{ - static etrax_dma_descr indma, outdma; - - D(printk(KERN_DEBUG "dma_memcpy %d bytes... 
", pn)); - -#if 0 - *R_GEN_CONFIG = genconfig_shadow = - (genconfig_shadow & ~0x3c0000) | - IO_STATE(R_GEN_CONFIG, dma6, intdma7) | - IO_STATE(R_GEN_CONFIG, dma7, intdma6); -#endif - indma.sw_len = outdma.sw_len = pn; - indma.ctrl = d_eol | d_eop; - outdma.ctrl = d_eol; - indma.buf = psrc; - outdma.buf = pdst; - - *R_DMA_CH6_FIRST = &indma; - *R_DMA_CH7_FIRST = &outdma; - *R_DMA_CH6_CMD = IO_STATE(R_DMA_CH6_CMD, cmd, start); - *R_DMA_CH7_CMD = IO_STATE(R_DMA_CH7_CMD, cmd, start); - - while (*R_DMA_CH7_CMD == 1) - /* wait for completion */; - - D(printk(KERN_DEBUG "done\n")); -} - - - -- cgit v0.10.2 From 5269e7067cd66f4797760f160be21ff50f2d1582 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 3 Sep 2015 08:20:58 -0700 Subject: cpufreq: Add ARM_MT8173_CPUFREQ dependency on THERMAL If ARM_MT8173_CPUFREQ is configured, and THERMAL is configured as module, the following build error is seen for arm:allmodconfig and arm64:allmodconfig. drivers/built-in.o: In function `mtk_cpufreq_ready': :(.text+0x32a20c): undefined reference to `of_cpufreq_cooling_register' drivers/built-in.o: In function `mtk_cpufreq_exit': :(.text+0x32a420): undefined reference to `cpufreq_cooling_unregister' The fix is similar to CPUFREQ_DT, but more restrictive since ARM_MT8173_CPUFREQ can not be built as module. Fixes: 1453863fb02a ("cpufreq: mediatek: Add MT8173 cpufreq driver") Signed-off-by: Guenter Roeck Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 2bacf24..360c5df 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -133,6 +133,7 @@ config ARM_KIRKWOOD_CPUFREQ config ARM_MT8173_CPUFREQ bool "Mediatek MT8173 CPUFreq support" depends on ARCH_MEDIATEK && REGULATOR + depends on !CPU_THERMAL || THERMAL=y select PM_OPP help This adds the CPUFreq driver support for Mediatek MT8173 SoC. -- cgit v0.10.2 From 30e7a65b3fdb53cc49f85c965095e40aceea3961 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 3 Sep 2015 09:10:37 +0100 Subject: PM / Domains: Ensure subdomain is not in use before removing The function pm_genpd_remove_subdomain() removes a subdomain from a generic PM domain, however, it does not check if the subdomain has any slave domains or device attached before doing so. Therefore, add a test to verify that the subdomain does not have any slave domains associated or any device attached before removing. Signed-off-by: Jon Hunter Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 62f7572..16550c6 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1473,6 +1473,13 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, mutex_lock(&genpd->lock); + if (!list_empty(&subdomain->slave_links) || subdomain->device_count) { + pr_warn("%s: unable to remove subdomain %s\n", genpd->name, + subdomain->name); + ret = -EBUSY; + goto out; + } + list_for_each_entry(link, &genpd->master_links, master_node) { if (link->slave != subdomain) continue; @@ -1491,6 +1498,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, break; } +out: mutex_unlock(&genpd->lock); return ret; -- cgit v0.10.2 From 76fc5e7b2355af167dea1a32e93c57fc37900a5b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 4 Sep 2015 17:00:43 -0700 Subject: x86/vm86: Block non-root vm86(old) if mmap_min_addr != 0 vm86 exposes an interesting attack surface against the entry code. 
Since vm86 is mostly useless anyway if mmap_min_addr != 0, just turn it off in that case. There are some reports that vbetool can work despite setting mmap_min_addr to zero. This shouldn't break that use case, as CAP_SYS_RAWIO already overrides mmap_min_addr. Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Arjan van de Ven Cc: Austin S Hemmelgarn Cc: Borislav Petkov Cc: Brian Gerst Cc: Josh Boyer Cc: Kees Cook Cc: Matthew Garrett Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stas Sergeev Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index abd8b856..5246193 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -45,6 +45,7 @@ #include #include #include +#include <linux/security.h> #include #include @@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) struct pt_regs *regs = current_pt_regs(); unsigned long err = 0; + err = security_mmap_addr(0); + if (err) { + /* + * vm86 cannot virtualize the address space, so vm86 users + * need to manage the low 1MB themselves using mmap. Given + * that BIOS places important data in the first page, vm86 + * is essentially useless if mmap_min_addr != 0. DOSEMU, + * for example, won't even bother trying to use vm86 if it + * can't map a page at virtual address 0. + * + * To reduce the available kernel attack surface, simply + * disallow vm86(old) for users who cannot mmap at va 0. + * + * The implementation of security_mmap_addr will allow + * suitably privileged users to map va 0 even if + * vm.mmap_min_addr is set above 0, and we want this + * behavior for vm86 as well, as it ensures that legacy + * tools like vbetool will not fail just because of + * vm.mmap_min_addr. + */ + pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n", + current->comm, task_pid_nr(current), + from_kuid_munged(&init_user_ns, current_uid())); + return -EPERM; + } + if (!vm86) { if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) return -ENOMEM; diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 9a43a59..421c607 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -116,8 +116,9 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, v86->regs.eip = eip; ret = vm86(VM86_ENTER, v86); - if (ret == -1 && errno == ENOSYS) { - printf("[SKIP]\tvm86 not supported\n"); + if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { + printf("[SKIP]\tvm86 %s\n", + errno == ENOSYS ? "not supported" : "not allowed"); return false; } -- cgit v0.10.2 From f5d75341fac6033f6afac900da110cc78e06d40d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 5 Sep 2015 10:29:09 -0700 Subject: Input: i8042 - lower log level for "no controller" message Nowadays machines without an i8042 controller are common, and there is no need to print the "No controller found" message at the error log level, which is annoying when booting in quiet mode. Let's lower it to the info level.
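For background: printk severities are numeric, and a quiet boot lowers the console loglevel to CONSOLE_LOGLEVEL_QUIET (4), so only messages numerically below that, i.e. KERN_ERR (3) and worse, still reach the console. A sketch of how the two calls behave (illustration only; the helper is hypothetical):

#include <linux/kernel.h>
#include <linux/printk.h>

void i8042_log_demo(void)	/* hypothetical helper */
{
	pr_err("No controller found\n");	/* level 3: on the console even when quiet */
	pr_info("No controller found\n");	/* level 6: dmesg buffer only under quiet */
}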
Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index c9c98f0a..db91de5 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -877,7 +877,7 @@ static int __init i8042_check_aux(void) static int i8042_controller_check(void) { if (i8042_flush()) { - pr_err("No controller found\n"); + pr_info("No controller found\n"); return -ENODEV; } -- cgit v0.10.2 From 48ead50c1dd8e5cdb7ead067558a834c1e895e6e Mon Sep 17 00:00:00 2001 From: Sanchayan Maity Date: Sat, 5 Sep 2015 10:32:09 -0700 Subject: Input: Add touchscreen support for Colibri VF50 The Colibri Vybrid VF50 module supports 4-wire touchscreens using FETs and ADC inputs. This driver uses the IIO consumer interface and relies on the vf610_adc driver based on the IIO framework. Signed-off-by: Sanchayan Maity Signed-off-by: Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt b/Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt new file mode 100644 index 0000000..9d9e930 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt @@ -0,0 +1,36 @@ +* Toradex Colibri VF50 Touchscreen driver + +Required Properties: +- compatible must be toradex,vf50-touchscreen +- io-channels: adc channels being used by the Colibri VF50 module +- xp-gpios: FET gate driver for input of X+ +- xm-gpios: FET gate driver for input of X- +- yp-gpios: FET gate driver for input of Y+ +- ym-gpios: FET gate driver for input of Y- +- interrupt-parent: phandle for the interrupt controller +- interrupts: pen irq interrupt for touch detection +- pinctrl-names: "idle", "default", "gpios" +- pinctrl-0: pinctrl node for pen/touch detection state pinmux +- pinctrl-1: pinctrl node for X/Y and pressure measurement (ADC) state pinmux +- pinctrl-2: pinctrl node for gpios functioning as FET gate drivers +- vf50-ts-min-pressure: pressure level at which to stop measuring X/Y values + +Example: + + touchctrl: vf50_touchctrl { + compatible = "toradex,vf50-touchscreen"; + io-channels = <&adc1 0>,<&adc0 0>, + <&adc0 1>,<&adc1 2>; + xp-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>; + xm-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>; + yp-gpios = <&gpio0 12 GPIO_ACTIVE_LOW>; + ym-gpios = <&gpio0 4 GPIO_ACTIVE_HIGH>; + interrupt-parent = <&gpio0>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "idle","default","gpios"; + pinctrl-0 = <&pinctrl_touchctrl_idle>; + pinctrl-1 = <&pinctrl_touchctrl_default>; + pinctrl-2 = <&pinctrl_touchctrl_gpios>; + vf50-ts-min-pressure = <200>; + status = "disabled"; + }; diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 059edeb..a6d7a4d 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -1040,4 +1040,16 @@ config TOUCHSCREEN_ZFORCE To compile this driver as a module, choose M here: the module will be called zforce_ts. +config TOUCHSCREEN_COLIBRI_VF50 + tristate "Toradex Colibri on board touchscreen driver" + depends on GPIOLIB && IIO && VF610_ADC + help + Say Y here if you have a Colibri VF50 and plan to use + the on-board provided 4-wire touchscreen driver. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called colibri_vf50_ts. 
+ endif diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index c85aae2..fb27f7e 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -85,3 +85,4 @@ obj-$(CONFIG_TOUCHSCREEN_W90X900) += w90p910_ts.o obj-$(CONFIG_TOUCHSCREEN_SX8654) += sx8654.o obj-$(CONFIG_TOUCHSCREEN_TPS6507X) += tps6507x-ts.o obj-$(CONFIG_TOUCHSCREEN_ZFORCE) += zforce_ts.o +obj-$(CONFIG_TOUCHSCREEN_COLIBRI_VF50) += colibri-vf50-ts.o diff --git a/drivers/input/touchscreen/colibri-vf50-ts.c b/drivers/input/touchscreen/colibri-vf50-ts.c new file mode 100644 index 0000000..5d4903a --- /dev/null +++ b/drivers/input/touchscreen/colibri-vf50-ts.c @@ -0,0 +1,386 @@ +/* + * Toradex Colibri VF50 Touchscreen driver + * + * Copyright 2015 Toradex AG + * + * Originally authored by Stefan Agner for 3.0 kernel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "colibri-vf50-ts" +#define DRV_VERSION "1.0" + +#define VF_ADC_MAX ((1 << 12) - 1) + +#define COLI_TOUCH_MIN_DELAY_US 1000 +#define COLI_TOUCH_MAX_DELAY_US 2000 +#define COLI_PULLUP_MIN_DELAY_US 10000 +#define COLI_PULLUP_MAX_DELAY_US 11000 +#define COLI_TOUCH_NO_OF_AVGS 5 +#define COLI_TOUCH_REQ_ADC_CHAN 4 + +struct vf50_touch_device { + struct platform_device *pdev; + struct input_dev *ts_input; + struct iio_channel *channels; + struct gpio_desc *gpio_xp; + struct gpio_desc *gpio_xm; + struct gpio_desc *gpio_yp; + struct gpio_desc *gpio_ym; + int pen_irq; + int min_pressure; + bool stop_touchscreen; +}; + +/* + * Enables given plates and measures touch parameters using ADC + */ +static int adc_ts_measure(struct iio_channel *channel, + struct gpio_desc *plate_p, struct gpio_desc *plate_m) +{ + int i, value = 0, val = 0; + int error; + + gpiod_set_value(plate_p, 1); + gpiod_set_value(plate_m, 1); + + usleep_range(COLI_TOUCH_MIN_DELAY_US, COLI_TOUCH_MAX_DELAY_US); + + for (i = 0; i < COLI_TOUCH_NO_OF_AVGS; i++) { + error = iio_read_channel_raw(channel, &val); + if (error < 0) { + value = error; + goto error_iio_read; + } + + value += val; + } + + value /= COLI_TOUCH_NO_OF_AVGS; + +error_iio_read: + gpiod_set_value(plate_p, 0); + gpiod_set_value(plate_m, 0); + + return value; +} + +/* + * Enable touch detection using falling edge detection on XM + */ +static void vf50_ts_enable_touch_detection(struct vf50_touch_device *vf50_ts) +{ + /* Enable plate YM (needs to be strong GND, high active) */ + gpiod_set_value(vf50_ts->gpio_ym, 1); + + /* + * Let the platform mux to idle state in order to enable + * Pull-Up on GPIO + */ + pinctrl_pm_select_idle_state(&vf50_ts->pdev->dev); + + /* Wait for the pull-up to be stable on high */ + usleep_range(COLI_PULLUP_MIN_DELAY_US, COLI_PULLUP_MAX_DELAY_US); +} + +/* + * ADC touch screen sampling bottom half irq handler + */ +static irqreturn_t vf50_ts_irq_bh(int irq, void *private) +{ + struct vf50_touch_device *vf50_ts = private; + struct device *dev = &vf50_ts->pdev->dev; + int val_x, val_y, val_z1, val_z2, val_p = 0; + bool discard_val_on_start = true; + + /* Disable the touch detection plates */ + gpiod_set_value(vf50_ts->gpio_ym, 0); + + /* Let the platform mux to default state in order to mux as ADC 
*/ + pinctrl_pm_select_default_state(dev); + + while (!vf50_ts->stop_touchscreen) { + /* X-Direction */ + val_x = adc_ts_measure(&vf50_ts->channels[0], + vf50_ts->gpio_xp, vf50_ts->gpio_xm); + if (val_x < 0) + break; + + /* Y-Direction */ + val_y = adc_ts_measure(&vf50_ts->channels[1], + vf50_ts->gpio_yp, vf50_ts->gpio_ym); + if (val_y < 0) + break; + + /* + * Touch pressure + * Measure on XP/YM + */ + val_z1 = adc_ts_measure(&vf50_ts->channels[2], + vf50_ts->gpio_yp, vf50_ts->gpio_xm); + if (val_z1 < 0) + break; + val_z2 = adc_ts_measure(&vf50_ts->channels[3], + vf50_ts->gpio_yp, vf50_ts->gpio_xm); + if (val_z2 < 0) + break; + + /* Validate signal (avoid calculation using noise) */ + if (val_z1 > 64 && val_x > 64) { + /* + * Calculate resistance between the plates + * lower resistance means higher pressure + */ + int r_x = (1000 * val_x) / VF_ADC_MAX; + + val_p = (r_x * val_z2) / val_z1 - r_x; + + } else { + val_p = 2000; + } + + val_p = 2000 - val_p; + dev_dbg(dev, + "Measured values: x: %d, y: %d, z1: %d, z2: %d, p: %d\n", + val_x, val_y, val_z1, val_z2, val_p); + + /* + * If touch pressure is too low, stop measuring and reenable + * touch detection + */ + if (val_p < vf50_ts->min_pressure || val_p > 2000) + break; + + /* + * The pressure may not be enough for the first x and the + * second y measurement, but, the pressure is ok when the + * driver is doing the third and fourth measurement. To + * take care of this, we drop the first measurement always. + */ + if (discard_val_on_start) { + discard_val_on_start = false; + } else { + /* + * Report touch position and sleep for + * the next measurement. + */ + input_report_abs(vf50_ts->ts_input, + ABS_X, VF_ADC_MAX - val_x); + input_report_abs(vf50_ts->ts_input, + ABS_Y, VF_ADC_MAX - val_y); + input_report_abs(vf50_ts->ts_input, + ABS_PRESSURE, val_p); + input_report_key(vf50_ts->ts_input, BTN_TOUCH, 1); + input_sync(vf50_ts->ts_input); + } + + usleep_range(COLI_PULLUP_MIN_DELAY_US, + COLI_PULLUP_MAX_DELAY_US); + } + + /* Report no more touch, re-enable touch detection */ + input_report_abs(vf50_ts->ts_input, ABS_PRESSURE, 0); + input_report_key(vf50_ts->ts_input, BTN_TOUCH, 0); + input_sync(vf50_ts->ts_input); + + vf50_ts_enable_touch_detection(vf50_ts); + + return IRQ_HANDLED; +} + +static int vf50_ts_open(struct input_dev *dev_input) +{ + struct vf50_touch_device *touchdev = input_get_drvdata(dev_input); + struct device *dev = &touchdev->pdev->dev; + + dev_dbg(dev, "Input device %s opened, starting touch detection\n", + dev_input->name); + + touchdev->stop_touchscreen = false; + + /* Mux detection before request IRQ, wait for pull-up to settle */ + vf50_ts_enable_touch_detection(touchdev); + + return 0; +} + +static void vf50_ts_close(struct input_dev *dev_input) +{ + struct vf50_touch_device *touchdev = input_get_drvdata(dev_input); + struct device *dev = &touchdev->pdev->dev; + + touchdev->stop_touchscreen = true; + + /* Make sure IRQ is not running past close */ + mb(); + synchronize_irq(touchdev->pen_irq); + + gpiod_set_value(touchdev->gpio_ym, 0); + pinctrl_pm_select_default_state(dev); + + dev_dbg(dev, "Input device %s closed, disable touch detection\n", + dev_input->name); +} + +static int vf50_ts_get_gpiod(struct device *dev, struct gpio_desc **gpio_d, + const char *con_id, enum gpiod_flags flags) +{ + int error; + + *gpio_d = devm_gpiod_get(dev, con_id, flags); + if (IS_ERR(*gpio_d)) { + error = PTR_ERR(*gpio_d); + dev_err(dev, "Could not get gpio_%s %d\n", con_id, error); + return error; + } + + return 0; +} + +static void 
vf50_ts_channel_release(void *data) +{ + struct iio_channel *channels = data; + + iio_channel_release_all(channels); +} + +static int vf50_ts_probe(struct platform_device *pdev) +{ + struct input_dev *input; + struct iio_channel *channels; + struct device *dev = &pdev->dev; + struct vf50_touch_device *touchdev; + int num_adc_channels; + int error; + + channels = iio_channel_get_all(dev); + if (IS_ERR(channels)) + return PTR_ERR(channels); + + error = devm_add_action(dev, vf50_ts_channel_release, channels); + if (error) { + iio_channel_release_all(channels); + dev_err(dev, "Failed to register iio channel release action"); + return error; + } + + num_adc_channels = 0; + while (channels[num_adc_channels].indio_dev) + num_adc_channels++; + + if (num_adc_channels != COLI_TOUCH_REQ_ADC_CHAN) { + dev_err(dev, "Inadequate ADC channels specified\n"); + return -EINVAL; + } + + touchdev = devm_kzalloc(dev, sizeof(*touchdev), GFP_KERNEL); + if (!touchdev) + return -ENOMEM; + + touchdev->pdev = pdev; + touchdev->channels = channels; + + error = of_property_read_u32(dev->of_node, "vf50-ts-min-pressure", + &touchdev->min_pressure); + if (error) + return error; + + input = devm_input_allocate_device(dev); + if (!input) { + dev_err(dev, "Failed to allocate TS input device\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, touchdev); + + input->name = DRIVER_NAME; + input->id.bustype = BUS_HOST; + input->dev.parent = dev; + input->open = vf50_ts_open; + input->close = vf50_ts_close; + + input_set_capability(input, EV_KEY, BTN_TOUCH); + input_set_abs_params(input, ABS_X, 0, VF_ADC_MAX, 0, 0); + input_set_abs_params(input, ABS_Y, 0, VF_ADC_MAX, 0, 0); + input_set_abs_params(input, ABS_PRESSURE, 0, VF_ADC_MAX, 0, 0); + + touchdev->ts_input = input; + input_set_drvdata(input, touchdev); + + error = input_register_device(input); + if (error) { + dev_err(dev, "Failed to register input device\n"); + return error; + } + + error = vf50_ts_get_gpiod(dev, &touchdev->gpio_xp, "xp", GPIOD_OUT_LOW); + if (error) + return error; + + error = vf50_ts_get_gpiod(dev, &touchdev->gpio_xm, + "xm", GPIOD_OUT_LOW); + if (error) + return error; + + error = vf50_ts_get_gpiod(dev, &touchdev->gpio_yp, "yp", GPIOD_OUT_LOW); + if (error) + return error; + + error = vf50_ts_get_gpiod(dev, &touchdev->gpio_ym, "ym", GPIOD_OUT_LOW); + if (error) + return error; + + touchdev->pen_irq = platform_get_irq(pdev, 0); + if (touchdev->pen_irq < 0) + return touchdev->pen_irq; + + error = devm_request_threaded_irq(dev, touchdev->pen_irq, + NULL, vf50_ts_irq_bh, IRQF_ONESHOT, + "vf50 touch", touchdev); + if (error) { + dev_err(dev, "Failed to request IRQ %d: %d\n", + touchdev->pen_irq, error); + return error; + } + + return 0; +} + +static const struct of_device_id vf50_touch_of_match[] = { + { .compatible = "toradex,vf50-touchscreen", }, + { } +}; +MODULE_DEVICE_TABLE(of, vf50_touch_of_match); + +static struct platform_driver vf50_touch_driver = { + .driver = { + .name = "toradex,vf50_touchctrl", + .of_match_table = vf50_touch_of_match, + }, + .probe = vf50_ts_probe, +}; +module_platform_driver(vf50_touch_driver); + +MODULE_AUTHOR("Sanchayan Maity"); +MODULE_DESCRIPTION("Colibri VF50 Touchscreen driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); -- cgit v0.10.2 From 9a436d524d3533cd15ed5a189d2237ff1e4e5343 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Sat, 5 Sep 2015 11:31:21 -0700 Subject: Input: touchscreen - add imx6ul_tsc driver support The Freescale i.MX6UL contains an internal touchscreen controller; this patch adds a driver to support it.
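As a preview of the register interface this driver programs, here is a hedged sketch of how a completed measurement is unpacked. The bit layout (12-bit X in bits 27:16, 12-bit Y in bits 11:0 of the measure-value register) is taken from the driver's IRQ handler below; everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Unpack one raw REG_TSC_MEASURE_VALUE word the way the driver does. */
static void decode_measure(uint32_t value)
{
	unsigned int x = (value >> 16) & 0x0fff;	/* 12-bit X sample */
	unsigned int y = value & 0x0fff;		/* 12-bit Y sample */

	printf("touch sample: x=%u y=%u\n", x, y);
}

int main(void)
{
	decode_measure(0x07ff03ffu);	/* made-up raw value for illustration */
	return 0;
}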
Signed-off-by: Haibo Chen Signed-off-by: Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt b/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt new file mode 100644 index 0000000..853dff9 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt @@ -0,0 +1,36 @@ +* Freescale i.MX6UL Touch Controller + +Required properties: +- compatible: must be "fsl,imx6ul-tsc". +- reg: the touch controller address and the ADC2 address. +- interrupts: the interrupts of the touch controller and ADC2. +- clocks: the root clocks of the touch controller and ADC2. +- clock-names: must be "tsc" and "adc". +- xnur-gpio: the X- gpio this controller connects to. + This xnur-gpio goes low once the finger leaves the touch screen (the + last touch event the touch controller captures). + +Optional properties: +- measure-delay-time: the value of the measure delay time. + Before an X-axis or Y-axis measurement, the screen needs some time for + the potential distribution to settle. + This value depends on the touch screen. +- pre-charge-time: the time the touch screen needs to precharge. + This value depends on the touch screen. + +Example: + tsc: tsc@02040000 { + compatible = "fsl,imx6ul-tsc"; + reg = <0x02040000 0x4000>, <0x0219c000 0x4000>; + interrupts = , + ; + clocks = <&clks IMX6UL_CLK_IPG>, + <&clks IMX6UL_CLK_ADC2>; + clock-names = "tsc", "adc"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_tsc>; + xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; + measure-delay-time = <0xfff>; + pre-charge-time = <0xffff>; + status = "okay"; + }; diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index a6d7a4d..600dccef 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -479,6 +479,18 @@ config TOUCHSCREEN_MTOUCH To compile this driver as a module, choose M here: the module will be called mtouch. +config TOUCHSCREEN_IMX6UL_TSC + tristate "Freescale i.MX6UL touchscreen controller" + depends on (OF && GPIOLIB) || COMPILE_TEST + help + Say Y here if you have a Freescale i.MX6UL, and want to + use the internal touchscreen controller. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called imx6ul_tsc. + config TOUCHSCREEN_INEXIO tristate "iNexio serial touchscreens" select SERIO diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index fb27f7e..1b79cc0 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_TOUCHSCREEN_EGALAX) += egalax_ts.o obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix.o obj-$(CONFIG_TOUCHSCREEN_ILI210X) += ili210x.o +obj-$(CONFIG_TOUCHSCREEN_IMX6UL_TSC) += imx6ul_tsc.o obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o obj-$(CONFIG_TOUCHSCREEN_INTEL_MID) += intel-mid-touch.o obj-$(CONFIG_TOUCHSCREEN_IPROC) += bcm_iproc_tsc.o
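The two optional properties above are consumed with a fall-back-to-default pattern. A minimal sketch, mirroring the defaults the probe code below actually uses (demo_read_timings() itself is illustrative, not a driver symbol):

#include <linux/of.h>
#include <linux/types.h>

static void demo_read_timings(struct device_node *np, u32 *delay, u32 *precharge)
{
	/*
	 * of_property_read_u32() returns an error and leaves the output
	 * untouched when the property is absent, so a default can simply
	 * be assigned afterwards.
	 */
	if (of_property_read_u32(np, "measure-delay-time", delay))
		*delay = 0xffff;	/* driver default, see probe below */
	if (of_property_read_u32(np, "pre-charge-time", precharge))
		*precharge = 0xfff;	/* driver default, see probe below */
}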
diff --git a/drivers/input/touchscreen/imx6ul_tsc.c b/drivers/input/touchscreen/imx6ul_tsc.c new file mode 100644 index 0000000..ff0b758 --- /dev/null +++ b/drivers/input/touchscreen/imx6ul_tsc.c @@ -0,0 +1,523 @@ +/* + * Freescale i.MX6UL touchscreen controller driver + * + * Copyright (C) 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ADC configuration registers field define */ +#define ADC_AIEN (0x1 << 7) +#define ADC_CONV_DISABLE 0x1F +#define ADC_CAL (0x1 << 7) +#define ADC_CALF 0x2 +#define ADC_12BIT_MODE (0x2 << 2) +#define ADC_IPG_CLK 0x00 +#define ADC_CLK_DIV_8 (0x03 << 5) +#define ADC_SHORT_SAMPLE_MODE (0x0 << 4) +#define ADC_HARDWARE_TRIGGER (0x1 << 13) +#define SELECT_CHANNEL_4 0x04 +#define SELECT_CHANNEL_1 0x01 +#define DISABLE_CONVERSION_INT (0x0 << 7) + +/* ADC registers */ +#define REG_ADC_HC0 0x00 +#define REG_ADC_HC1 0x04 +#define REG_ADC_HC2 0x08 +#define REG_ADC_HC3 0x0C +#define REG_ADC_HC4 0x10 +#define REG_ADC_HS 0x14 +#define REG_ADC_R0 0x18 +#define REG_ADC_CFG 0x2C +#define REG_ADC_GC 0x30 +#define REG_ADC_GS 0x34 + +#define ADC_TIMEOUT msecs_to_jiffies(100) + +/* TSC registers */ +#define REG_TSC_BASIC_SETING 0x00 +#define REG_TSC_PRE_CHARGE_TIME 0x10 +#define REG_TSC_FLOW_CONTROL 0x20 +#define REG_TSC_MEASURE_VALUE 0x30 +#define REG_TSC_INT_EN 0x40 +#define REG_TSC_INT_SIG_EN 0x50 +#define REG_TSC_INT_STATUS 0x60 +#define REG_TSC_DEBUG_MODE 0x70 +#define REG_TSC_DEBUG_MODE2 0x80 + +/* TSC configuration registers field define */ +#define DETECT_4_WIRE_MODE (0x0 << 4) +#define AUTO_MEASURE 0x1 +#define MEASURE_SIGNAL 0x1 +#define DETECT_SIGNAL (0x1 << 4) +#define VALID_SIGNAL (0x1 << 8) +#define MEASURE_INT_EN 0x1 +#define MEASURE_SIG_EN 0x1 +#define VALID_SIG_EN (0x1 << 8) +#define DE_GLITCH_2 (0x2 << 29) +#define START_SENSE (0x1 << 12) +#define TSC_DISABLE (0x1 << 16) +#define DETECT_MODE 0x2 + +struct imx6ul_tsc { + struct device *dev; + struct input_dev *input; + void __iomem *tsc_regs; + void __iomem *adc_regs; + struct clk *tsc_clk; + struct clk *adc_clk; + struct gpio_desc *xnur_gpio; + + int measure_delay_time; + int pre_charge_time; + + struct completion completion; +}; + +/* + * The TSC module needs the ADC to get the measured value, so + * initialize the ADC module before configuring the TSC. + */ +static void imx6ul_adc_init(struct imx6ul_tsc *tsc) +{ + int adc_hc = 0; + int adc_gc; + int adc_gs; + int adc_cfg; + int timeout; + + reinit_completion(&tsc->completion); + + adc_cfg = readl(tsc->adc_regs + REG_ADC_CFG); + adc_cfg |= ADC_12BIT_MODE | ADC_IPG_CLK; + adc_cfg |= ADC_CLK_DIV_8 | ADC_SHORT_SAMPLE_MODE; + adc_cfg &= ~ADC_HARDWARE_TRIGGER; + writel(adc_cfg, tsc->adc_regs + REG_ADC_CFG); + + /* enable calibration interrupt */ + adc_hc |= ADC_AIEN; + adc_hc |= ADC_CONV_DISABLE; + writel(adc_hc, tsc->adc_regs + REG_ADC_HC0); + + /* start ADC calibration */ + adc_gc = readl(tsc->adc_regs + REG_ADC_GC); + adc_gc |= ADC_CAL; + writel(adc_gc, tsc->adc_regs + REG_ADC_GC); + + timeout = wait_for_completion_timeout + (&tsc->completion, ADC_TIMEOUT); + if (timeout == 0) + dev_err(tsc->dev, "Timeout for adc calibration\n"); + + adc_gs = readl(tsc->adc_regs + REG_ADC_GS); + if (adc_gs & ADC_CALF) + dev_err(tsc->dev, "ADC calibration failed\n"); + + /* The TSC needs the ADC to work in hardware-trigger mode */ + adc_cfg = readl(tsc->adc_regs + REG_ADC_CFG); + adc_cfg |= ADC_HARDWARE_TRIGGER; + writel(adc_cfg, tsc->adc_regs + REG_ADC_CFG); +}
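The calibration wait above is the standard completion-with-timeout idiom: the ADC interrupt handler (adc_irq_fn, later in this file) signals the completion, and the init path bounds its wait. A self-contained sketch of just that idiom; the demo_* names are illustrative, not symbols from this driver:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

struct demo_dev {
	struct completion done;	/* signalled from the IRQ handler */
};

static int demo_wait_calibration(struct demo_dev *d)
{
	reinit_completion(&d->done);	/* arm before kicking the hardware */

	/* ... start calibration in hardware here ... */

	if (!wait_for_completion_timeout(&d->done, msecs_to_jiffies(100)))
		return -ETIMEDOUT;	/* the completion IRQ never fired */

	return 0;
}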
+/* + * This is a TSC workaround. The TSC has two ADC channels misconnected, + * so this function remaps the channel configuration for hardware + * trigger. + */ +static void imx6ul_tsc_channel_config(struct imx6ul_tsc *tsc) +{ + int adc_hc0, adc_hc1, adc_hc2, adc_hc3, adc_hc4; + + adc_hc0 = DISABLE_CONVERSION_INT; + writel(adc_hc0, tsc->adc_regs + REG_ADC_HC0); + + adc_hc1 = DISABLE_CONVERSION_INT | SELECT_CHANNEL_4; + writel(adc_hc1, tsc->adc_regs + REG_ADC_HC1); + + adc_hc2 = DISABLE_CONVERSION_INT; + writel(adc_hc2, tsc->adc_regs + REG_ADC_HC2); + + adc_hc3 = DISABLE_CONVERSION_INT | SELECT_CHANNEL_1; + writel(adc_hc3, tsc->adc_regs + REG_ADC_HC3); + + adc_hc4 = DISABLE_CONVERSION_INT; + writel(adc_hc4, tsc->adc_regs + REG_ADC_HC4); +} + +/* + * Configure the TSC: set the pre-charge time and the measure delay time. + * Different touch screens may need different pre-charge and measure + * delay times. + */ +static void imx6ul_tsc_set(struct imx6ul_tsc *tsc) +{ + int basic_setting = 0; + int start; + + basic_setting |= tsc->measure_delay_time << 8; + basic_setting |= DETECT_4_WIRE_MODE | AUTO_MEASURE; + writel(basic_setting, tsc->tsc_regs + REG_TSC_BASIC_SETING); + + writel(DE_GLITCH_2, tsc->tsc_regs + REG_TSC_DEBUG_MODE2); + + writel(tsc->pre_charge_time, tsc->tsc_regs + REG_TSC_PRE_CHARGE_TIME); + writel(MEASURE_INT_EN, tsc->tsc_regs + REG_TSC_INT_EN); + writel(MEASURE_SIG_EN | VALID_SIG_EN, + tsc->tsc_regs + REG_TSC_INT_SIG_EN); + + /* start sense detection */ + start = readl(tsc->tsc_regs + REG_TSC_FLOW_CONTROL); + start |= START_SENSE; + start &= ~TSC_DISABLE; + writel(start, tsc->tsc_regs + REG_TSC_FLOW_CONTROL); +} + +static void imx6ul_tsc_init(struct imx6ul_tsc *tsc) +{ + imx6ul_adc_init(tsc); + imx6ul_tsc_channel_config(tsc); + imx6ul_tsc_set(tsc); +} + +static void imx6ul_tsc_disable(struct imx6ul_tsc *tsc) +{ + int tsc_flow; + int adc_cfg; + + /* the TSC controller enters idle status */ + tsc_flow = readl(tsc->tsc_regs + REG_TSC_FLOW_CONTROL); + tsc_flow |= TSC_DISABLE; + writel(tsc_flow, tsc->tsc_regs + REG_TSC_FLOW_CONTROL); + + /* the ADC controller enters stop mode */ + adc_cfg = readl(tsc->adc_regs + REG_ADC_HC0); + adc_cfg |= ADC_CONV_DISABLE; + writel(adc_cfg, tsc->adc_regs + REG_ADC_HC0); +} + +/* Wait some time (at most 2 ms) for the pre-charge to finish. */ +static bool tsc_wait_detect_mode(struct imx6ul_tsc *tsc) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(2); + int state_machine; + int debug_mode2; + + do { + if (time_after(jiffies, timeout)) + return false; + + usleep_range(200, 400); + debug_mode2 = readl(tsc->tsc_regs + REG_TSC_DEBUG_MODE2); + state_machine = (debug_mode2 >> 20) & 0x7; + } while (state_machine != DETECT_MODE); + + usleep_range(200, 400); + return true; +} + +static irqreturn_t tsc_irq_fn(int irq, void *dev_id) +{ + struct imx6ul_tsc *tsc = dev_id; + int status; + int value; + int x, y; + int start; + + status = readl(tsc->tsc_regs + REG_TSC_INT_STATUS); + + /* write 1 to clear the measure-signal bit */ + writel(MEASURE_SIGNAL | DETECT_SIGNAL, + tsc->tsc_regs + REG_TSC_INT_STATUS); + + /* It's a HW self-clearing bit. Set this bit and start sense detection */ + start = readl(tsc->tsc_regs + REG_TSC_FLOW_CONTROL); + start |= START_SENSE; + writel(start, tsc->tsc_regs + REG_TSC_FLOW_CONTROL); + + if (status & MEASURE_SIGNAL) { + value = readl(tsc->tsc_regs + REG_TSC_MEASURE_VALUE); + x = (value >> 16) & 0x0fff; + y = value & 0x0fff; + + /* + * In detect mode, we can get the xnur gpio value; + * otherwise assume the contact is still active.
+ */ + if (!tsc_wait_detect_mode(tsc) || + gpiod_get_value_cansleep(tsc->xnur_gpio)) { + input_report_key(tsc->input, BTN_TOUCH, 1); + input_report_abs(tsc->input, ABS_X, x); + input_report_abs(tsc->input, ABS_Y, y); + } else { + input_report_key(tsc->input, BTN_TOUCH, 0); + } + + input_sync(tsc->input); + } + + return IRQ_HANDLED; +} + +static irqreturn_t adc_irq_fn(int irq, void *dev_id) +{ + struct imx6ul_tsc *tsc = dev_id; + int coco; + int value; + + coco = readl(tsc->adc_regs + REG_ADC_HS); + if (coco & 0x01) { + value = readl(tsc->adc_regs + REG_ADC_R0); + complete(&tsc->completion); + } + + return IRQ_HANDLED; +} + +static int imx6ul_tsc_open(struct input_dev *input_dev) +{ + struct imx6ul_tsc *tsc = input_get_drvdata(input_dev); + int err; + + err = clk_prepare_enable(tsc->adc_clk); + if (err) { + dev_err(tsc->dev, + "Could not prepare or enable the adc clock: %d\n", + err); + return err; + } + + err = clk_prepare_enable(tsc->tsc_clk); + if (err) { + dev_err(tsc->dev, + "Could not prepare or enable the tsc clock: %d\n", + err); + clk_disable_unprepare(tsc->adc_clk); + return err; + } + + imx6ul_tsc_init(tsc); + + return 0; +} + +static void imx6ul_tsc_close(struct input_dev *input_dev) +{ + struct imx6ul_tsc *tsc = input_get_drvdata(input_dev); + + imx6ul_tsc_disable(tsc); + + clk_disable_unprepare(tsc->tsc_clk); + clk_disable_unprepare(tsc->adc_clk); +} + +static int imx6ul_tsc_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct imx6ul_tsc *tsc; + struct input_dev *input_dev; + struct resource *tsc_mem; + struct resource *adc_mem; + int err; + int tsc_irq; + int adc_irq; + + tsc = devm_kzalloc(&pdev->dev, sizeof(struct imx6ul_tsc), GFP_KERNEL); + if (!tsc) + return -ENOMEM; + + input_dev = devm_input_allocate_device(&pdev->dev); + if (!input_dev) + return -ENOMEM; + + input_dev->name = "iMX6UL TouchScreen Controller"; + input_dev->id.bustype = BUS_HOST; + + input_dev->open = imx6ul_tsc_open; + input_dev->close = imx6ul_tsc_close; + + input_set_capability(input_dev, EV_KEY, BTN_TOUCH); + input_set_abs_params(input_dev, ABS_X, 0, 0xFFF, 0, 0); + input_set_abs_params(input_dev, ABS_Y, 0, 0xFFF, 0, 0); + + input_set_drvdata(input_dev, tsc); + + tsc->dev = &pdev->dev; + tsc->input = input_dev; + init_completion(&tsc->completion); + + tsc->xnur_gpio = devm_gpiod_get(&pdev->dev, "xnur", GPIOD_IN); + if (IS_ERR(tsc->xnur_gpio)) { + err = PTR_ERR(tsc->xnur_gpio); + dev_err(&pdev->dev, + "failed to request GPIO tsc_X- (xnur): %d\n", err); + return err; + } + + tsc_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tsc->tsc_regs = devm_ioremap_resource(&pdev->dev, tsc_mem); + if (IS_ERR(tsc->tsc_regs)) { + err = PTR_ERR(tsc->tsc_regs); + dev_err(&pdev->dev, "failed to remap tsc memory: %d\n", err); + return err; + } + + adc_mem = platform_get_resource(pdev, IORESOURCE_MEM, 1); + tsc->adc_regs = devm_ioremap_resource(&pdev->dev, adc_mem); + if (IS_ERR(tsc->adc_regs)) { + err = PTR_ERR(tsc->adc_regs); + dev_err(&pdev->dev, "failed to remap adc memory: %d\n", err); + return err; + } + + tsc->tsc_clk = devm_clk_get(&pdev->dev, "tsc"); + if (IS_ERR(tsc->tsc_clk)) { + err = PTR_ERR(tsc->tsc_clk); + dev_err(&pdev->dev, "failed getting tsc clock: %d\n", err); + return err; + } + + tsc->adc_clk = devm_clk_get(&pdev->dev, "adc"); + if (IS_ERR(tsc->adc_clk)) { + err = PTR_ERR(tsc->adc_clk); + dev_err(&pdev->dev, "failed getting adc clock: %d\n", err); + return err; + } + + tsc_irq = platform_get_irq(pdev, 0); + if (tsc_irq < 0) { + dev_err(&pdev->dev, "no 
tsc irq resource?\n"); + return tsc_irq; + } + + adc_irq = platform_get_irq(pdev, 1); + if (adc_irq <= 0) { + dev_err(&pdev->dev, "no adc irq resource?\n"); + return adc_irq; + } + + err = devm_request_threaded_irq(tsc->dev, tsc_irq, + NULL, tsc_irq_fn, IRQF_ONESHOT, + dev_name(&pdev->dev), tsc); + if (err) { + dev_err(&pdev->dev, + "failed requesting tsc irq %d: %d\n", + tsc_irq, err); + return err; + } + + err = devm_request_irq(tsc->dev, adc_irq, adc_irq_fn, 0, + dev_name(&pdev->dev), tsc); + if (err) { + dev_err(&pdev->dev, + "failed requesting adc irq %d: %d\n", + adc_irq, err); + return err; + } + + err = of_property_read_u32(np, "measure-delay-time", + &tsc->measure_delay_time); + if (err) + tsc->measure_delay_time = 0xffff; + + err = of_property_read_u32(np, "pre-charge-time", + &tsc->pre_charge_time); + if (err) + tsc->pre_charge_time = 0xfff; + + err = input_register_device(tsc->input); + if (err) { + dev_err(&pdev->dev, + "failed to register input device: %d\n", err); + return err; + } + + platform_set_drvdata(pdev, tsc); + return 0; +} + +static int __maybe_unused imx6ul_tsc_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct imx6ul_tsc *tsc = platform_get_drvdata(pdev); + struct input_dev *input_dev = tsc->input; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) { + imx6ul_tsc_disable(tsc); + + clk_disable_unprepare(tsc->tsc_clk); + clk_disable_unprepare(tsc->adc_clk); + } + + mutex_unlock(&input_dev->mutex); + + return 0; +} + +static int __maybe_unused imx6ul_tsc_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct imx6ul_tsc *tsc = platform_get_drvdata(pdev); + struct input_dev *input_dev = tsc->input; + int retval = 0; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) { + retval = clk_prepare_enable(tsc->adc_clk); + if (retval) + goto out; + + retval = clk_prepare_enable(tsc->tsc_clk); + if (retval) { + clk_disable_unprepare(tsc->adc_clk); + goto out; + } + + imx6ul_tsc_init(tsc); + } + +out: + mutex_unlock(&input_dev->mutex); + return retval; +} + +static SIMPLE_DEV_PM_OPS(imx6ul_tsc_pm_ops, + imx6ul_tsc_suspend, imx6ul_tsc_resume); + +static const struct of_device_id imx6ul_tsc_match[] = { + { .compatible = "fsl,imx6ul-tsc", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, imx6ul_tsc_match); + +static struct platform_driver imx6ul_tsc_driver = { + .driver = { + .name = "imx6ul-tsc", + .of_match_table = imx6ul_tsc_match, + .pm = &imx6ul_tsc_pm_ops, + }, + .probe = imx6ul_tsc_probe, +}; +module_platform_driver(imx6ul_tsc_driver); + +MODULE_AUTHOR("Haibo Chen "); +MODULE_DESCRIPTION("Freescale i.MX6UL Touchscreen controller driver"); +MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From ade9c1a47c811a7ae1c874882ad9178af1ed1098 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 19 Feb 2014 13:38:13 -0800 Subject: Input: imx_keypad - remove obsolete comment Since commit 81e8f2bc (Input: imx_keypad - add pm suspend and resume support) the imx_keypad driver supports power management, so let's remove the obsolete comment. 
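For reference, the suspend/resume shape that commit added follows the same pattern as the i.MX6UL driver above: gate hardware access on whether the input device is currently open. A hedged sketch of that pattern; the demo_* names are illustrative, not the imx_keypad symbols:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/input.h>
#include <linux/mutex.h>
#include <linux/pm.h>

struct demo_ts {
	struct input_dev *input;
	struct clk *clk;
};

static int __maybe_unused demo_suspend(struct device *dev)
{
	struct demo_ts *ts = dev_get_drvdata(dev);

	mutex_lock(&ts->input->mutex);
	if (ts->input->users)		/* only touch HW if the device is open */
		clk_disable_unprepare(ts->clk);
	mutex_unlock(&ts->input->mutex);

	return 0;
}

static int __maybe_unused demo_resume(struct device *dev)
{
	struct demo_ts *ts = dev_get_drvdata(dev);
	int error = 0;

	mutex_lock(&ts->input->mutex);
	if (ts->input->users)
		error = clk_prepare_enable(ts->clk);
	mutex_unlock(&ts->input->mutex);

	return error;
}

static SIMPLE_DEV_PM_OPS(demo_pm_ops, demo_suspend, demo_resume);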
Signed-off-by: Fabio Estevam Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c index d2ea863..2165f3d 100644 --- a/drivers/input/keyboard/imx_keypad.c +++ b/drivers/input/keyboard/imx_keypad.c @@ -5,8 +5,6 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * <>. */ #include -- cgit v0.10.2 From a394d635193b641f2c86ead5ada5b115d57c51f8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 6 Sep 2015 01:46:54 +0300 Subject: spi: Fix documentation of spi_alloc_master() Actually, spi_master_put() after spi_alloc_master() must _not_ be followed by kfree(). The memory is already freed with the call to spi_master_put() through spi_master_class, which registers a release function. Calling both spi_master_put() and kfree() results in often nasty (and delayed) crashes elsewhere in the kernel, often in the networking stack. This reverts commit eb4af0f5349235df2e4a5057a72fc8962d00308a. Link to patch and concerns: https://lkml.org/lkml/2012/9/3/269 or http://lkml.iu.edu/hypermail/linux/kernel/1209.0/00790.html Alexey Klimov: This revert becomes valid after 94c69f765f1b4a658d96905ec59928e3e3e07e6a when spi-imx.c has been fixed and there is no need to call kfree() so comment for spi_alloc_master() should be fixed. Signed-off-by: Guenter Roeck Signed-off-by: Alexey Klimov Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index cf8b91b..9ce2f15 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1437,8 +1437,7 @@ static struct class spi_master_class = { * * The caller is responsible for assigning the bus number and initializing * the master's methods before calling spi_register_master(); and (after errors - * adding the device) calling spi_master_put() and kfree() to prevent a memory - * leak. + * adding the device) calling spi_master_put() to prevent a memory leak. */ struct spi_master *spi_alloc_master(struct device *dev, unsigned size) { -- cgit v0.10.2 From 8901c18b6cafa51f7985f1031968bbfe9dc47735 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 2 Sep 2015 16:10:34 -0600 Subject: asus-nb-wmi: Add wapf=4 quirk for X456UA/X456UF These laptops boot with wifi as hard-blocked, with no obvious way to enable it. Using a quirk to set wapf=4 solves the problem. Signed-off-by: Daniel Drake Acked-by: Corentin Chary Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index abdaed3..131fee2 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -128,6 +128,24 @@ static const struct dmi_system_id asus_quirks[] = { }, { .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"), + }, + .driver_data = &quirk_asus_wapf4, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UF", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"), + }, + .driver_data = &quirk_asus_wapf4, + }, + { + .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. 
X501U", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -- cgit v0.10.2 From d224fe0d609734888af63656ddaf3a8352f0a7b5 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 13 Aug 2015 18:48:09 -0700 Subject: mpt2sas: Refcount sas_device objects and fix unsafe list usage These objects can be referenced concurrently throughout the driver, we need a way to make sure threads can't delete them out from under each other. This patch adds the refcount, and refactors the code to use it. Additionally, we cannot iterate over the sas_device_list without holding the lock, or we risk corrupting random memory if items are added or deleted as we iterate. This patch refactors _scsih_probe_sas() to use the sas_device_list in a safe way. Signed-off-by: Calvin Owens Reviewed-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Tested-by: Chaitra Basappa Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index caff8d1..78f41ac 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -238,6 +238,7 @@ * @flags: MPT_TARGET_FLAGS_XXX flags * @deleted: target flaged for deletion * @tm_busy: target is busy with TM request. + * @sdev: The sas_device associated with this target */ struct MPT2SAS_TARGET { struct scsi_target *starget; @@ -248,6 +249,7 @@ struct MPT2SAS_TARGET { u32 flags; u8 deleted; u8 tm_busy; + struct _sas_device *sdev; }; @@ -376,8 +378,24 @@ struct _sas_device { u8 phy; u8 responding; u8 pfa_led_on; + struct kref refcount; }; +static inline void sas_device_get(struct _sas_device *s) +{ + kref_get(&s->refcount); +} + +static inline void sas_device_free(struct kref *r) +{ + kfree(container_of(r, struct _sas_device, refcount)); +} + +static inline void sas_device_put(struct _sas_device *s) +{ + kref_put(&s->refcount, sas_device_free); +} + /** * struct _raid_device - raid volume link list * @list: sas device list @@ -1095,7 +1113,9 @@ struct _sas_node *mpt2sas_scsih_expander_find_by_handle(struct MPT2SAS_ADAPTER * u16 handle); struct _sas_node *mpt2sas_scsih_expander_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, u64 sas_address); -struct _sas_device *mpt2sas_scsih_sas_device_find_by_sas_address( +struct _sas_device *mpt2sas_get_sdev_by_addr( + struct MPT2SAS_ADAPTER *ioc, u64 sas_address); +struct _sas_device *__mpt2sas_get_sdev_by_addr( struct MPT2SAS_ADAPTER *ioc, u64 sas_address); void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc); diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 3f26147..5eca3a4 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -526,8 +526,61 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc, } } +static struct _sas_device * +__mpt2sas_get_sdev_from_target(struct MPT2SAS_ADAPTER *ioc, + struct MPT2SAS_TARGET *tgt_priv) +{ + struct _sas_device *ret; + + assert_spin_locked(&ioc->sas_device_lock); + + ret = tgt_priv->sdev; + if (ret) + sas_device_get(ret); + + return ret; +} + +static struct _sas_device * +mpt2sas_get_sdev_from_target(struct MPT2SAS_ADAPTER *ioc, + struct MPT2SAS_TARGET *tgt_priv) +{ + struct _sas_device *ret; + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + ret = __mpt2sas_get_sdev_from_target(ioc, tgt_priv); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + return ret; +} + + +struct _sas_device * +__mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc, + u64 
sas_address) +{ + struct _sas_device *sas_device; + + assert_spin_locked(&ioc->sas_device_lock); + + list_for_each_entry(sas_device, &ioc->sas_device_list, list) + if (sas_device->sas_address == sas_address) + goto found_device; + + list_for_each_entry(sas_device, &ioc->sas_device_init_list, list) + if (sas_device->sas_address == sas_address) + goto found_device; + + return NULL; + +found_device: + sas_device_get(sas_device); + return sas_device; +} + /** - * mpt2sas_scsih_sas_device_find_by_sas_address - sas device search + * mpt2sas_get_sdev_by_addr - sas device search * @ioc: per adapter object * @sas_address: sas address * Context: Calling function should acquire ioc->sas_device_lock @@ -536,24 +589,44 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc, * object. */ struct _sas_device * -mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, +mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc, u64 sas_address) { struct _sas_device *sas_device; + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device = __mpt2sas_get_sdev_by_addr(ioc, + sas_address); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + return sas_device; +} + +static struct _sas_device * +__mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) +{ + struct _sas_device *sas_device; + + assert_spin_locked(&ioc->sas_device_lock); list_for_each_entry(sas_device, &ioc->sas_device_list, list) - if (sas_device->sas_address == sas_address) - return sas_device; + if (sas_device->handle == handle) + goto found_device; list_for_each_entry(sas_device, &ioc->sas_device_init_list, list) - if (sas_device->sas_address == sas_address) - return sas_device; + if (sas_device->handle == handle) + goto found_device; return NULL; + +found_device: + sas_device_get(sas_device); + return sas_device; } /** - * _scsih_sas_device_find_by_handle - sas device search + * mpt2sas_get_sdev_by_handle - sas device search * @ioc: per adapter object * @handle: sas device handle (assigned by firmware) * Context: Calling function should acquire ioc->sas_device_lock @@ -562,19 +635,16 @@ mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, * object. */ static struct _sas_device * -_scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) +mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) { struct _sas_device *sas_device; + unsigned long flags; - list_for_each_entry(sas_device, &ioc->sas_device_list, list) - if (sas_device->handle == handle) - return sas_device; - - list_for_each_entry(sas_device, &ioc->sas_device_init_list, list) - if (sas_device->handle == handle) - return sas_device; + spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return NULL; + return sas_device; } /** @@ -583,7 +653,7 @@ _scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) * @sas_device: the sas_device object * Context: This function will acquire ioc->sas_device_lock. * - * Removing object and freeing associated memory from the ioc->sas_device_list. + * If sas_device is on the list, remove it and decrement its reference count. 
*/ static void _scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc, @@ -594,9 +664,15 @@ _scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc, if (!sas_device) return; + /* + * The lock serializes access to the list, but we still need to verify + * that nobody removed the entry while we were waiting on the lock. + */ spin_lock_irqsave(&ioc->sas_device_lock, flags); - list_del(&sas_device->list); - kfree(sas_device); + if (!list_empty(&sas_device->list)) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -620,6 +696,7 @@ _scsih_sas_device_add(struct MPT2SAS_ADAPTER *ioc, sas_device->handle, (unsigned long long)sas_device->sas_address)); spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device_get(sas_device); list_add_tail(&sas_device->list, &ioc->sas_device_list); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -659,6 +736,7 @@ _scsih_sas_device_init_add(struct MPT2SAS_ADAPTER *ioc, sas_device->handle, (unsigned long long)sas_device->sas_address)); spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device_get(sas_device); list_add_tail(&sas_device->list, &ioc->sas_device_init_list); _scsih_determine_boot_device(ioc, sas_device, 0); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -1208,12 +1286,15 @@ _scsih_change_queue_depth(struct scsi_device *sdev, int qdepth) goto not_sata; if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) goto not_sata; + spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - sas_device_priv_data->sas_target->sas_address); - if (sas_device && sas_device->device_info & - MPI2_SAS_DEVICE_INFO_SATA_DEVICE) - max_depth = MPT2SAS_SATA_QUEUE_DEPTH; + sas_device = __mpt2sas_get_sdev_from_target(ioc, sas_target_priv_data); + if (sas_device) { + if (sas_device->device_info & MPI2_SAS_DEVICE_INFO_SATA_DEVICE) + max_depth = MPT2SAS_SATA_QUEUE_DEPTH; + + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); not_sata: @@ -1271,18 +1352,20 @@ _scsih_target_alloc(struct scsi_target *starget) /* sas/sata devices */ spin_lock_irqsave(&ioc->sas_device_lock, flags); rphy = dev_to_rphy(starget->dev.parent); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, rphy->identify.sas_address); if (sas_device) { sas_target_priv_data->handle = sas_device->handle; sas_target_priv_data->sas_address = sas_device->sas_address; + sas_target_priv_data->sdev = sas_device; sas_device->starget = starget; sas_device->id = starget->id; sas_device->channel = starget->channel; if (test_bit(sas_device->handle, ioc->pd_handles)) sas_target_priv_data->flags |= MPT_TARGET_FLAGS_RAID_COMPONENT; + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -1324,13 +1407,21 @@ _scsih_target_destroy(struct scsi_target *starget) spin_lock_irqsave(&ioc->sas_device_lock, flags); rphy = dev_to_rphy(starget->dev.parent); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - rphy->identify.sas_address); + sas_device = __mpt2sas_get_sdev_from_target(ioc, sas_target_priv_data); if (sas_device && (sas_device->starget == starget) && (sas_device->id == starget->id) && (sas_device->channel == starget->channel)) sas_device->starget = NULL; + if (sas_device) { + /* + * Corresponding get() is in _scsih_target_alloc() + */ + sas_target_priv_data->sdev = NULL; + sas_device_put(sas_device); + + sas_device_put(sas_device); + } 
spin_unlock_irqrestore(&ioc->sas_device_lock, flags); out: @@ -1386,7 +1477,7 @@ _scsih_slave_alloc(struct scsi_device *sdev) if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_target_priv_data->sas_address); if (sas_device && (sas_device->starget == NULL)) { sdev_printk(KERN_INFO, sdev, @@ -1394,6 +1485,10 @@ _scsih_slave_alloc(struct scsi_device *sdev) __func__, __LINE__); sas_device->starget = starget; } + + if (sas_device) + sas_device_put(sas_device); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -1428,10 +1523,13 @@ _scsih_slave_destroy(struct scsi_device *sdev) if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - sas_target_priv_data->sas_address); + sas_device = __mpt2sas_get_sdev_from_target(ioc, + sas_target_priv_data); if (sas_device && !sas_target_priv_data->num_luns) sas_device->starget = NULL; + + if (sas_device) + sas_device_put(sas_device); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -2078,7 +2176,7 @@ _scsih_slave_configure(struct scsi_device *sdev) } spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_device_priv_data->sas_target->sas_address); if (!sas_device) { spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -2112,17 +2210,18 @@ _scsih_slave_configure(struct scsi_device *sdev) (unsigned long long) sas_device->enclosure_logical_id, sas_device->slot); + sas_device_put(sas_device); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); if (!ssp_target) _scsih_display_sata_capabilities(ioc, handle, sdev); - _scsih_change_queue_depth(sdev, qdepth); if (ssp_target) { sas_read_port_mode_page(sdev); _scsih_enable_tlr(ioc, sdev); } + return 0; } @@ -2509,8 +2608,7 @@ _scsih_tm_display_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd) device_str, (unsigned long long)priv_target->sas_address); } else { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - priv_target->sas_address); + sas_device = __mpt2sas_get_sdev_from_target(ioc, priv_target); if (sas_device) { if (priv_target->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) { @@ -2529,6 +2627,8 @@ _scsih_tm_display_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd) "enclosure_logical_id(0x%016llx), slot(%d)\n", (unsigned long long)sas_device->enclosure_logical_id, sas_device->slot); + + sas_device_put(sas_device); } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -2604,12 +2704,12 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) { struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT2SAS_DEVICE *sas_device_priv_data; - struct _sas_device *sas_device; - unsigned long flags; + struct _sas_device *sas_device = NULL; u16 handle; int r; struct scsi_target *starget = scmd->device->sdev_target; + struct MPT2SAS_TARGET *target_priv_data = starget->hostdata; starget_printk(KERN_INFO, starget, "attempting device reset! 
" "scmd(%p)\n", scmd); @@ -2629,12 +2729,10 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) handle = 0; if (sas_device_priv_data->sas_target->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) { - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, - sas_device_priv_data->sas_target->handle); + sas_device = mpt2sas_get_sdev_from_target(ioc, + target_priv_data); if (sas_device) handle = sas_device->volume_handle; - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } else handle = sas_device_priv_data->sas_target->handle; @@ -2651,6 +2749,10 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) out: sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(%p)\n", ((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd); + + if (sas_device) + sas_device_put(sas_device); + return r; } @@ -2665,11 +2767,11 @@ _scsih_target_reset(struct scsi_cmnd *scmd) { struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT2SAS_DEVICE *sas_device_priv_data; - struct _sas_device *sas_device; - unsigned long flags; + struct _sas_device *sas_device = NULL; u16 handle; int r; struct scsi_target *starget = scmd->device->sdev_target; + struct MPT2SAS_TARGET *target_priv_data = starget->hostdata; starget_printk(KERN_INFO, starget, "attempting target reset! " "scmd(%p)\n", scmd); @@ -2689,12 +2791,10 @@ _scsih_target_reset(struct scsi_cmnd *scmd) handle = 0; if (sas_device_priv_data->sas_target->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) { - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, - sas_device_priv_data->sas_target->handle); + sas_device = mpt2sas_get_sdev_from_target(ioc, + target_priv_data); if (sas_device) handle = sas_device->volume_handle; - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } else handle = sas_device_priv_data->sas_target->handle; @@ -2711,6 +2811,10 @@ _scsih_target_reset(struct scsi_cmnd *scmd) out: starget_printk(KERN_INFO, starget, "target reset: %s scmd(%p)\n", ((r == SUCCESS) ? 
"SUCCESS" : "FAILED"), scmd); + + if (sas_device) + sas_device_put(sas_device); + return r; } @@ -3002,15 +3106,15 @@ _scsih_block_io_to_children_attached_to_ex(struct MPT2SAS_ADAPTER *ioc, list_for_each_entry(mpt2sas_port, &sas_expander->sas_port_list, port_list) { - if (mpt2sas_port->remote_identify.device_type == - SAS_END_DEVICE) { + if (mpt2sas_port->remote_identify.device_type == SAS_END_DEVICE) { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = - mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - mpt2sas_port->remote_identify.sas_address); - if (sas_device) + sas_device = __mpt2sas_get_sdev_by_addr(ioc, + mpt2sas_port->remote_identify.sas_address); + if (sas_device) { set_bit(sas_device->handle, - ioc->blocking_handles); + ioc->blocking_handles); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } } @@ -3080,7 +3184,7 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) { Mpi2SCSITaskManagementRequest_t *mpi_request; u16 smid; - struct _sas_device *sas_device; + struct _sas_device *sas_device = NULL; struct MPT2SAS_TARGET *sas_target_priv_data = NULL; u64 sas_address = 0; unsigned long flags; @@ -3110,7 +3214,7 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) return; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (sas_device && sas_device->starget && sas_device->starget->hostdata) { sas_target_priv_data = sas_device->starget->hostdata; @@ -3131,14 +3235,14 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) if (!smid) { delayed_tr = kzalloc(sizeof(*delayed_tr), GFP_ATOMIC); if (!delayed_tr) - return; + goto out; INIT_LIST_HEAD(&delayed_tr->list); delayed_tr->handle = handle; list_add_tail(&delayed_tr->list, &ioc->delayed_tr_list); dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "DELAYED:tr:handle(0x%04x), (open)\n", ioc->name, handle)); - return; + goto out; } dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "tr_send:handle(0x%04x), " @@ -3150,6 +3254,9 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) mpi_request->DevHandle = cpu_to_le16(handle); mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET; mpt2sas_base_put_smid_hi_priority(ioc, smid); +out: + if (sas_device) + sas_device_put(sas_device); } @@ -4068,7 +4175,6 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, char *desc_scsi_state = ioc->tmp_string; u32 log_info = le32_to_cpu(mpi_reply->IOCLogInfo); struct _sas_device *sas_device = NULL; - unsigned long flags; struct scsi_target *starget = scmd->device->sdev_target; struct MPT2SAS_TARGET *priv_target = starget->hostdata; char *device_str = NULL; @@ -4200,9 +4306,7 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, printk(MPT2SAS_WARN_FMT "\t%s wwid(0x%016llx)\n", ioc->name, device_str, (unsigned long long)priv_target->sas_address); } else { - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - priv_target->sas_address); + sas_device = mpt2sas_get_sdev_from_target(ioc, priv_target); if (sas_device) { printk(MPT2SAS_WARN_FMT "\tsas_address(0x%016llx), " "phy(%d)\n", ioc->name, sas_device->sas_address, @@ -4211,8 +4315,9 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, "\tenclosure_logical_id(0x%016llx), slot(%d)\n", ioc->name, sas_device->enclosure_logical_id, sas_device->slot); + + sas_device_put(sas_device); } - 
spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } printk(MPT2SAS_WARN_FMT "\thandle(0x%04x), ioc_status(%s)(0x%04x), " @@ -4259,7 +4364,7 @@ _scsih_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) Mpi2SepRequest_t mpi_request; struct _sas_device *sas_device; - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); if (!sas_device) return; @@ -4274,7 +4379,7 @@ _scsih_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) &mpi_request)) != 0) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); - return; + goto out; } sas_device->pfa_led_on = 1; @@ -4284,8 +4389,10 @@ _scsih_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) "enclosure_processor: ioc_status (0x%04x), loginfo(0x%08x)\n", ioc->name, le16_to_cpu(mpi_reply.IOCStatus), le32_to_cpu(mpi_reply.IOCLogInfo))); - return; + goto out; } +out: + sas_device_put(sas_device); } /** @@ -4370,19 +4477,17 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle) /* only handle non-raid devices */ spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (!sas_device) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; + goto out_unlock; } starget = sas_device->starget; sas_target_priv_data = starget->hostdata; if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) || - ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))) + goto out_unlock; + starget_printk(KERN_WARNING, starget, "predicted fault\n"); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -4396,7 +4501,7 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle) if (!event_reply) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); - return; + goto out; } event_reply->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; @@ -4413,6 +4518,14 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle) event_data->SASAddress = cpu_to_le64(sas_target_priv_data->sas_address); mpt2sas_ctl_add_to_event_log(ioc, event_reply); kfree(event_reply); +out: + if (sas_device) + sas_device_put(sas_device); + return; + +out_unlock: + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + goto out; } /** @@ -5148,14 +5261,13 @@ _scsih_check_device(struct MPT2SAS_ADAPTER *ioc, u16 handle) spin_lock_irqsave(&ioc->sas_device_lock, flags); sas_address = le64_to_cpu(sas_device_pg0.SASAddress); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address); if (!sas_device) { printk(MPT2SAS_ERR_FMT "device is not present " "handle(0x%04x), no sas_device!!!\n", ioc->name, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; + goto out_unlock; } if (unlikely(sas_device->handle != handle)) { @@ -5172,19 +5284,24 @@ _scsih_check_device(struct MPT2SAS_ADAPTER *ioc, u16 handle) MPI2_SAS_DEVICE0_FLAGS_DEVICE_PRESENT)) { printk(MPT2SAS_ERR_FMT "device is not present " "handle(0x%04x), flags!!!\n", ioc->name, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; + goto out_unlock; } /* check if there were any issues with discovery */ if (_scsih_check_access_status(ioc, sas_address, handle, - sas_device_pg0.AccessStatus)) { - 
spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + sas_device_pg0.AccessStatus)) + goto out_unlock; + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); _scsih_ublock_io_device(ioc, sas_address); + if (sas_device) + sas_device_put(sas_device); + return; +out_unlock: + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + if (sas_device) + sas_device_put(sas_device); } /** @@ -5208,7 +5325,6 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) u32 ioc_status; __le64 sas_address; u32 device_info; - unsigned long flags; if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) { @@ -5250,14 +5366,13 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) return -1; } - - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = mpt2sas_get_sdev_by_addr(ioc, sas_address); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + if (sas_device) { + sas_device_put(sas_device); return 0; + } sas_device = kzalloc(sizeof(struct _sas_device), GFP_KERNEL); @@ -5267,6 +5382,7 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) return -1; } + kref_init(&sas_device->refcount); sas_device->handle = handle; if (_scsih_get_sas_address(ioc, le16_to_cpu (sas_device_pg0.ParentDevHandle), @@ -5296,6 +5412,7 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) else _scsih_sas_device_add(ioc, sas_device); + sas_device_put(sas_device); return 0; } @@ -5344,7 +5461,6 @@ _scsih_remove_device(struct MPT2SAS_ADAPTER *ioc, "handle(0x%04x), sas_addr(0x%016llx)\n", ioc->name, __func__, sas_device->handle, (unsigned long long) sas_device->sas_address)); - kfree(sas_device); } /** * _scsih_device_remove_by_handle - removing device object by handle @@ -5363,12 +5479,17 @@ _scsih_device_remove_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) return; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - if (sas_device) - list_del(&sas_device->list); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + + if (sas_device) { _scsih_remove_device(ioc, sas_device); + sas_device_put(sas_device); + } } /** @@ -5389,13 +5510,17 @@ mpt2sas_device_remove_by_sas_address(struct MPT2SAS_ADAPTER *ioc, return; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - sas_address); - if (sas_device) - list_del(&sas_device->list); + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address); + if (sas_device) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + + if (sas_device) { _scsih_remove_device(ioc, sas_device); + sas_device_put(sas_device); + } } #ifdef CONFIG_SCSI_MPT2SAS_LOGGING /** @@ -5716,26 +5841,28 @@ _scsih_sas_device_status_change_event(struct MPT2SAS_ADAPTER *ioc, spin_lock_irqsave(&ioc->sas_device_lock, flags); sas_address = le64_to_cpu(event_data->SASAddress); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address); - if (!sas_device || !sas_device->starget) { - 
spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + if (!sas_device || !sas_device->starget) + goto out; target_priv_data = sas_device->starget->hostdata; - if (!target_priv_data) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + if (!target_priv_data) + goto out; if (event_data->ReasonCode == MPI2_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET) target_priv_data->tm_busy = 1; else target_priv_data->tm_busy = 0; + +out: + if (sas_device) + sas_device_put(sas_device); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + } #ifdef CONFIG_SCSI_MPT2SAS_LOGGING @@ -6123,7 +6250,7 @@ _scsih_sas_pd_expose(struct MPT2SAS_ADAPTER *ioc, u16 handle = le16_to_cpu(element->PhysDiskDevHandle); spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (sas_device) { sas_device->volume_handle = 0; sas_device->volume_wwid = 0; @@ -6142,6 +6269,8 @@ _scsih_sas_pd_expose(struct MPT2SAS_ADAPTER *ioc, /* exposing raid component */ if (starget) starget_for_each_device(starget, NULL, _scsih_reprobe_lun); + + sas_device_put(sas_device); } /** @@ -6170,7 +6299,7 @@ _scsih_sas_pd_hide(struct MPT2SAS_ADAPTER *ioc, &volume_wwid); spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (sas_device) { set_bit(handle, ioc->pd_handles); if (sas_device->starget && sas_device->starget->hostdata) { @@ -6189,6 +6318,8 @@ _scsih_sas_pd_hide(struct MPT2SAS_ADAPTER *ioc, /* hiding raid component */ if (starget) starget_for_each_device(starget, (void *)1, _scsih_reprobe_lun); + + sas_device_put(sas_device); } /** @@ -6221,7 +6352,6 @@ _scsih_sas_pd_add(struct MPT2SAS_ADAPTER *ioc, Mpi2EventIrConfigElement_t *element) { struct _sas_device *sas_device; - unsigned long flags; u16 handle = le16_to_cpu(element->PhysDiskDevHandle); Mpi2ConfigReply_t mpi_reply; Mpi2SasDevicePage0_t sas_device_pg0; @@ -6231,11 +6361,11 @@ _scsih_sas_pd_add(struct MPT2SAS_ADAPTER *ioc, set_bit(handle, ioc->pd_handles); - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + sas_device_put(sas_device); return; + } if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) { @@ -6509,7 +6639,6 @@ _scsih_sas_ir_physical_disk_event(struct MPT2SAS_ADAPTER *ioc, u16 handle, parent_handle; u32 state; struct _sas_device *sas_device; - unsigned long flags; Mpi2ConfigReply_t mpi_reply; Mpi2SasDevicePage0_t sas_device_pg0; u32 ioc_status; @@ -6542,12 +6671,11 @@ _scsih_sas_ir_physical_disk_event(struct MPT2SAS_ADAPTER *ioc, if (!ioc->is_warpdrive) set_bit(handle, ioc->pd_handles); - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - - if (sas_device) + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + sas_device_put(sas_device); return; + } if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, @@ -7015,6 +7143,7 @@ _scsih_remove_unresponding_sas_devices(struct MPT2SAS_ADAPTER *ioc) struct _raid_device *raid_device, *raid_device_next; struct 
list_head tmp_list; unsigned long flags; + LIST_HEAD(head); printk(MPT2SAS_INFO_FMT "removing unresponding devices: start\n", ioc->name); @@ -7022,14 +7151,29 @@ _scsih_remove_unresponding_sas_devices(struct MPT2SAS_ADAPTER *ioc) /* removing unresponding end devices */ printk(MPT2SAS_INFO_FMT "removing unresponding devices: end-devices\n", ioc->name); + + /* + * Iterate, pulling off devices marked as non-responding. We become the + * owner for the reference the list had on any object we prune. + */ + spin_lock_irqsave(&ioc->sas_device_lock, flags); list_for_each_entry_safe(sas_device, sas_device_next, - &ioc->sas_device_list, list) { + &ioc->sas_device_list, list) { if (!sas_device->responding) - mpt2sas_device_remove_by_sas_address(ioc, - sas_device->sas_address); + list_move_tail(&sas_device->list, &head); else sas_device->responding = 0; } + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + /* + * Now, uninitialize and remove the unresponding devices we pruned. + */ + list_for_each_entry_safe(sas_device, sas_device_next, &head, list) { + _scsih_remove_device(ioc, sas_device); + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } /* removing unresponding volumes */ if (ioc->ir_firmware) { @@ -7179,11 +7323,11 @@ _scsih_scan_for_devices_after_reset(struct MPT2SAS_ADAPTER *ioc) } phys_disk_num = pd_pg0.PhysDiskNum; handle = le16_to_cpu(pd_pg0.DevHandle); - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + sas_device_put(sas_device); continue; + } if (mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle) != 0) @@ -7302,12 +7446,12 @@ _scsih_scan_for_devices_after_reset(struct MPT2SAS_ADAPTER *ioc) if (!(_scsih_is_end_device( le32_to_cpu(sas_device_pg0.DeviceInfo)))) continue; - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = mpt2sas_get_sdev_by_addr(ioc, le64_to_cpu(sas_device_pg0.SASAddress)); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + if (sas_device) { + sas_device_put(sas_device); continue; + } parent_handle = le16_to_cpu(sas_device_pg0.ParentDevHandle); if (!_scsih_get_sas_address(ioc, parent_handle, &sas_address)) { printk(MPT2SAS_INFO_FMT "\tBEFORE adding end device: " @@ -7966,6 +8110,48 @@ _scsih_probe_raid(struct MPT2SAS_ADAPTER *ioc) } } +static struct _sas_device *get_next_sas_device(struct MPT2SAS_ADAPTER *ioc) +{ + struct _sas_device *sas_device = NULL; + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + if (!list_empty(&ioc->sas_device_init_list)) { + sas_device = list_first_entry(&ioc->sas_device_init_list, + struct _sas_device, list); + sas_device_get(sas_device); + } + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + return sas_device; +} + +static void sas_device_make_active(struct MPT2SAS_ADAPTER *ioc, + struct _sas_device *sas_device) +{ + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + + /* + * Since we dropped the lock during the call to port_add(), we need to + * be careful here that somebody else didn't move or delete this item + * while we were busy with other things. + * + * If it was on the list, we need a put() for the reference the list + * had. Either way, we need a get() for the destination list. 
+ */ + if (!list_empty(&sas_device->list)) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } + + sas_device_get(sas_device); + list_add_tail(&sas_device->list, &ioc->sas_device_list); + + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); +} + /** * _scsih_probe_sas - reporting sas devices to sas transport * @ioc: per adapter object @@ -7975,34 +8161,30 @@ _scsih_probe_raid(struct MPT2SAS_ADAPTER *ioc) static void _scsih_probe_sas(struct MPT2SAS_ADAPTER *ioc) { - struct _sas_device *sas_device, *next; - unsigned long flags; - - /* SAS Device List */ - list_for_each_entry_safe(sas_device, next, &ioc->sas_device_init_list, - list) { + struct _sas_device *sas_device; - if (ioc->hide_drives) - continue; + if (ioc->hide_drives) + return; + while ((sas_device = get_next_sas_device(ioc))) { if (!mpt2sas_transport_port_add(ioc, sas_device->handle, - sas_device->sas_address_parent)) { - list_del(&sas_device->list); - kfree(sas_device); + sas_device->sas_address_parent)) { + _scsih_sas_device_remove(ioc, sas_device); + sas_device_put(sas_device); continue; } else if (!sas_device->starget) { if (!ioc->is_driver_loading) { mpt2sas_transport_port_remove(ioc, - sas_device->sas_address, - sas_device->sas_address_parent); - list_del(&sas_device->list); - kfree(sas_device); + sas_device->sas_address, + sas_device->sas_address_parent); + _scsih_sas_device_remove(ioc, sas_device); + sas_device_put(sas_device); continue; } } - spin_lock_irqsave(&ioc->sas_device_lock, flags); - list_move_tail(&sas_device->list, &ioc->sas_device_list); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + sas_device_make_active(ioc, sas_device); + sas_device_put(sas_device); } } diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index ff2500a..af86800 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1323,15 +1323,17 @@ _transport_get_enclosure_identifier(struct sas_rphy *rphy, u64 *identifier) int rc; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, rphy->identify.sas_address); if (sas_device) { *identifier = sas_device->enclosure_logical_id; rc = 0; + sas_device_put(sas_device); } else { *identifier = 0; rc = -ENXIO; } + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); return rc; } @@ -1351,12 +1353,14 @@ _transport_get_bay_identifier(struct sas_rphy *rphy) int rc; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, rphy->identify.sas_address); - if (sas_device) + if (sas_device) { rc = sas_device->slot; - else + sas_device_put(sas_device); + } else { rc = -ENXIO; + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); return rc; } -- cgit v0.10.2 From 008549f6e8a1dc4aeea4a8d64184909786b27713 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 13 Aug 2015 18:48:10 -0700 Subject: mpt2sas: Refcount fw_events and fix unsafe list usage The fw_event_work struct is concurrently referenced at shutdown, so add a refcount to protect it, and refactor the code to use it. Additionally, refactor _scsih_fw_event_cleanup_queue() such that it no longer iterates over the list without holding the lock, since _firmware_event_work() concurrently deletes items from the list. 
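For illustration, the lifetime rule this patch establishes can be sketched in a few self-contained lines: every container that stores a pointer to the object (here, the event list and the delayed work) holds its own kref reference, taken when the pointer is stored and dropped when it is removed. The names below are hypothetical stand-ins, not the driver source, and the queue lock is assumed to be held by the callers:

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>

struct work_item {			/* hypothetical stand-in for fw_event_work */
	struct kref refcount;
	struct list_head list;
};

static void work_item_release(struct kref *r)
{
	kfree(container_of(r, struct work_item, refcount));
}

static struct work_item *work_item_alloc(void)
{
	struct work_item *w = kzalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return NULL;
	kref_init(&w->refcount);	/* the caller owns this initial reference */
	INIT_LIST_HEAD(&w->list);
	return w;
}

/* The list takes its own reference when an item is queued... */
static void queue_item(struct list_head *q, struct work_item *w)
{
	kref_get(&w->refcount);
	list_add_tail(&w->list, q);
}

/* ...and drops it on removal; list_del_init() keeps a later
 * list_empty() check safe, mirroring _scsih_fw_event_del_from_list(). */
static void dequeue_item(struct work_item *w)
{
	if (!list_empty(&w->list)) {
		list_del_init(&w->list);
		kref_put(&w->refcount, work_item_release);
	}
}

With this rule, neither the shutdown path nor the work function can free the object while the other still holds a pointer to it; whichever put runs last performs the kfree().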
Signed-off-by: Calvin Owens Reviewed-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Tested-by: Chaitra Basappa Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 5eca3a4..c0ff55b 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -176,9 +176,37 @@ struct fw_event_work { u8 VP_ID; u8 ignore; u16 event; + struct kref refcount; char event_data[0] __aligned(4); }; +static void fw_event_work_free(struct kref *r) +{ + kfree(container_of(r, struct fw_event_work, refcount)); +} + +static void fw_event_work_get(struct fw_event_work *fw_work) +{ + kref_get(&fw_work->refcount); +} + +static void fw_event_work_put(struct fw_event_work *fw_work) +{ + kref_put(&fw_work->refcount, fw_event_work_free); +} + +static struct fw_event_work *alloc_fw_event_work(int len) +{ + struct fw_event_work *fw_event; + + fw_event = kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC); + if (!fw_event) + return NULL; + + kref_init(&fw_event->refcount); + return fw_event; +} + /* raid transport support */ static struct raid_template *mpt2sas_raid_template; @@ -2872,36 +2900,39 @@ _scsih_fw_event_add(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event) return; spin_lock_irqsave(&ioc->fw_event_lock, flags); + fw_event_work_get(fw_event); list_add_tail(&fw_event->list, &ioc->fw_event_list); INIT_DELAYED_WORK(&fw_event->delayed_work, _firmware_event_work); + fw_event_work_get(fw_event); queue_delayed_work(ioc->firmware_event_thread, &fw_event->delayed_work, 0); spin_unlock_irqrestore(&ioc->fw_event_lock, flags); } /** - * _scsih_fw_event_free - delete fw_event + * _scsih_fw_event_del_from_list - delete fw_event from the list * @ioc: per adapter object * @fw_event: object describing the event * Context: This function will acquire ioc->fw_event_lock. * - * This removes firmware event object from link list, frees associated memory. + * If the fw_event is on the fw_event_list, remove it and do a put. * * Return nothing. 
*/ static void -_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work +_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event) { unsigned long flags; spin_lock_irqsave(&ioc->fw_event_lock, flags); - list_del(&fw_event->list); - kfree(fw_event); + if (!list_empty(&fw_event->list)) { + list_del_init(&fw_event->list); + fw_event_work_put(fw_event); + } spin_unlock_irqrestore(&ioc->fw_event_lock, flags); } - /** * _scsih_error_recovery_delete_devices - remove devices not responding * @ioc: per adapter object @@ -2916,13 +2947,14 @@ _scsih_error_recovery_delete_devices(struct MPT2SAS_ADAPTER *ioc) if (ioc->is_driver_loading) return; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); } /** @@ -2936,12 +2968,29 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc) { struct fw_event_work *fw_event; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); +} + +static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc) +{ + unsigned long flags; + struct fw_event_work *fw_event = NULL; + + spin_lock_irqsave(&ioc->fw_event_lock, flags); + if (!list_empty(&ioc->fw_event_list)) { + fw_event = list_first_entry(&ioc->fw_event_list, + struct fw_event_work, list); + list_del_init(&fw_event->list); + } + spin_unlock_irqrestore(&ioc->fw_event_lock, flags); + + return fw_event; } /** @@ -2956,17 +3005,25 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc) static void _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc) { - struct fw_event_work *fw_event, *next; + struct fw_event_work *fw_event; if (list_empty(&ioc->fw_event_list) || !ioc->firmware_event_thread || in_interrupt()) return; - list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) { - if (cancel_delayed_work_sync(&fw_event->delayed_work)) { - _scsih_fw_event_free(ioc, fw_event); - continue; - } + while ((fw_event = dequeue_next_fw_event(ioc))) { + /* + * Wait on the fw_event to complete. If this returns 1, then + * the event was never executed, and we need a put for the + * reference the delayed_work had on the fw_event. 
+ * + * If it did execute, we wait for it to finish, and the put will happen from _firmware_event_work() + */ + if (cancel_delayed_work_sync(&fw_event->delayed_work)) + fw_event_work_put(fw_event); + + fw_event_work_put(fw_event); } } @@ -4447,13 +4504,14 @@ _scsih_send_event_to_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) { struct fw_event_work *fw_event; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_TURN_ON_PFA_LED; fw_event->device_handle = handle; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); } /** @@ -7554,17 +7612,27 @@ _firmware_event_work(struct work_struct *work) struct fw_event_work, delayed_work.work); struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; + _scsih_fw_event_del_from_list(ioc, fw_event); + /* the queue is being flushed so ignore this event */ - if (ioc->remove_host || - ioc->pci_error_recovery) { - _scsih_fw_event_free(ioc, fw_event); + if (ioc->remove_host || ioc->pci_error_recovery) { + fw_event_work_put(fw_event); return; } switch (fw_event->event) { case MPT2SAS_REMOVE_UNRESPONDING_DEVICES: - while (scsi_host_in_recovery(ioc->shost) || ioc->shost_recovery) + while (scsi_host_in_recovery(ioc->shost) || + ioc->shost_recovery) { + /* + * If we're unloading, bail. Otherwise, this can become + * an infinite loop. + */ + if (ioc->remove_host) + goto out; + ssleep(1); + } _scsih_remove_unresponding_sas_devices(ioc); _scsih_scan_for_devices_after_reset(ioc); break; @@ -7613,7 +7681,8 @@ _scsih_sas_ir_operation_status_event(ioc, fw_event); break; } - _scsih_fw_event_free(ioc, fw_event); +out: + fw_event_work_put(fw_event); } /** @@ -7751,7 +7820,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index, } sz = le16_to_cpu(mpi_reply->EventDataLength) * 4; - fw_event = kzalloc(sizeof(*fw_event) + sz, GFP_ATOMIC); + fw_event = alloc_fw_event_work(sz); if (!fw_event) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); @@ -7764,6 +7833,7 @@ fw_event->VP_ID = mpi_reply->VP_ID; fw_event->event = event; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); return; } -- cgit v0.10.2 From c1287970f4847a973830daf4076bc25929f3b2d9 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:20 +0300 Subject: scsi_debug: define pr_fmt() for consistent logging Use pr_fmt with both module name and __func__. Also drop a few bare printk leftovers. The log format should stay pretty much intact. Signed-off-by: Tomas Winkler Acked-by: Douglas Gilbert Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 30268bb..6e6bf0f 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -25,6 +25,9 @@ * module options to "modprobe scsi_debug num_tgts=2" [20021221] */ + +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + #include #include @@ -2446,8 +2449,7 @@ static int dif_verify(struct sd_dif_tuple *sdt, const void *data, __be16 csum = dif_compute_csum(data, scsi_debug_sector_size); if (sdt->guard_tag != csum) { - pr_err("%s: GUARD check failed on sector %lu rcvd 0x%04x, data 0x%04x\n", - __func__, + pr_err("GUARD check failed on sector %lu rcvd 0x%04x, data 0x%04x\n", (unsigned long)sector, be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum)); @@ -2455,14 +2457,14 @@ static int dif_verify(struct sd_dif_tuple *sdt, const void *data, } if (scsi_debug_dif == SD_DIF_TYPE1_PROTECTION && be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) { - pr_err("%s: REF check failed on sector %lu\n", - __func__, (unsigned long)sector); + pr_err("REF check failed on sector %lu\n", + (unsigned long)sector); return 0x03; } if (scsi_debug_dif == SD_DIF_TYPE2_PROTECTION && be32_to_cpu(sdt->ref_tag) != ei_lba) { - pr_err("%s: REF check failed on sector %lu\n", - __func__, (unsigned long)sector); + pr_err("REF check failed on sector %lu\n", + (unsigned long)sector); return 0x03; } return 0; @@ -3449,7 +3451,7 @@ static void sdebug_q_cmd_complete(unsigned long indx) atomic_inc(&sdebug_completions); qa_indx = indx; if ((qa_indx < 0) || (qa_indx >= SCSI_DEBUG_CANQUEUE)) { - pr_err("%s: wild qa_indx=%d\n", __func__, qa_indx); + pr_err("wild qa_indx=%d\n", qa_indx); return; } spin_lock_irqsave(&queued_arr_lock, iflags); @@ -3457,21 +3459,21 @@ static void sdebug_q_cmd_complete(unsigned long indx) scp = sqcp->a_cmnd; if (NULL == scp) { spin_unlock_irqrestore(&queued_arr_lock, iflags); - pr_err("%s: scp is NULL\n", __func__); + pr_err("scp is NULL\n"); return; } devip = (struct sdebug_dev_info *)scp->device->hostdata; if (devip) atomic_dec(&devip->num_in_q); else - pr_err("%s: devip=NULL\n", __func__); + pr_err("devip=NULL\n"); if (atomic_read(&retired_max_queue) > 0) retiring = 1; sqcp->a_cmnd = NULL; if (!test_and_clear_bit(qa_indx, queued_in_use_bm)) { spin_unlock_irqrestore(&queued_arr_lock, iflags); - pr_err("%s: Unexpected completion\n", __func__); + pr_err("Unexpected completion\n"); return; } @@ -3481,7 +3483,7 @@ static void sdebug_q_cmd_complete(unsigned long indx) retval = atomic_read(&retired_max_queue); if (qa_indx >= retval) { spin_unlock_irqrestore(&queued_arr_lock, iflags); - pr_err("%s: index %d too large\n", __func__, retval); + pr_err("index %d too large\n", retval); return; } k = find_last_bit(queued_in_use_bm, retval); @@ -3509,7 +3511,7 @@ sdebug_q_cmd_hrt_complete(struct hrtimer *timer) atomic_inc(&sdebug_completions); qa_indx = sd_hrtp->qa_indx; if ((qa_indx < 0) || (qa_indx >= SCSI_DEBUG_CANQUEUE)) { - pr_err("%s: wild qa_indx=%d\n", __func__, qa_indx); + pr_err("wild qa_indx=%d\n", qa_indx); goto the_end; } spin_lock_irqsave(&queued_arr_lock, iflags); @@ -3517,21 +3519,21 @@ sdebug_q_cmd_hrt_complete(struct hrtimer *timer) scp = sqcp->a_cmnd; if (NULL == scp) { spin_unlock_irqrestore(&queued_arr_lock, iflags); - pr_err("%s: scp is NULL\n", __func__); + pr_err("scp is NULL\n"); goto the_end; } devip = (struct sdebug_dev_info *)scp->device->hostdata; if (devip) atomic_dec(&devip->num_in_q); else - pr_err("%s: devip=NULL\n", __func__); + pr_err("devip=NULL\n"); if 
(atomic_read(&retired_max_queue) > 0) retiring = 1; sqcp->a_cmnd = NULL; if (!test_and_clear_bit(qa_indx, queued_in_use_bm)) { spin_unlock_irqrestore(&queued_arr_lock, iflags); - pr_err("%s: Unexpected completion\n", __func__); + pr_err("Unexpected completion\n"); goto the_end; } @@ -3541,7 +3543,7 @@ sdebug_q_cmd_hrt_complete(struct hrtimer *timer) retval = atomic_read(&retired_max_queue); if (qa_indx >= retval) { spin_unlock_irqrestore(&queued_arr_lock, iflags); - pr_err("%s: index %d too large\n", __func__, retval); + pr_err("index %d too large\n", retval); goto the_end; } k = find_last_bit(queued_in_use_bm, retval); @@ -3580,7 +3582,7 @@ static struct sdebug_dev_info * devInfoReg(struct scsi_device * sdev) return devip; sdbg_host = *(struct sdebug_host_info **)shost_priv(sdev->host); if (!sdbg_host) { - pr_err("%s: Host info NULL\n", __func__); + pr_err("Host info NULL\n"); return NULL; } list_for_each_entry(devip, &sdbg_host->dev_info_list, dev_list) { @@ -3596,8 +3598,7 @@ static struct sdebug_dev_info * devInfoReg(struct scsi_device * sdev) if (!open_devip) { /* try and make a new one */ open_devip = sdebug_device_create(sdbg_host, GFP_ATOMIC); if (!open_devip) { - printk(KERN_ERR "%s: out of memory at line %d\n", - __func__, __LINE__); + pr_err("out of memory at line %d\n", __LINE__); return NULL; } } @@ -3615,7 +3616,7 @@ static struct sdebug_dev_info * devInfoReg(struct scsi_device * sdev) static int scsi_debug_slave_alloc(struct scsi_device *sdp) { if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) - printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %llu>\n", + pr_info("slave_alloc <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); return 0; @@ -3626,7 +3627,7 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp) struct sdebug_dev_info *devip; if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) - printk(KERN_INFO "scsi_debug: slave_configure <%u %u %u %llu>\n", + pr_info("slave_configure <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); if (sdp->host->max_cmd_len != SCSI_DEBUG_MAX_CMD_LEN) sdp->host->max_cmd_len = SCSI_DEBUG_MAX_CMD_LEN; @@ -3646,7 +3647,7 @@ static void scsi_debug_slave_destroy(struct scsi_device *sdp) (struct sdebug_dev_info *)sdp->hostdata; if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) - printk(KERN_INFO "scsi_debug: slave_destroy <%u %u %u %llu>\n", + pr_info("slave_destroy <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); if (devip) { /* make this slot available for re-use */ @@ -3897,8 +3898,7 @@ static void __init sdebug_build_parts(unsigned char *ramp, return; if (scsi_debug_num_parts > SDEBUG_MAX_PARTS) { scsi_debug_num_parts = SDEBUG_MAX_PARTS; - pr_warn("%s: reducing partitions to %d\n", __func__, - SDEBUG_MAX_PARTS); + pr_warn("reducing partitions to %d\n", SDEBUG_MAX_PARTS); } num_sectors = (int)sdebug_store_sectors; sectors_per_part = (num_sectors - sdebug_sectors_per) @@ -3945,8 +3945,7 @@ schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, struct scsi_device *sdp = cmnd->device; if (NULL == cmnd || NULL == devip) { - pr_warn("%s: called with NULL cmnd or devip pointer\n", - __func__); + pr_warn("called with NULL cmnd or devip pointer\n"); /* no particularly good error to report back */ return SCSI_MLQUEUE_HOST_BUSY; } @@ -4383,8 +4382,7 @@ static ssize_t fake_rw_store(struct device_driver *ddp, const char *buf, fake_storep = vmalloc(sz); if (NULL == fake_storep) { - pr_err("%s: out of memory, 9\n", - 
__func__); + pr_err("out of memory, 9\n"); return -ENOMEM; } memset(fake_storep, 0, sz); @@ -4784,8 +4782,7 @@ static int __init scsi_debug_init(void) atomic_set(&retired_max_queue, 0); if (scsi_debug_ndelay >= 1000000000) { - pr_warn("%s: ndelay must be less than 1 second, ignored\n", - __func__); + pr_warn("ndelay must be less than 1 second, ignored\n"); scsi_debug_ndelay = 0; } else if (scsi_debug_ndelay > 0) scsi_debug_delay = DELAY_OVERRIDDEN; @@ -4797,8 +4794,7 @@ static int __init scsi_debug_init(void) case 4096: break; default: - pr_err("%s: invalid sector_size %d\n", __func__, - scsi_debug_sector_size); + pr_err("invalid sector_size %d\n", scsi_debug_sector_size); return -EINVAL; } @@ -4811,29 +4807,28 @@ static int __init scsi_debug_init(void) break; default: - pr_err("%s: dif must be 0, 1, 2 or 3\n", __func__); + pr_err("dif must be 0, 1, 2 or 3\n"); return -EINVAL; } if (scsi_debug_guard > 1) { - pr_err("%s: guard must be 0 or 1\n", __func__); + pr_err("guard must be 0 or 1\n"); return -EINVAL; } if (scsi_debug_ato > 1) { - pr_err("%s: ato must be 0 or 1\n", __func__); + pr_err("ato must be 0 or 1\n"); return -EINVAL; } if (scsi_debug_physblk_exp > 15) { - pr_err("%s: invalid physblk_exp %u\n", __func__, - scsi_debug_physblk_exp); + pr_err("invalid physblk_exp %u\n", scsi_debug_physblk_exp); return -EINVAL; } if (scsi_debug_lowest_aligned > 0x3fff) { - pr_err("%s: lowest_aligned too big: %u\n", __func__, - scsi_debug_lowest_aligned); + pr_err("lowest_aligned too big: %u\n", + scsi_debug_lowest_aligned); return -EINVAL; } @@ -4863,7 +4858,7 @@ static int __init scsi_debug_init(void) if (0 == scsi_debug_fake_rw) { fake_storep = vmalloc(sz); if (NULL == fake_storep) { - pr_err("%s: out of memory, 1\n", __func__); + pr_err("out of memory, 1\n"); return -ENOMEM; } memset(fake_storep, 0, sz); @@ -4877,11 +4872,10 @@ static int __init scsi_debug_init(void) dif_size = sdebug_store_sectors * sizeof(struct sd_dif_tuple); dif_storep = vmalloc(dif_size); - pr_err("%s: dif_storep %u bytes @ %p\n", __func__, dif_size, - dif_storep); + pr_err("dif_storep %u bytes @ %p\n", dif_size, dif_storep); if (dif_storep == NULL) { - pr_err("%s: out of mem. (DIX)\n", __func__); + pr_err("out of mem. (DIX)\n"); ret = -ENOMEM; goto free_vm; } @@ -4903,18 +4897,17 @@ static int __init scsi_debug_init(void) if (scsi_debug_unmap_alignment && scsi_debug_unmap_granularity <= scsi_debug_unmap_alignment) { - pr_err("%s: ERR: unmap_granularity <= unmap_alignment\n", - __func__); + pr_err("ERR: unmap_granularity <= unmap_alignment\n"); return -EINVAL; } map_size = lba_to_map_index(sdebug_store_sectors - 1) + 1; map_storep = vmalloc(BITS_TO_LONGS(map_size) * sizeof(long)); - pr_info("%s: %lu provisioning blocks\n", __func__, map_size); + pr_info("%lu provisioning blocks\n", map_size); if (map_storep == NULL) { - pr_err("%s: out of mem. (MAP)\n", __func__); + pr_err("out of mem. 
(MAP)\n"); ret = -ENOMEM; goto free_vm; } @@ -4928,18 +4921,18 @@ static int __init scsi_debug_init(void) pseudo_primary = root_device_register("pseudo_0"); if (IS_ERR(pseudo_primary)) { - pr_warn("%s: root_device_register() error\n", __func__); + pr_warn("root_device_register() error\n"); ret = PTR_ERR(pseudo_primary); goto free_vm; } ret = bus_register(&pseudo_lld_bus); if (ret < 0) { - pr_warn("%s: bus_register error: %d\n", __func__, ret); + pr_warn("bus_register error: %d\n", ret); goto dev_unreg; } ret = driver_register(&sdebug_driverfs_driver); if (ret < 0) { - pr_warn("%s: driver_register error: %d\n", __func__, ret); + pr_warn("driver_register error: %d\n", ret); goto bus_unreg; } @@ -4948,16 +4941,14 @@ static int __init scsi_debug_init(void) for (k = 0; k < host_to_add; k++) { if (sdebug_add_adapter()) { - pr_err("%s: sdebug_add_adapter failed k=%d\n", - __func__, k); + pr_err("sdebug_add_adapter failed k=%d\n", k); break; } } - if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) { - pr_info("%s: built %d host(s)\n", __func__, - scsi_debug_add_host); - } + if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) + pr_info("built %d host(s)\n", scsi_debug_add_host); + return 0; bus_unreg: @@ -5012,8 +5003,7 @@ static int sdebug_add_adapter(void) sdbg_host = kzalloc(sizeof(*sdbg_host),GFP_KERNEL); if (NULL == sdbg_host) { - printk(KERN_ERR "%s: out of memory at line %d\n", - __func__, __LINE__); + pr_err("out of memory at line %d\n", __LINE__); return -ENOMEM; } @@ -5023,8 +5013,7 @@ static int sdebug_add_adapter(void) for (k = 0; k < devs_per_host; k++) { sdbg_devinfo = sdebug_device_create(sdbg_host, GFP_KERNEL); if (!sdbg_devinfo) { - printk(KERN_ERR "%s: out of memory at line %d\n", - __func__, __LINE__); + pr_err("out of memory at line %d\n", __LINE__); error = -ENOMEM; goto clean; } @@ -5338,7 +5327,7 @@ static int sdebug_driver_probe(struct device * dev) sdebug_driver_template.use_clustering = ENABLE_CLUSTERING; hpnt = scsi_host_alloc(&sdebug_driver_template, sizeof(sdbg_host)); if (NULL == hpnt) { - pr_err("%s: scsi_host_alloc failed\n", __func__); + pr_err("scsi_host_alloc failed\n"); error = -ENODEV; return error; } @@ -5381,7 +5370,7 @@ static int sdebug_driver_probe(struct device * dev) scsi_host_set_prot(hpnt, host_prot); - printk(KERN_INFO "scsi_debug: host protection%s%s%s%s%s%s%s\n", + pr_info("host protection%s%s%s%s%s%s%s\n", (host_prot & SHOST_DIF_TYPE1_PROTECTION) ? " DIF1" : "", (host_prot & SHOST_DIF_TYPE2_PROTECTION) ? " DIF2" : "", (host_prot & SHOST_DIF_TYPE3_PROTECTION) ? " DIF3" : "", @@ -5409,7 +5398,7 @@ static int sdebug_driver_probe(struct device * dev) error = scsi_add_host(hpnt, &sdbg_host->dev); if (error) { - printk(KERN_ERR "%s: scsi_add_host failed\n", __func__); + pr_err("scsi_add_host failed\n"); error = -ENODEV; scsi_host_put(hpnt); } else @@ -5426,8 +5415,7 @@ static int sdebug_driver_remove(struct device * dev) sdbg_host = to_sdebug_host(dev); if (!sdbg_host) { - printk(KERN_ERR "%s: Unable to locate host info\n", - __func__); + pr_err("Unable to locate host info\n"); return -ENODEV; } -- cgit v0.10.2 From 34d55434ba1f39093ea30cb770d70508fdb1edaa Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:21 +0300 Subject: scsi_debug: use SCSI_W_LUN_REPORT_LUNS instead of SAM2_WLUN_REPORT_LUNS; use SCSI_W_LUN_REPORT_LUNS from scsi.h instead of localy defined SAM2_WLUN_REPORT_LUNS Signed-off-by: Tomas Winkler Acked-by: Douglas Gilbert Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 6e6bf0f..bbe04da 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -204,7 +204,6 @@ static const char *scsi_debug_version_date = "20141022"; /* If REPORT LUNS has luns >= 256 it can choose "flat space" (value 1) * or "peripheral device" addressing (value 0) */ #define SAM2_LUN_ADDRESS_METHOD 0 -#define SAM2_WLUN_REPORT_LUNS 0xc101 /* SCSI_DEBUG_CANQUEUE is the maximum number of commands that can be queued * (for response) at one time. Can be reduced by max_queue option. Command @@ -701,7 +700,7 @@ static void sdebug_max_tgts_luns(void) else hpnt->max_id = scsi_debug_num_tgts; /* scsi_debug_max_luns; */ - hpnt->max_lun = SAM2_WLUN_REPORT_LUNS; + hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS; } spin_unlock(&sdebug_host_list_lock); } @@ -1291,7 +1290,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) arr = kzalloc(SDEBUG_MAX_INQ_ARR_SZ, GFP_ATOMIC); if (! arr) return DID_REQUEUE << 16; - have_wlun = (scp->device->lun == SAM2_WLUN_REPORT_LUNS); + have_wlun = (scp->device->lun == SCSI_W_LUN_REPORT_LUNS); if (have_wlun) pq_pdt = 0x1e; /* present, wlun */ else if (scsi_debug_no_lun_0 && (0 == devip->lun)) @@ -3367,8 +3366,8 @@ static int resp_report_luns(struct scsi_cmnd * scp, one_lun[i].scsi_lun[1] = lun & 0xff; } if (want_wlun) { - one_lun[i].scsi_lun[0] = (SAM2_WLUN_REPORT_LUNS >> 8) & 0xff; - one_lun[i].scsi_lun[1] = SAM2_WLUN_REPORT_LUNS & 0xff; + one_lun[i].scsi_lun[0] = (SCSI_W_LUN_REPORT_LUNS >> 8) & 0xff; + one_lun[i].scsi_lun[1] = SCSI_W_LUN_REPORT_LUNS & 0xff; i++; } alloc_len = (unsigned char *)(one_lun + i) - arr; @@ -5167,7 +5166,7 @@ scsi_debug_queuecommand(struct scsi_cmnd *scp) } sdev_printk(KERN_INFO, sdp, "%s: cmd %s\n", my_name, b); } - has_wlun_rl = (sdp->lun == SAM2_WLUN_REPORT_LUNS); + has_wlun_rl = (sdp->lun == SCSI_W_LUN_REPORT_LUNS); if ((sdp->lun >= scsi_debug_max_luns) && !has_wlun_rl) return schedule_resp(scp, NULL, errsts_no_connect, 0); @@ -5338,7 +5337,7 @@ static int sdebug_driver_probe(struct device * dev) hpnt->max_id = scsi_debug_num_tgts + 1; else hpnt->max_id = scsi_debug_num_tgts; - hpnt->max_lun = SAM2_WLUN_REPORT_LUNS; /* = scsi_debug_max_luns; */ + hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS; /* = scsi_debug_max_luns; */ host_prot = 0; -- cgit v0.10.2 From de232af6703ff8e283559016c14a3273ea932878 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:22 +0300 Subject: scsi_debug: vfree is null safe so drop the check Signed-off-by: Tomas Winkler Acked-by: Douglas Gilbert Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index bbe04da..e2c509e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4955,10 +4955,8 @@ bus_unreg: dev_unreg: root_device_unregister(pseudo_primary); free_vm: - if (map_storep) - vfree(map_storep); - if (dif_storep) - vfree(dif_storep); + vfree(map_storep); + vfree(dif_storep); vfree(fake_storep); return ret; @@ -4976,9 +4974,7 @@ static void __exit scsi_debug_exit(void) bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); - if (dif_storep) - vfree(dif_storep); - + vfree(dif_storep); vfree(fake_storep); } -- cgit v0.10.2 From 58a8635d5a1b49c4b87fb48969319e1ce77d3f03 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:23 +0300 Subject: scsi_debug: make dump_sector static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the warning: warning: no previous prototype for ‘dump_sector’ Signed-off-by: Tomas Winkler Acked-by: Douglas Gilbert Reviewed-by: Martin K. Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index e2c509e..3a70683 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2681,7 +2681,7 @@ resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return 0; } -void dump_sector(unsigned char *buf, int len) +static void dump_sector(unsigned char *buf, int len) { int i, j, n; -- cgit v0.10.2 From 299b6c07ea134d4f9d7cb743194bf9c1941585b2 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:24 +0300 Subject: scsi_debug: schedule_resp fix input variable check The function should never be called with a NULL cmnd, so put a fat WARN there. Also fix the smatch warning: schedule_resp() warn: variable dereferenced before check 'cmnd' Signed-off-by: Tomas Winkler Acked-by: Douglas Gilbert Reviewed-by: Martin K. Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 3a70683..faa4ddd 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -3941,13 +3941,20 @@ schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, unsigned long iflags; int k, num_in_q, qdepth, inject; struct sdebug_queued_cmd *sqcp = NULL; - struct scsi_device *sdp = cmnd->device; + struct scsi_device *sdp; + + /* this should never happen */ + if (WARN_ON(!cmnd)) + return SCSI_MLQUEUE_HOST_BUSY; - if (NULL == cmnd || NULL == devip) { - pr_warn("called with NULL cmnd or devip pointer\n"); + if (NULL == devip) { + pr_warn("called devip == NULL\n"); /* no particularly good error to report back */ return SCSI_MLQUEUE_HOST_BUSY; } + + sdp = cmnd->device; + if ((scsi_result) && (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)) sdev_printk(KERN_INFO, sdp, "%s: non-zero result=0x%x\n", __func__, scsi_result); -- cgit v0.10.2 From f2d3fd29ba6226218533ad3fbba2312ae122663f Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:25 +0300 Subject: scsi_debug: fix REPORT LUNS Well Known LU The use case to report 'REPORT LUNS WLUN' described in the scsi_debug documentation didn't work because scsi_scan_host_selected() checks for: lun < shost->max_lun To fix this we set: max_lun = SCSI_W_LUN_REPORT_LUNS + 1; Signed-off-by: Tomas Winkler Acked-by: Douglas Gilbert Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index faa4ddd..2e0b3d7 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -700,7 +700,7 @@ static void sdebug_max_tgts_luns(void) else hpnt->max_id = scsi_debug_num_tgts; /* scsi_debug_max_luns; */ - hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS; + hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS + 1; } spin_unlock(&sdebug_host_list_lock); } @@ -5340,7 +5340,8 @@ static int sdebug_driver_probe(struct device * dev) hpnt->max_id = scsi_debug_num_tgts + 1; else hpnt->max_id = scsi_debug_num_tgts; - hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS; /* = scsi_debug_max_luns; */ + /* = scsi_debug_max_luns; */ + hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS + 1; host_prot = 0; -- cgit v0.10.2 From 2492fc09f0b90cd69cd9788d12c5c79d673adef3 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 28 Jul 2015 16:54:26 +0300 Subject: scsi_debug: resp_request: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following warning: In function ‘resp_requests’: drivers/scsi//scsi_debug.c:1432:15: warning: variable ‘want_dsense’ set but not used [-Wunused-but-set-variable] bool dsense, want_dsense; Signed-off-by: Tomas Winkler Reviewed-by: Martin K. Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 2e0b3d7..dfcc45b 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1429,12 +1429,11 @@ static int resp_requests(struct scsi_cmnd * scp, unsigned char * sbuff; unsigned char *cmd = scp->cmnd; unsigned char arr[SCSI_SENSE_BUFFERSIZE]; - bool dsense, want_dsense; + bool dsense; int len = 18; memset(arr, 0, sizeof(arr)); dsense = !!(cmd[1] & 1); - want_dsense = dsense || scsi_debug_dsense; sbuff = scp->sense_buffer; if ((iec_m_pg[2] & 0x4) && (6 == (iec_m_pg[3] & 0xf))) { if (dsense) { -- cgit v0.10.2 From 0c416b54f29151a31999868d59c64ace11589d1b Mon Sep 17 00:00:00 2001 From: Jordan Hargrave Date: Mon, 13 Jul 2015 09:27:33 -0500 Subject: scsi_transport_sas: Remove check for SAS expander when querying bay/enclosure IDs. Dell Server backplanes can report bay/enclosure IDs without an expander present. This patch allows the bay/enclosure IDs to be propagated to sysfs. Signed-off-by: Jordan Hargrave Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 9a05819..30d26e3 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1222,13 +1222,6 @@ show_sas_rphy_enclosure_identifier(struct device *dev, u64 identifier; int error; - /* - * Only devices behind an expander are supported, because the - * enclosure identifier is a SMP feature. - */ - if (scsi_is_sas_phy_local(phy)) - return -EINVAL; - error = i->f->get_enclosure_identifier(rphy, &identifier); if (error) return error; @@ -1248,9 +1241,6 @@ show_sas_rphy_bay_identifier(struct device *dev, struct sas_internal *i = to_sas_internal(shost->transportt); int val; - if (scsi_is_sas_phy_local(phy)) - return -EINVAL; - val = i->f->get_bay_identifier(rphy); if (val < 0) return val; -- cgit v0.10.2 From b7f4d6343820af5c2dc3979e91d85e71e638cd3d Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Thu, 13 Aug 2015 06:41:51 -0400 Subject: bfa: Fix indentation Signed-off-by: Anil Gurumurthy Tested-by: Sudarasana Kalluru Reviewed-by: Ewan D. 
Milne Signed-off-by: James Bottomley diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 315d6d6..4e71044 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -3665,19 +3665,19 @@ bfa_cb_sfp_state_query(struct bfa_sfp_s *sfp) if (sfp->state_query_cbfn) sfp->state_query_cbfn(sfp->state_query_cbarg, sfp->status); - sfp->media = NULL; - } + sfp->media = NULL; + } - if (sfp->portspeed) { - sfp->status = bfa_sfp_speed_valid(sfp, sfp->portspeed); - if (sfp->state_query_cbfn) - sfp->state_query_cbfn(sfp->state_query_cbarg, - sfp->status); - sfp->portspeed = BFA_PORT_SPEED_UNKNOWN; - } + if (sfp->portspeed) { + sfp->status = bfa_sfp_speed_valid(sfp, sfp->portspeed); + if (sfp->state_query_cbfn) + sfp->state_query_cbfn(sfp->state_query_cbarg, + sfp->status); + sfp->portspeed = BFA_PORT_SPEED_UNKNOWN; + } - sfp->state_query_lock = 0; - sfp->state_query_cbfn = NULL; + sfp->state_query_lock = 0; + sfp->state_query_cbfn = NULL; } /* -- cgit v0.10.2 From 6f3d828f5bd72174c24789aba5d4ed036b60f44b Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Thu, 13 Aug 2015 06:41:52 -0400 Subject: bfa: Fix incorrect de-reference of pointer Signed-off-by: Anil Gurumurthy Tested-by: Sudarsana Kalluru Reviewed-by: Ewan D. Milne Signed-off-by: James Bottomley diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 4e71044..98f7e8c 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -3878,7 +3878,7 @@ bfa_sfp_show_comp(struct bfa_sfp_s *sfp, struct bfi_mbmsg_s *msg) bfa_trc(sfp, sfp->data_valid); if (sfp->data_valid) { u32 size = sizeof(struct sfp_mem_s); - u8 *des = (u8 *) &(sfp->sfpmem); + u8 *des = (u8 *)(sfp->sfpmem); memcpy(des, sfp->dbuf_kva, size); } /* -- cgit v0.10.2 From 612872cabf5be6f95d43d9a88eef38201ae8005d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 17 Aug 2015 13:03:02 +0200 Subject: lpfc: Fix possible use-after-free and double free in lpfc_mbx_cmpl_rdp_page_a2() If the bf_get() call in lpfc_mbx_cmpl_rdp_page_a2() does succeed, execution continues normally and mp gets kfree()d. If the subsequent call to lpfc_sli_issue_mbox() fails, execution jumps to the error label where lpfc_mbuf_free() is called with mp->virt and mp->phys as function arguments. This is the use after free. Following the use after free, mp gets kfree()d again, which is a double free. 
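The shape of the fix is the classic split-error-label unwind: once a resource has been freed or handed off, later failure paths must jump past its cleanup. A compact, hypothetical sketch of the idiom (assumed helper names, not the lpfc source):

#include <linux/slab.h>

struct ctx {
	void *buf;			/* stands in for the mp mailbox buffer */
};

static bool hw_status_bad(struct ctx *c);		/* assumed helpers */
static void copy_payload(struct ctx *c);
static int issue_next_cmd(struct ctx *c);
static void free_cmd(struct ctx *c);
static void complete_with_failure(struct ctx *c);

static void complete_and_chain(struct ctx *c)
{
	if (hw_status_bad(c))
		goto err_free_buf;	/* the buffer is still owned here */

	copy_payload(c);
	kfree(c->buf);			/* buffer consumed: ownership ends here */
	c->buf = NULL;

	if (issue_next_cmd(c))
		goto err_free_cmd;	/* only the command is still owned */

	return;

err_free_buf:
	kfree(c->buf);			/* falls through to free the command too */
err_free_cmd:
	free_cmd(c);
	complete_with_failure(c);
}

Jumping to the second label after the buffer is gone is what removes both the use-after-free on mp and the second kfree(mp).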
Signed-off-by: Johannes Thumshirn Acked-by: James Smart Signed-off-by: James Bottomley diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index eb62772..4abb93a 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -2284,7 +2284,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) (struct lpfc_rdp_context *)(mbox->context2); if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) - goto error; + goto error_mbuf_free; lpfc_sli_bemem_bcopy(mp->virt, &rdp_context->page_a2, DMP_SFF_PAGE_A2_SIZE); @@ -2299,13 +2299,14 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat; mbox->context2 = (struct lpfc_rdp_context *) rdp_context; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED) - goto error; + goto error_cmd_free; return; -error: +error_mbuf_free: lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); +error_cmd_free: lpfc_sli4_mbox_cmd_free(phba, mbox); rdp_context->cmpl(phba, rdp_context, FAILURE); } -- cgit v0.10.2 From 50acde8ed35620fdfad88f4e20a39a1eb0d8d4c9 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 17 Aug 2015 15:52:32 +0200 Subject: pm80xx: Don't override ts->stat on IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY In case psataPayload->status has a status of IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY, ts->stat gets set to SAS_OPEN_REJECT, but a missing 'break' statement causes a fallthrough to the default handler of the switch statement, overriding ts->stat to SAS_DEV_NO_RESPONSE. Signed-off-by: Johannes Thumshirn Acked-by: Jack Wang Signed-off-by: James Bottomley diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 39306b1..04e67a1 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -2642,6 +2642,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_OPEN_REJECT; ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; default: PM8001_IO_DBG(pm8001_ha, pm8001_printk("Unknown status 0x%x\n", status)); diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 0e1628f..9a389f1 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2337,6 +2337,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_OPEN_REJECT; ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; default: PM8001_IO_DBG(pm8001_ha, pm8001_printk("Unknown status 0x%x\n", status)); -- cgit v0.10.2 From 6229b414b3adb3aac0b54e67d72d6462fc230c0d Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan Date: Tue, 18 Aug 2015 13:27:10 +0530 Subject: mpt2sas: setpci reset kernel oops fix mpt2sas: a setpci reset on a Nytro WarpDrive card, along with sysfs access and cli ioctl access, resulted in a kernel oops 1. pci_access_mutex lock added to provide synchronization between the IOCTL, sysfs, and PCI resource handling paths 2. gioc_lock spinlock to protect list operations over multiple controllers >From c53a1cff4c07528b8b9ec7f6716e94950283e8f9 Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan Date: Tue, 18 Aug 2015 11:58:13 +0530 Subject: [PATCH] mpt2sas setpci reset oops fix In the mpt2sas driver, due to lack of synchronization between ioctl, BRM status access through sysfs, and PCI resource removal, a kernel oops happens as the ioctl path and the BRM status sysfs access path still try to access the removed resources. Two locks were added to provide synchronization: 1. 
pci_access_mutex: Mutex to synchronize the ioctl, sysfs show path and PCI resource handling. PCI resource freeing will free vital hardware/memory resources, which might be in use by cli/sysfs path functions, resulting in a NULL pointer dereference followed by a kernel crash. To avoid the above race condition we use mutex synchronization, which ensures synchronization between the cli/sysfs_show paths. Note: pci_access_mutex is used only if Nytro WarpDrive cards (ioc->is_warpdrive, based on device id) are used, as we could not test this case with other SAS2 HBA cards. We can remove this check if this behaviour is confirmed on other cards. 2. spinlock on list operations over IOCs. Case: when multiple WarpDrive cards (IOCs) are in use, each IOC is added to the ioc list structure on initialization. Watchdog threads run at regular intervals to check each IOC for any fault conditions; these will trigger the dead_ioc thread to deallocate the PCI resources, resulting in deletion of the IOC entry from the list. This deletion needs to be protected by the spinlock to ensure that ioc removal is synchronized; if not synchronized, it might lead to list_del corruption as the ioc list is traversed in the cli path. Signed-off-by: Nagarajkumar Narayanan Reviewed-by: Johannes Thumshirn Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 6dec7cf..c167911 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -112,9 +112,12 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) if (ret) return ret; + /* global ioc spinlock to protect controller list on list operations */ printk(KERN_INFO "setting fwfault_debug(%d)\n", mpt2sas_fwfault_debug); + spin_lock(&gioc_lock); list_for_each_entry(ioc, &mpt2sas_ioc_list, list) ioc->fwfault_debug = mpt2sas_fwfault_debug; + spin_unlock(&gioc_lock); return 0; } @@ -4437,6 +4440,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) dexitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, __func__)); + /* synchronizing freeing resource with pci_access_mutex lock */ + mutex_lock(&ioc->pci_access_mutex); if (ioc->chip_phys && ioc->chip) { _base_mask_interrupts(ioc); ioc->shost_recovery = 1; @@ -4456,6 +4461,7 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } + mutex_unlock(&ioc->pci_access_mutex); return; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 78f41ac..97ea360 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -817,6 +817,12 @@ typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc); * @delayed_tr_list: target reset link list * @delayed_tr_volume_list: volume target reset link list * @@temp_sensors_count: flag to carry the number of temperature sensors + * @pci_access_mutex: Mutex to synchronize the ioctl, sysfs show path and + * PCI resource handling. PCI resource freeing will free vital + * hardware/memory resources, which might be in use by cli/sysfs path + * functions, resulting in a NULL pointer dereference followed by a kernel + * crash. 
To avoid the above race condition we use mutex synchronization, + * which ensures synchronization between the cli/sysfs_show paths. */ struct MPT2SAS_ADAPTER { struct list_head list; @@ -1033,6 +1039,7 @@ struct MPT2SAS_ADAPTER { u8 mfg_pg10_hide_flag; u8 hide_drives; + struct mutex pci_access_mutex; }; typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, @@ -1041,6 +1048,17 @@ typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, /* base shared API */ extern struct list_head mpt2sas_ioc_list; +/* spinlock on list operations over IOCs + * Case: when multiple WarpDrive cards (IOCs) are in use. + * Each IOC will be added to the ioc list structure on initialization. + * Watchdog threads run at regular intervals to check each IOC for any + * fault conditions; these will trigger the dead_ioc thread to + * deallocate the PCI resources, resulting in deletion of the IOC entry + * from the list. This deletion needs to be protected by the spinlock to + * ensure that ioc removal is synchronized; if not synchronized it might + * lead to list_del corruption as the ioc list is traversed in the cli path + */ +extern spinlock_t gioc_lock; void mpt2sas_base_start_watchdog(struct MPT2SAS_ADAPTER *ioc); void mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc); @@ -1119,7 +1137,6 @@ struct _sas_device *__mpt2sas_get_sdev_by_addr( struct MPT2SAS_ADAPTER *ioc, u64 sas_address); void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc); - void mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase); /* config shared API */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c index 4e50960..3694b63 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c +++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c @@ -427,13 +427,16 @@ static int _ctl_verify_adapter(int ioc_number, struct MPT2SAS_ADAPTER **iocpp) { struct MPT2SAS_ADAPTER *ioc; - + /* global ioc lock to protect controller on list operations */ + spin_lock(&gioc_lock); list_for_each_entry(ioc, &mpt2sas_ioc_list, list) { if (ioc->id != ioc_number) continue; + spin_unlock(&gioc_lock); *iocpp = ioc; return ioc_number; } + spin_unlock(&gioc_lock); *iocpp = NULL; return -1; } @@ -522,10 +525,15 @@ _ctl_poll(struct file *filep, poll_table *wait) poll_wait(filep, &ctl_poll_wait, wait); + /* global ioc lock to protect controller on list operations */ + spin_lock(&gioc_lock); list_for_each_entry(ioc, &mpt2sas_ioc_list, list) { - if (ioc->aen_event_read_flag) + if (ioc->aen_event_read_flag) { + spin_unlock(&gioc_lock); return POLLIN | POLLRDNORM; + } } + spin_unlock(&gioc_lock); return 0; } @@ -2168,16 +2176,23 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, if (_ctl_verify_adapter(ioctl_header.ioc_number, &ioc) == -1 || !ioc) return -ENODEV; + /* pci_access_mutex lock acquired by ioctl path */ + mutex_lock(&ioc->pci_access_mutex); if (ioc->shost_recovery || ioc->pci_error_recovery || - ioc->is_driver_loading) - return -EAGAIN; + ioc->is_driver_loading || ioc->remove_host) { + ret = -EAGAIN; + goto out_unlock_pciaccess; + } state = (file->f_flags & O_NONBLOCK) ? 
NON_BLOCKING : BLOCKING; if (state == NON_BLOCKING) { - if (!mutex_trylock(&ioc->ctl_cmds.mutex)) - return -EAGAIN; + if (!mutex_trylock(&ioc->ctl_cmds.mutex)) { + ret = -EAGAIN; + goto out_unlock_pciaccess; + } } else if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) { - return -ERESTARTSYS; + ret = -ERESTARTSYS; + goto out_unlock_pciaccess; } switch (cmd) { @@ -2258,6 +2273,8 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, } mutex_unlock(&ioc->ctl_cmds.mutex); +out_unlock_pciaccess: + mutex_unlock(&ioc->pci_access_mutex); return ret; } @@ -2711,6 +2728,12 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr, "warpdrive\n", ioc->name, __func__); goto out; } + /* pci_access_mutex lock acquired by sysfs show path */ + mutex_lock(&ioc->pci_access_mutex); + if (ioc->pci_error_recovery || ioc->remove_host) { + mutex_unlock(&ioc->pci_access_mutex); + return 0; + } /* allocate upto GPIOVal 36 entries */ sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36); @@ -2749,6 +2772,7 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr, out: kfree(io_unit_pg3); + mutex_unlock(&ioc->pci_access_mutex); return rc; } static DEVICE_ATTR(BRM_status, S_IRUGO, _ctl_BRM_status_show, NULL); diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index c0ff55b..0ad09b2 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -79,7 +79,8 @@ static int _scsih_scan_finished(struct Scsi_Host *shost, unsigned long time); /* global parameters */ LIST_HEAD(mpt2sas_ioc_list); - +/* global ioc lock for list operations */ +DEFINE_SPINLOCK(gioc_lock); /* local parameters */ static u8 scsi_io_cb_idx = -1; static u8 tm_cb_idx = -1; @@ -321,8 +322,10 @@ _scsih_set_debug_level(const char *val, struct kernel_param *kp) return ret; printk(KERN_INFO "setting logging_level(0x%08x)\n", logging_level); + spin_lock(&gioc_lock); list_for_each_entry(ioc, &mpt2sas_ioc_list, list) ioc->logging_level = logging_level; + spin_unlock(&gioc_lock); return 0; } module_param_call(logging_level, _scsih_set_debug_level, param_get_int, @@ -8081,7 +8084,9 @@ _scsih_remove(struct pci_dev *pdev) sas_remove_host(shost); scsi_remove_host(shost); mpt2sas_base_detach(ioc); + spin_lock(&gioc_lock); list_del(&ioc->list); + spin_unlock(&gioc_lock); scsi_host_put(shost); } @@ -8394,7 +8399,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) ioc = shost_priv(shost); memset(ioc, 0, sizeof(struct MPT2SAS_ADAPTER)); INIT_LIST_HEAD(&ioc->list); + spin_lock(&gioc_lock); list_add_tail(&ioc->list, &mpt2sas_ioc_list); + spin_unlock(&gioc_lock); ioc->shost = shost; ioc->id = mpt_ids++; sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id); @@ -8419,6 +8426,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds; /* misc semaphores and spin locks */ mutex_init(&ioc->reset_in_progress_mutex); + /* initializing pci_access_mutex lock */ + mutex_init(&ioc->pci_access_mutex); spin_lock_init(&ioc->ioc_reset_in_progress_lock); spin_lock_init(&ioc->scsi_lookup_lock); spin_lock_init(&ioc->sas_device_lock); @@ -8521,7 +8530,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) out_attach_fail: destroy_workqueue(ioc->firmware_event_thread); out_thread_fail: + spin_lock(&gioc_lock); list_del(&ioc->list); + spin_unlock(&gioc_lock); scsi_host_put(shost); return rv; } -- cgit v0.10.2 From 
420fa2118c020a005e9f0311c1e0b27414306618 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 19 Aug 2015 11:18:44 +0530 Subject: fcoe: Convert use of __constant_htons to htons In little endian cases, the macro htons unfolds to __swab16, which provides a special case for constants. In big endian cases, __constant_htons and htons expand directly to the same expression. So, replace __constant_htons with htons with the goal of getting rid of the definition of __constant_htons completely. The semantic patch that performs this transformation is as follows:

@@expression x;@@
- __constant_htons(x)
+ htons(x)

Signed-off-by: Vaishali Thakkar Acked-by: Vasu Dev Signed-off-by: James Bottomley diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index ec193a8..d3eb80c 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -364,7 +364,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe, * on the ethertype for the given device */ fcoe->fcoe_packet_type.func = fcoe_rcv; - fcoe->fcoe_packet_type.type = __constant_htons(ETH_P_FCOE); + fcoe->fcoe_packet_type.type = htons(ETH_P_FCOE); fcoe->fcoe_packet_type.dev = netdev; dev_add_pack(&fcoe->fcoe_packet_type); -- cgit v0.10.2 From 537b604c8b3aa8b96fe35f87dd085816552e294c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 27 Aug 2015 20:16:37 +0200 Subject: scsi: fix scsi_error_handler vs. scsi_host_dev_release race b9d5c6b7ef57 ("[SCSI] cleanup setting task state in scsi_error_handler()") has introduced a race between scsi_error_handler and scsi_host_dev_release resulting in a hang when the device goes away, because scsi_error_handler might miss a wakeup:

CPU0                                    CPU1
scsi_error_handler                      scsi_host_dev_release
                                          kthread_stop()
  kthread_should_stop()
    test_bit(KTHREAD_SHOULD_STOP)
                                            set_bit(KTHREAD_SHOULD_STOP)
                                            wake_up_process()
                                            wait_for_completion()
  set_current_state(TASK_INTERRUPTIBLE)
  schedule()

The most straightforward solution seems to be to invert the ordering of set_current_state() and kthread_should_stop(). The issue was noticed during a reboot test on a 3.0-based kernel, but the current code seems to be affected in the same way. [jejb: additional comment added] Cc: stable@vger.kernel.org # 3.6+ Reported-and-debugged-by: Mike Mayer Signed-off-by: Michal Hocko Reviewed-by: Dan Williams Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 410911c..b5bbc12 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2179,8 +2179,17 @@ int scsi_error_handler(void *data) * We never actually get interrupted because kthread_run * disables signal delivery for the created thread. */ - while (!kthread_should_stop()) { + while (true) { + /* + * The sequence in kthread_stop() sets the stop flag first + * then wakes the process. 
To avoid missed wakeups, the task + * should always be in a non running state before the stop + * flag is checked + */ set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || shost->host_failed != atomic_read(&shost->host_busy)) { SCSI_LOG_ERROR_RECOVERY(1, -- cgit v0.10.2 From bb0054552d080dd929907c5925d4bedc8bf6def7 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 2 Sep 2015 08:39:28 -0700 Subject: powerpc/powernv/pci-ioda: fix 32-bit TCE table init in kdump kernel When attempting to kdump with the 4.2 kernel, we see for each PCI device: pci 0003:01 : [PE# 000] Assign DMA32 space pci 0003:01 : [PE# 000] Setting up 32-bit TCE table at 0..80000000 pci 0003:01 : [PE# 000] Failed to create 32-bit TCE table, err -22 PCI: Domain 0004 has 8 available 32-bit DMA segments PCI: 4 PE# for a total weight of 70 pci 0004:01 : [PE# 002] Assign DMA32 space pci 0004:01 : [PE# 002] Setting up 32-bit TCE table at 0..80000000 pci 0004:01 : [PE# 002] Failed to create 32-bit TCE table, err -22 pci 0004:0d : [PE# 005] Assign DMA32 space pci 0004:0d : [PE# 005] Setting up 32-bit TCE table at 0..80000000 pci 0004:0d : [PE# 005] Failed to create 32-bit TCE table, err -22 pci 0004:0e : [PE# 006] Assign DMA32 space pci 0004:0e : [PE# 006] Setting up 32-bit TCE table at 0..80000000 pci 0004:0e : [PE# 006] Failed to create 32-bit TCE table, err -22 pci 0004:10 : [PE# 008] Assign DMA32 space pci 0004:10 : [PE# 008] Setting up 32-bit TCE table at 0..80000000 pci 0004:10 : [PE# 008] Failed to create 32-bit TCE table, err -22 and eventually the kdump kernel fails to boot as none of the PCI devices (including the disk controller) are successfully initialized. The EINVAL response is because the DMA window (the 2GB base window) is larger than the kdump kernel's reserved memory (crashkernel=, in this case specified to be 1024M). The check in question, if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) is a valid sanity check for pnv_pci_ioda2_table_alloc_pages(), so adjust the caller to pass in a smaller window size if our maximum memory value is smaller than the DMA window. After this change, the PCI devices successfully set up the 32-bit TCE table and kdump succeeds. The problem was seen on a Firestone machine originally. Fixes: aca6913f5551 ("powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages") Cc: stable@vger.kernel.org # 4.2 Signed-off-by: Nishanth Aravamudan Reviewed-by: Alexey Kardashevskiy [mpe: Coding style pedantry, use u64, change the indentation] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2927cd5..7708327 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2049,9 +2049,17 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) struct iommu_table *tbl = NULL; long rc; + /* + * In memory constrained environments, e.g. kdump kernel, the + * DMA window can be larger than available memory, which will + * cause errors later. 
+ */ + const u64 window_size = min((u64)pe->table_group.tce32_size, + memory_hotplug_max()); + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, - pe->table_group.tce32_size, + window_size, POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", -- cgit v0.10.2 From fa14486979b3a47307bcdb10f8b5baa875a5cf68 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Fri, 4 Sep 2015 11:22:52 -0700 Subject: powerpc/powernv/pci-ioda: fix kdump with non-power-of-2 crashkernel= The 32-bit TCE table initialization relies on the DMA window having a size equal to a power of 2 (and checks for it explicitly). But crashkernel= has no constraint that requires a power-of-2 be specified. This causes the kdump kernel to fail to boot as none of the PCI devices (including the disk controller) are successfully initialized. After this change, the PCI devices successfully set up the 32-bit TCE table and kdump succeeds. Fixes: aca6913f5551 ("powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages") Signed-off-by: Nishanth Aravamudan Cc: stable@vger.kernel.org # 4.2 Tested-by: Jan Stancek Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7708327..414fd1a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2050,12 +2050,18 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) long rc; /* + * crashkernel= specifies the kdump kernel's maximum memory at + * some offset and there is no guaranteed the result is a power + * of 2, which will cause errors later. + */ + const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max()); + + /* * In memory constrained environments, e.g. kdump kernel, the * DMA window can be larger than available memory, which will * cause errors later. */ - const u64 window_size = min((u64)pe->table_group.tce32_size, - memory_hotplug_max()); + const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory); rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, -- cgit v0.10.2 From 7d1647dc4ba0a61fec5381c1abb59dc886b6ef3c Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 7 Sep 2015 10:52:58 +1000 Subject: cxl: abort cxl_pci_enable_device_hook() if PCI channel is offline cxl_pci_enable_device_hook() is called when attempting to enable an AFU sitting on a vPHB. At present, the state of the underlying CXL card's PCI channel is only checked when it calls cxl_afu_check_and_enable() at the very end, after it has already set DMA options and initialised a default context. Check the CXL card's link status before setting DMA options or initialising a default context. If the link is down, print a warning and return immediately. 
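The ordering constraint is easy to see in isolation. Below is a minimal sketch of the hook's control flow; only cxl_adapter_link_ok() and the warning come from the patch itself, the rest of the skeleton is assumed:

static bool enable_device_hook_sketch(struct pci_dev *dev, struct cxl_afu *afu)
{
	/* Check the card's link state before any side effects. */
	if (!cxl_adapter_link_ok(afu->adapter)) {
		dev_warn(&dev->dev, "%s: device link is down, refusing to enable AFU\n",
			 __func__);
		return false;	/* no DMA setup, no default context */
	}

	/* ... only now set DMA ops/offset and initialise the default context ... */
	return true;
}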
Signed-off-by: Andrew Donnellan Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 6dd16a6..94b5208 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -48,6 +48,12 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); + return false; + } + set_dma_ops(&dev->dev, &dma_direct_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); -- cgit v0.10.2 From a077224fd35b2f7fbc93f14cf67074fc792fbac2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Sep 2015 13:24:40 +0100 Subject: ARM: 8429/1: disable GCC SRA optimization While working on the 32-bit ARM port of UEFI, I noticed a strange corruption in the kernel log. The following snprintf() statement (in drivers/firmware/efi/efi.c:efi_md_typeattr_format()) snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", was producing the following output in the log:

| | | | | |WB|WT|WC|UC]
| | | | | |WB|WT|WC|UC]
| | | | | |WB|WT|WC|UC]
|RUN| | | | |WB|WT|WC|UC]*
|RUN| | | | |WB|WT|WC|UC]*
| | | | | |WB|WT|WC|UC]
|RUN| | | | |WB|WT|WC|UC]*
| | | | | |WB|WT|WC|UC]
|RUN| | | | | | | |UC]
|RUN| | | | | | | |UC]

As it turns out, this is caused by incorrect code being emitted for the string() function in lib/vsprintf.c. The following code

	if (!(spec.flags & LEFT)) {
		while (len < spec.field_width--) {
			if (buf < end)
				*buf = ' ';
			++buf;
		}
	}
	for (i = 0; i < len; ++i) {
		if (buf < end)
			*buf = *s;
		++buf;
		++s;
	}
	while (len < spec.field_width--) {
		if (buf < end)
			*buf = ' ';
		++buf;
	}

when called with len == 0, triggers an issue in the GCC SRA optimization pass (Scalar Replacement of Aggregates), which handles promotion of signed struct members incorrectly. This is a known but as yet unresolved issue. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932). In this particular case, it is causing the second while loop to be executed erroneously a single time, causing the additional space characters to be printed. So disable the optimization by passing -fno-ipa-sra. Cc: Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7451b44..2c2b28e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -54,6 +54,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes -- cgit v0.10.2 From 6583d2032d57df9f1c00c753ca58e1a822901bf0 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Mon, 7 Sep 2015 19:37:57 +0800 Subject: spi: mediatek: fix spi cs polarity error Mediatek spi HW can't set cs inactive (keep cs high) directly. Instead, it supplies a pause mode to do it indirectly. If the driver unsets SPI_CMD_PAUSE_MODE in CMD_REG, it also needs to reset the internal state machine to make cs inactive at once.
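Put differently, changing cs is a two-step sequence on this controller. A condensed sketch of the resulting flow, with register and helper names as in the diff below:

static void set_cs_sketch(struct mtk_spi *mdata, bool enable)
{
	u32 reg_val = readl(mdata->base + SPI_CMD_REG);

	if (!enable) {
		/* No direct cs control: enter pause mode instead. */
		reg_val |= SPI_CMD_PAUSE_EN;
		writel(reg_val, mdata->base + SPI_CMD_REG);
	} else {
		/* Unsetting pause mode only takes effect after resetting the
		 * internal state machine; do both so cs changes at once. */
		reg_val &= ~SPI_CMD_PAUSE_EN;
		writel(reg_val, mdata->base + SPI_CMD_REG);
		mdata->state = MTK_SPI_IDLE;
		mtk_spi_reset(mdata);
	}
}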
Signed-off-by: Leilk Liu Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 5f6315c..b6073bb 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -173,22 +173,6 @@ static void mtk_spi_config(struct mtk_spi *mdata, writel(mdata->pad_sel, mdata->base + SPI_PAD_SEL_REG); } -static int mtk_spi_prepare_hardware(struct spi_master *master) -{ - struct spi_transfer *trans; - struct mtk_spi *mdata = spi_master_get_devdata(master); - struct spi_message *msg = master->cur_msg; - - trans = list_first_entry(&msg->transfers, struct spi_transfer, - transfer_list); - if (!trans->cs_change) { - mdata->state = MTK_SPI_IDLE; - mtk_spi_reset(mdata); - } - - return 0; -} - static int mtk_spi_prepare_message(struct spi_master *master, struct spi_message *msg) { @@ -228,11 +212,15 @@ static void mtk_spi_set_cs(struct spi_device *spi, bool enable) struct mtk_spi *mdata = spi_master_get_devdata(spi->master); reg_val = readl(mdata->base + SPI_CMD_REG); - if (!enable) + if (!enable) { reg_val |= SPI_CMD_PAUSE_EN; - else + writel(reg_val, mdata->base + SPI_CMD_REG); + } else { reg_val &= ~SPI_CMD_PAUSE_EN; - writel(reg_val, mdata->base + SPI_CMD_REG); + writel(reg_val, mdata->base + SPI_CMD_REG); + mdata->state = MTK_SPI_IDLE; + mtk_spi_reset(mdata); + } } static void mtk_spi_prepare_transfer(struct spi_master *master, @@ -509,7 +497,6 @@ static int mtk_spi_probe(struct platform_device *pdev) master->mode_bits = SPI_CPOL | SPI_CPHA; master->set_cs = mtk_spi_set_cs; - master->prepare_transfer_hardware = mtk_spi_prepare_hardware; master->prepare_message = mtk_spi_prepare_message; master->transfer_one = mtk_spi_transfer_one; master->can_dma = mtk_spi_can_dma; -- cgit v0.10.2 From 5e9fd733fa34b491e7ac41c91aa42ba0a9d8ea10 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sat, 4 Jul 2015 14:48:33 -0400 Subject: NTB: Add list to MAINTAINERS Add the new NTB mailing list to MAINTAINERS Signed-off-by: Jon Mason diff --git a/MAINTAINERS b/MAINTAINERS index b60e2b2..754bc73 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7258,6 +7258,7 @@ NTB DRIVER CORE M: Jon Mason M: Dave Jiang M: Allen Hubbe +L: linux-ntb@googlegroups.com S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git @@ -7269,6 +7270,7 @@ F: include/linux/ntb_transport.h NTB INTEL DRIVER M: Jon Mason M: Dave Jiang +L: linux-ntb@googlegroups.com S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git -- cgit v0.10.2 From e74bfeedad08180b968d8613dcde141ffb0720c3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:17 -0400 Subject: NTB: Add flow control to the ntb_netdev Right now if we push the NTB really hard, we start dropping packets because we are not able to process the packets fast enough. We need to stop the upper layer from flooding us when that happens. A timer is necessary in order to restart the queue once the resource has been processed on the receive side. Due to the way NTB is set up, the resources on the tx side are tied to the processing of the rx side and there's no async way to know when the rx side has released those resources.
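The stop/wake handshake this describes is the standard lockless pattern for network drivers; a minimal sketch follows. ntb_transport_tx_free_entry() is real (added by this patch), while the skeleton around it and the threshold parameters are assumptions:

/* Producer path: stop the queue first, then re-check across a barrier. */
static int maybe_stop_tx_sketch(struct net_device *ndev,
				struct ntb_transport_qp *qp, int stop_thresh)
{
	netif_stop_queue(ndev);
	smp_mb();	/* pairs with the barrier on the completion side */

	if (ntb_transport_tx_free_entry(qp) >= stop_thresh) {
		netif_start_queue(ndev);	/* a completion raced with us */
		return 0;
	}
	return -EBUSY;	/* stay stopped; a completion or the timer wakes us */
}

/* Completion path: free resources, then wake the queue if it stopped. */
static void tx_complete_sketch(struct net_device *ndev,
			       struct ntb_transport_qp *qp, int start_thresh)
{
	if (ntb_transport_tx_free_entry(qp) >= start_thresh) {
		smp_mb();	/* publish freed entries before the test */
		if (netif_queue_stopped(ndev))
			netif_wake_queue(ndev);
	}
}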
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index d8757bf..a9acf71 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -61,11 +61,21 @@ MODULE_VERSION(NTB_NETDEV_VER); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); +/* Time in usecs for tx resource reaper */ +static unsigned int tx_time = 1; + +/* Number of descriptors to free before resuming tx */ +static unsigned int tx_start = 10; + +/* Number of descriptors still available before stop upper layer tx */ +static unsigned int tx_stop = 5; + struct ntb_netdev { struct list_head list; struct pci_dev *pdev; struct net_device *ndev; struct ntb_transport_qp *qp; + struct timer_list tx_timer; }; #define NTB_TX_TIMEOUT_MS 1000 @@ -136,11 +146,42 @@ enqueue_again: } } +static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev, + struct ntb_transport_qp *qp, int size) +{ + struct ntb_netdev *dev = netdev_priv(netdev); + + netif_stop_queue(netdev); + /* Make sure to see the latest value of ntb_transport_tx_free_entry() + * since the queue was last started. + */ + smp_mb(); + + if (likely(ntb_transport_tx_free_entry(qp) < size)) { + mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); + return -EBUSY; + } + + netif_start_queue(netdev); + return 0; +} + +static int ntb_netdev_maybe_stop_tx(struct net_device *ndev, + struct ntb_transport_qp *qp, int size) +{ + if (netif_queue_stopped(ndev) || + (ntb_transport_tx_free_entry(qp) >= size)) + return 0; + + return __ntb_netdev_maybe_stop_tx(ndev, qp, size); +} + static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { struct net_device *ndev = qp_data; struct sk_buff *skb; + struct ntb_netdev *dev = netdev_priv(ndev); skb = data; if (!skb || !ndev) @@ -155,6 +196,15 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, } dev_kfree_skb(skb); + + if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) { + /* Make sure anybody stopping the queue after this sees the new + * value of ntb_transport_tx_free_entry() + */ + smp_mb(); + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } } static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, @@ -163,10 +213,15 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct ntb_netdev *dev = netdev_priv(ndev); int rc; + ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); if (rc) goto err; + /* check for next submit */ + ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + return NETDEV_TX_OK; err: @@ -175,6 +230,23 @@ err: return NETDEV_TX_BUSY; } +static void ntb_netdev_tx_timer(unsigned long data) +{ + struct net_device *ndev = (struct net_device *)data; + struct ntb_netdev *dev = netdev_priv(ndev); + + if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { + mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time)); + } else { + /* Make sure anybody stopping the queue after this sees the new + * value of ntb_transport_tx_free_entry() + */ + smp_mb(); + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } +} + static int ntb_netdev_open(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); @@ -197,8 +269,11 @@ static int ntb_netdev_open(struct net_device *ndev) } } + setup_timer(&dev->tx_timer, ntb_netdev_tx_timer, (unsigned long)ndev); + netif_carrier_off(ndev); ntb_transport_link_up(dev->qp); + netif_start_queue(ndev); return 0; @@ -219,6 +294,8 @@ 
static int ntb_netdev_close(struct net_device *ndev) while ((skb = ntb_transport_rx_remove(dev->qp, &len))) dev_kfree_skb(skb); + del_timer_sync(&dev->tx_timer); + return 0; } diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 1c6386d..0d851d6 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -494,6 +494,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, "tx_index - \t%u\n", qp->tx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_max_entry - \t%u\n", qp->tx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "qp->remote_rx_info->entry - \t%u\n", + qp->remote_rx_info->entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "free tx - \t%u\n", + ntb_transport_tx_free_entry(qp)); out_offset += snprintf(buf + out_offset, out_count - out_offset, "\nQP Link %s\n", @@ -535,6 +541,7 @@ static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock, } entry = list_first_entry(list, struct ntb_queue_entry, entry); list_del(&entry->entry); + out: spin_unlock_irqrestore(lock, flags); @@ -1843,7 +1850,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); if (!entry) { qp->tx_err_no_buf++; - return -ENOMEM; + return -EBUSY; } entry->cb_data = cb; @@ -1969,6 +1976,15 @@ unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) } EXPORT_SYMBOL_GPL(ntb_transport_max_size); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp) +{ + unsigned int head = qp->tx_index; + unsigned int tail = qp->remote_rx_info->entry; + + return tail > head ? tail - head : qp->tx_max_entry + tail - head; +} +EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry); + static void ntb_transport_doorbell_callback(void *data, int vector) { struct ntb_transport_ctx *nt = data; diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index 2862861..7243eb9 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); void ntb_transport_link_up(struct ntb_transport_qp *qp); void ntb_transport_link_down(struct ntb_transport_qp *qp); bool ntb_transport_link_query(struct ntb_transport_qp *qp); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp); -- cgit v0.10.2 From 0a5d19d9f046d770776508fdde959d2a42bce9f7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:18 -0400 Subject: NTB: Add PCI Device IDs for Broadwell Xeon Adding PCI Device IDs for B2B (back to back), RP (root port, primary), and TB (transparent bridge, secondary) devices. 
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 87751cf..c2bc56b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -190,14 +190,17 @@ static inline int pdev_is_xeon(struct pci_dev *pdev) case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: return 1; } return 0; @@ -1843,6 +1846,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; break; } @@ -1857,6 +1863,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; break; } @@ -1878,6 +1887,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; break; } @@ -2234,14 +2246,17 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)}, {0} }; MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index 7ddaf38..ea0612f7 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -67,6 +67,9 @@ #define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E #define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F #define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E +#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F /* Intel Xeon hardware */ -- cgit v0.10.2 From 315100004fd6d9189b033f3bf9c5eba9eb906705 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:19 -0400 Subject: NTB: Make 
the transport list in order of discovery The list should be added from the bottom and not the top in order to ensure the transport is provided in the same order to clients as ntb devices are discovered. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 0d851d6..29553fb 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -297,7 +297,7 @@ static LIST_HEAD(ntb_transport_list); static int ntb_bus_init(struct ntb_transport_ctx *nt) { - list_add(&nt->entry, &ntb_transport_list); + list_add_tail(&nt->entry, &ntb_transport_list); return 0; } -- cgit v0.10.2 From d98ef99e378b0d5c42be928d6f2abe08a5f9ce53 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:20 -0400 Subject: NTB: Clean up QP stats info Make QP stats info more readable for debugging purposes. Also add an entry to indicate whether DMA is being used. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 29553fb..777436c 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -452,7 +452,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset = 0; out_offset += snprintf(buf + out_offset, out_count - out_offset, - "NTB QP stats\n"); + "\nNTB QP stats:\n\n"); out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_bytes - \t%llu\n", qp->rx_bytes); out_offset += snprintf(buf + out_offset, out_count - out_offset, @@ -470,11 +470,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_err_ver - \t%llu\n", qp->rx_err_ver); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_buff - \t%p\n", qp->rx_buff); + "rx_buff - \t0x%p\n", qp->rx_buff); out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n\n", qp->rx_max_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -489,21 +489,28 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_mw - \t%p\n", qp->tx_mw); + "tx_mw - \t0x%p\n", qp->tx_mw); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_index - \t%u\n", qp->tx_index); + "tx_index (H) - \t%u\n", qp->tx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_max_entry - \t%u\n", qp->tx_max_entry); - out_offset += snprintf(buf + out_offset, out_count - out_offset, - "qp->remote_rx_info->entry - \t%u\n", + "RRI (T) - \t%u\n", qp->remote_rx_info->entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, + "tx_max_entry - \t%u\n", qp->tx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "\nQP Link %s\n", + "\n"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Using DMA - \t%s\n", use_dma ? 
"Yes" : "No"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "\n"); + if (out_offset > out_count) out_offset = out_count; -- cgit v0.10.2 From 905921e74864e80228e7f8cfe75315cd0a8cada8 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 13 Jul 2015 08:07:21 -0400 Subject: NTB: Remove dma_sync_wait from ntb_async_rx The dma_sync_wait can hurt the performance of workloads mixed with both large and small frames. Large frames will be copied using the dma engine. Small frames will be copied by the cpu. The dma_sync_wait prevents the cpu and dma engine copying in parallel. In the period where the cpu is copying, the dma engine is stopped. The dma engine is not doing any useful work to copy large frames during that time, and the additional time to restart the dma engine for the next large frame. This will decrease the throughput for the portion of a workload with large frames. In the period where the dma engine is copying, the cpu is held up waiting for dma to complete. The small frames processing will be delayed until the dma is complete. The RX frames are completed in-order, and the processing of small frames takes very little time, so dma_sync_wait may have an insignificant impact on the respose time of frames. The more significant impact is to the system, because the delay in dma_sync_wait is implemented as busy non-blocking wait. This can prevent the delayed core from doing any useful work, even if it could be processing work for other drivers, unrelated to transport RX processing. After applying the earlier patch to fix out-of-order RX acknoledgement, the dma_sync_wait is no longer necessary. Remove it, so that cpu memcpy will proceed immediately for small frames, in parallel with ongoing dma for large frames. Do not hold up the cpu from doing work while dma is in progress. The prior fix will continue to ensure in-order completion of the RX frames to the upper layer, and in-order delivery of the RX acknoledgement. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 777436c..f6aae0f 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1233,18 +1233,18 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) goto err; if (len < copy_bytes) - goto err_wait; + goto err; device = chan->device; pay_off = (size_t)offset & ~PAGE_MASK; buff_off = (size_t)buf & ~PAGE_MASK; if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) - goto err_wait; + goto err; unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT); if (!unmap) - goto err_wait; + goto err; unmap->len = len; unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset), @@ -1287,12 +1287,6 @@ err_set_unmap: dmaengine_unmap_put(unmap); err_get_unmap: dmaengine_unmap_put(unmap); -err_wait: - /* If the callbacks come out of order, the writing of the index to the - * last completed will be out of order. This may result in the - * receive stalling forever. - */ - dma_sync_wait(chan, qp->last_cookie); err: ntb_memcpy_rx(entry, offset); qp->rx_memcpy++; -- cgit v0.10.2 From 569410ca756cd3ebb15609cb6828a8393fb6384d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:22 -0400 Subject: NTB: Use unique DMA channels for TX and RX Allocate two DMA channels, one for TX operation and one for RX operation, instead of having one DMA channel for everything. 
This provides slightly better performance, and also will make error handling cleaner later on. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f6aae0f..6e3ee90 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -119,7 +119,8 @@ struct ntb_transport_qp { struct ntb_transport_ctx *transport; struct ntb_dev *ndev; void *cb_data; - struct dma_chan *dma_chan; + struct dma_chan *tx_dma_chan; + struct dma_chan *rx_dma_chan; bool client_ready; bool link_is_up; @@ -504,7 +505,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "\n"); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "Using DMA - \t%s\n", use_dma ? "Yes" : "No"); + "Using TX DMA - \t%s\n", + qp->tx_dma_chan ? "Yes" : "No"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Using RX DMA - \t%s\n", + qp->rx_dma_chan ? "Yes" : "No"); out_offset += snprintf(buf + out_offset, out_count - out_offset, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); @@ -1220,7 +1225,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) { struct dma_async_tx_descriptor *txd; struct ntb_transport_qp *qp = entry->qp; - struct dma_chan *chan = qp->dma_chan; + struct dma_chan *chan = qp->rx_dma_chan; struct dma_device *device; size_t pay_off, buff_off, len; struct dmaengine_unmap_data *unmap; @@ -1381,8 +1386,8 @@ static void ntb_transport_rxc_db(unsigned long data) break; } - if (i && qp->dma_chan) - dma_async_issue_pending(qp->dma_chan); + if (i && qp->rx_dma_chan) + dma_async_issue_pending(qp->rx_dma_chan); if (i == qp->rx_max_entry) { /* there is more work to do */ @@ -1449,7 +1454,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, { struct ntb_payload_header __iomem *hdr; struct dma_async_tx_descriptor *txd; - struct dma_chan *chan = qp->dma_chan; + struct dma_chan *chan = qp->tx_dma_chan; struct dma_device *device; size_t dest_off, buff_off; struct dmaengine_unmap_data *unmap; @@ -1642,14 +1647,27 @@ ntb_transport_create_queue(void *data, struct device *client_dev, dma_cap_set(DMA_MEMCPY, dma_mask); if (use_dma) { - qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, - (void *)(unsigned long)node); - if (!qp->dma_chan) - dev_info(&pdev->dev, "Unable to allocate DMA channel\n"); + qp->tx_dma_chan = + dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->tx_dma_chan) + dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n"); + + qp->rx_dma_chan = + dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->rx_dma_chan) + dev_info(&pdev->dev, "Unable to allocate RX DMA channel\n"); } else { - qp->dma_chan = NULL; + qp->tx_dma_chan = NULL; + qp->rx_dma_chan = NULL; } - dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU"); + + dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", + qp->tx_dma_chan ? "DMA" : "CPU"); + + dev_dbg(&pdev->dev, "Using %s memcpy for RX\n", + qp->rx_dma_chan ? 
"DMA" : "CPU"); for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); @@ -1684,8 +1702,10 @@ err2: err1: while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); - if (qp->dma_chan) - dma_release_channel(qp->dma_chan); + if (qp->tx_dma_chan) + dma_release_channel(qp->tx_dma_chan); + if (qp->rx_dma_chan) + dma_release_channel(qp->rx_dma_chan); nt->qp_bitmap_free |= qp_bit; err: return NULL; @@ -1709,12 +1729,27 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) pdev = qp->ndev->pdev; - if (qp->dma_chan) { - struct dma_chan *chan = qp->dma_chan; + if (qp->tx_dma_chan) { + struct dma_chan *chan = qp->tx_dma_chan; + /* Putting the dma_chan to NULL will force any new traffic to be + * processed by the CPU instead of the DAM engine + */ + qp->tx_dma_chan = NULL; + + /* Try to be nice and wait for any queued DMA engine + * transactions to process before smashing it with a rock + */ + dma_sync_wait(chan, qp->last_cookie); + dmaengine_terminate_all(chan); + dma_release_channel(chan); + } + + if (qp->rx_dma_chan) { + struct dma_chan *chan = qp->rx_dma_chan; /* Putting the dma_chan to NULL will force any new traffic to be * processed by the CPU instead of the DAM engine */ - qp->dma_chan = NULL; + qp->rx_dma_chan = NULL; /* Try to be nice and wait for any queued DMA engine * transactions to process before smashing it with a rock @@ -1962,16 +1997,20 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num); unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) { unsigned int max; + unsigned int copy_align; if (!qp) return 0; - if (!qp->dma_chan) + if (!qp->tx_dma_chan && !qp->rx_dma_chan) return qp->tx_max_frame - sizeof(struct ntb_payload_header); + copy_align = max(qp->tx_dma_chan->device->copy_align, + qp->rx_dma_chan->device->copy_align); + /* If DMA engine usage is possible, try to find the max size for that */ max = qp->tx_max_frame - sizeof(struct ntb_payload_header); - max -= max % (1 << qp->dma_chan->device->copy_align); + max -= max % (1 << copy_align); return max; } -- cgit v0.10.2 From a7c23237481782fbea3c2230e362b72863e144b0 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Jul 2015 04:15:28 -0400 Subject: NTB: Fix documentation for ntb_link_is_up There was a copy and paste error in the documentation for ntb_link_is_up. The long description was mistakenly copied from ntb_link_set_trans. This adds the appropriate long description for ntb_link_is_up. Reported-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason diff --git a/include/linux/ntb.h b/include/linux/ntb.h index b02f72b..e3d3299 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -522,10 +522,9 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) * @speed: OUT - The link speed expressed as PCIe generation number. * @width: OUT - The link width expressed as the number of PCIe lanes. * - * Set the translation of a memory window. The peer may access local memory - * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). + * Get the current state of the ntb link. It is recommended to query the link + * state once after every link event. It is safe to query the link state in + * the context of the link event callback. * * Return: One if the link is up, zero if the link is down, otherwise a * negative value indicating the error number. 
-- cgit v0.10.2 From 86663c91866ae85c219f1a80ef2c9460b7ca5cd8 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Jul 2015 12:43:21 -0400 Subject: NTB: Fix documentation for ntb_peer_db_clear. The documentation should say "peer" not "local" when referring to the peer doorbell register. Reported-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason diff --git a/include/linux/ntb.h b/include/linux/ntb.h index e3d3299..f798e2a 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -794,7 +794,7 @@ static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) } /** - * ntb_peer_db_clear() - clear bits in the local doorbell register + * ntb_peer_db_clear() - clear bits in the peer doorbell register * @ntb: NTB device context. * @db_bits: Doorbell bits to clear. * -- cgit v0.10.2 From 2aa2a77a489deda473c99a4c15074d092718912c Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 31 Aug 2015 09:30:59 -0400 Subject: NTB: Improve index handling in B2B MW workaround Check that b2b_mw_idx is in range of the number of memory windows when initializing the device. The workaround is considered to be in effect only if the device b2b_idx is exactly UINT_MAX, instead of any index past the last memory window. Only print B2B MW workaround information in debugfs if the workaround is in effect. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index c2bc56b..fc6af2d 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -575,10 +575,13 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "Connection Topology -\t%s\n", ntb_topo_string(ndev->ntb.topo)); - off += scnprintf(buf + off, buf_size - off, - "B2B Offset -\t\t%#lx\n", ndev->b2b_off); - off += scnprintf(buf + off, buf_size - off, - "B2B MW Idx -\t\t%d\n", ndev->b2b_idx); + if (ndev->b2b_idx != UINT_MAX) { + off += scnprintf(buf + off, buf_size - off, + "B2B MW Idx -\t\t%u\n", ndev->b2b_idx); + off += scnprintf(buf + off, buf_size - off, + "B2B Offset -\t\t%#lx\n", ndev->b2b_off); + } + off += scnprintf(buf + off, buf_size - off, "BAR4 Split -\t\t%s\n", ndev->bar4_split ? "yes" : "no"); @@ -1487,7 +1490,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; - if (ndev->b2b_idx >= ndev->mw_count) { + if (ndev->b2b_idx == UINT_MAX) { dev_dbg(ndev_dev(ndev), "not using b2b mw\n"); b2b_bar = 0; ndev->b2b_off = 0; @@ -1779,6 +1782,13 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev) else ndev->b2b_idx = b2b_mw_idx; + if (ndev->b2b_idx >= ndev->mw_count) { + dev_dbg(ndev_dev(ndev), + "b2b_mw_idx %d invalid for mw_count %u\n", + b2b_mw_idx, ndev->mw_count); + return -EINVAL; + } + dev_dbg(ndev_dev(ndev), "setting up b2b mw idx %d means %d\n", b2b_mw_idx, ndev->b2b_idx); @@ -2008,7 +2018,7 @@ static inline void ndev_init_struct(struct intel_ntb_dev *ndev, ndev->ntb.ops = &intel_ntb_ops; ndev->b2b_off = 0; - ndev->b2b_idx = INT_MAX; + ndev->b2b_idx = UINT_MAX; ndev->bar4_split = 0; -- cgit v0.10.2 From 9a07826f99034202dad589285a47132685d9538b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 31 Aug 2015 09:31:00 -0400 Subject: NTB: Fix range check on memory window index The range check must exclude the upper bound. 
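Spelling out the off-by-one: for a table of mw_count entries the valid indices are 0 through mw_count - 1, so idx == mw_count must be rejected as well. A minimal sketch (hypothetical table, same shape as ndev_mw_to_bar()):

static int mw_to_bar_sketch(const int *mw_bar, int mw_count, int idx)
{
	/* ">" alone would let idx == mw_count slip past the check */
	if (idx < 0 || idx >= mw_count)
		return -EINVAL;
	return mw_bar[idx];
}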
Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index fc6af2d..865a3e3 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -240,7 +240,7 @@ static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev, static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx) { - if (idx < 0 || idx > ndev->mw_count) + if (idx < 0 || idx >= ndev->mw_count) return -EINVAL; return ndev->reg->mw_bar[idx]; } -- cgit v0.10.2 From d075a88e515278b3e2d5ec39e80e6fd3c6e48841 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Sep 2015 12:15:21 +0530 Subject: cpufreq: staticize cpufreq_cpu_get_raw() cpufreq_cpu_get_raw() isn't used by any external users, staticize it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index abb7768..feb6dc5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -239,7 +239,7 @@ int cpufreq_generic_init(struct cpufreq_policy *policy, EXPORT_SYMBOL_GPL(cpufreq_generic_init); /* Only for cpufreq core internal use */ -struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) +static struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) { struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); -- cgit v0.10.2 From 63431f789c957cebcffeb51953dfaeb7f1ddd827 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 27 Jul 2015 17:58:06 +0530 Subject: cpufreq: Use __func__ to print function's name It's better to use __func__ to print a function's name instead of writing the name in the print statement. This also has the advantage that a change in the function's name doesn't force us to change the print message as well. Reviewed-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index feb6dc5..8b6bf38 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2033,8 +2033,7 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (!try_module_get(policy->governor->owner)) return -EINVAL; - pr_debug("__cpufreq_governor for CPU %u, event %u\n", - policy->cpu, event); + pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); mutex_lock(&cpufreq_governor_lock); if ((policy->governor_enabled && event == CPUFREQ_GOV_START) -- cgit v0.10.2 From 6af3e3adcac595a683bb55299a907d7d1ad61ab3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 7 Sep 2015 10:41:12 +0100 Subject: Btrfs: don't initialize a space info as full to prevent ENOSPC Commit 2e6e518335f8 ("Btrfs: fix block group ->space_info null pointer dereference") accidentally marked a space info as full when initializing it with a value of 0 total bytes. This introduces an ENOSPC problem when writing file data if we mount a filesystem that has no data block groups allocated, because the data space info is initialized with 0 total bytes, marked as full, and it never gets its total bytes incremented by a (positive) value to unmark it as full (because there are no data block groups loaded when the fs is mounted). For metadata and system spaces this issue can never happen since we always have at least one metadata block group and one system block group (even for an empty filesystem). So fix this by just not initializing a space info as full, reverting the offending part of the commit mentioned above.
The following test case for fstests reproduces the issue:

  seq=`basename $0`
  seqres=$RESULT_DIR/$seq
  echo "QA output created by $seq"
  tmp=/tmp/$$
  status=1	# failure is the default!
  trap "_cleanup; exit \$status" 0 1 2 3 15

  _cleanup()
  {
      rm -f $tmp.*
  }

  # get standard environment, filters and checks
  . ./common/rc
  . ./common/filter

  # real QA test starts here
  _need_to_be_root
  _supported_fs btrfs
  _supported_os Linux
  _require_scratch

  rm -f $seqres.full

  _scratch_mkfs >>$seqres.full 2>&1

  # Mount our filesystem without space caches enabled so that we do not
  # get any space used from the initial data block group that mkfs creates
  # (space caches use space from data block groups).
  _scratch_mount "-o nospace_cache"

  # Need an fs with at least 2Gb to make sure mkfs.btrfs does not create
  # an fs using mixed block groups (used both for data and metadata). We
  # really need to have dedicated block groups for data to reproduce the
  # issue and mkfs.btrfs defaults to mixed block groups only for small
  # filesystems (up to 1Gb).
  _require_fs_space $SCRATCH_MNT $((2 * 1024 * 1024))

  # Run balance with the purpose of deleting the unused data block group
  # that mkfs created. We could also wait for the background kthread to
  # automatically delete the unused block group, but we do not have a way
  # to make it run and wait for it to complete, so just do a balance
  # instead of some unreliable sleep
  _run_btrfs_util_prog balance start -dusage=0 $SCRATCH_MNT

  # Now unmount the filesystem, mount it again (either with or without space
  # caches enabled, it does not matter to trigger the problem) and attempt
  # to create a file with some data - this used to fail with ENOSPC
  # because there were no data block groups when the filesystem was
  # mounted and the data space info object was marked as full when
  # initialized (because it had 0 total bytes), which prevented the file
  # write path from attempting to allocate a data block group and fail
  # immediately with ENOSPC.
  _scratch_remount
  echo "hello world" > $SCRATCH_MNT/foobar

  echo "Silence is golden"
  status=0
  exit

Signed-off-by: Filipe Manana diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5411f0a..3686315 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3742,10 +3742,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_reserved = 0; found->bytes_readonly = 0; found->bytes_may_use = 0; - if (total_bytes > 0) - found->full = 0; - else - found->full = 1; + found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; found->chunk_alloc = 0; found->flush = 0; -- cgit v0.10.2 From daebaabb5cfbe4a6f09ca0e0f8b7673efc704960 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 7 Sep 2015 15:52:40 +0530 Subject: powerpc/pseries: Release DRC when configure_connector fails Commit f32393c943e2 ("powerpc/pseries: Correct cpu affinity for dlpar added cpus") moved the dlpar_acquire_drc() call to before the dlpar_configure_connector() call in dlpar_cpu_probe(), but failed to release the DRC if dlpar_configure_connector() failed. During CPU hotplug, if configure-connector fails for any reason, this will cause subsequent CPU hotplug attempts to fail. Release the acquired DRC if the dlpar_configure_connector() call fails so that the DRC is left in the right isolation and allocation state for the subsequent hotplug operation to succeed.
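The fix restores the usual acquire/undo symmetry on the error path. In outline (helper names are from the commit; the skeleton is assumed):

static int cpu_probe_sketch(u32 drc_index, struct device_node *parent)
{
	struct device_node *dn;

	if (dlpar_acquire_drc(drc_index))
		return -EINVAL;

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		/* Undo the acquire so the DRC is usable by the next attempt. */
		dlpar_release_drc(drc_index);
		return -EINVAL;
	}

	/* ... dlpar_attach_node(dn), online the cpu, etc. ... */
	return 0;
}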
Fixes: f32393c943e2 ("powerpc/pseries: Correct cpu affinity for dlpar added cpus") Cc: stable@vger.kernel.org # 4.1+ Signed-off-by: Bharata B Rao Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 47d9cebe..db17827 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -422,8 +422,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); of_node_put(parent); - if (!dn) + if (!dn) { + dlpar_release_drc(drc_index); return -EINVAL; + } rc = dlpar_attach_node(dn); if (rc) { -- cgit v0.10.2 From edcd591c77a48da753456f92daf8bb50fe9bac93 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 7 Sep 2015 13:18:03 -0600 Subject: locking/static_keys: Fix a silly typo Commit: 412758cb2670 ("jump label, locking/static_keys: Update docs") introduced a typo that might as well get fixed. Signed-off-by: Jonathan Corbet Cc: Andrew Morton Cc: Jason Baron Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150907131803.54c027e1@lwn.net Signed-off-by: Ingo Molnar diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index f4cb0b2..ec91158 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -16,7 +16,7 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); static_key_likely() -statick_key_unlikely() +static_key_unlikely() 0) Abstract diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8..0684bd3 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -22,7 +22,7 @@ * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * static_key_likely() - * statick_key_unlikely() + * static_key_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we -- cgit v0.10.2 From 48fec5d0a504dfbb302cb1dd24ebb0b82a46cce9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 1 Jul 2015 16:27:46 +0800 Subject: ceph: EIO all operations after forced umount This patch makes try_get_cap_refs() and __do_request() check whether the file system was forcibly unmounted, and return -EIO if it was. This patch also adds a helper function to drop dirty caps and wake up blocking operations. Signed-off-by: Yan, Zheng diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 890c509..1594f2c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -717,7 +717,7 @@ static int ceph_writepages_start(struct address_space *mapping, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); - if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { + if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { pr_warn("writepage_start %p on forced umount\n", inode); return -EIO; /* we're in a forced umount, don't write!
*/ } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ddd5e94..27b5668 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2413,6 +2413,14 @@ again: goto out_unlock; } + if (!__ceph_is_any_caps(ci) && + ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + dout("get_cap_refs %p forced umount\n", inode); + *err = -EIO; + ret = 1; + goto out_unlock; + } + dout("get_cap_refs %p have %s needed %s\n", inode, ceph_cap_string(have), ceph_cap_string(need)); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6aa07af..ddc471c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2107,7 +2107,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, msg = create_request_message(mdsc, req, mds, drop_cap_releases); if (IS_ERR(msg)) { req->r_err = PTR_ERR(msg); - complete_request(mdsc, req); return PTR_ERR(msg); } req->r_request = msg; @@ -2135,7 +2134,7 @@ static int __do_request(struct ceph_mds_client *mdsc, { struct ceph_mds_session *session = NULL; int mds = -1; - int err = -EAGAIN; + int err = 0; if (req->r_err || req->r_got_result) { if (req->r_aborted) @@ -2149,6 +2148,11 @@ static int __do_request(struct ceph_mds_client *mdsc, err = -EIO; goto finish; } + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + dout("do_request forced umount\n"); + err = -EIO; + goto finish; + } put_request_session(req); @@ -2196,13 +2200,15 @@ static int __do_request(struct ceph_mds_client *mdsc, out_session: ceph_put_mds_session(session); +finish: + if (err) { + dout("__do_request early error %d\n", err); + req->r_err = err; + complete_request(mdsc, req); + __unregister_request(mdsc, req); + } out: return err; - -finish: - req->r_err = err; - complete_request(mdsc, req); - goto out; } /* @@ -2289,8 +2295,6 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, if (req->r_err) { err = req->r_err; - __unregister_request(mdsc, req); - dout("do_request early error %d\n", err); goto out; } @@ -3555,7 +3559,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush, want_snap; - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -3584,7 +3588,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) */ static bool done_closing_sessions(struct ceph_mds_client *mdsc) { - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return true; return atomic_read(&mdsc->num_sessions) == 0; } @@ -3643,6 +3647,34 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("stopped\n"); } +void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) +{ + struct ceph_mds_session *session; + int mds; + + dout("force umount\n"); + + mutex_lock(&mdsc->mutex); + for (mds = 0; mds < mdsc->max_sessions; mds++) { + session = __ceph_lookup_mds_session(mdsc, mds); + if (!session) + continue; + mutex_unlock(&mdsc->mutex); + mutex_lock(&session->s_mutex); + __close_session(mdsc, session); + if (session->s_state == CEPH_MDS_SESSION_CLOSING) { + cleanup_session_requests(mdsc, session); + remove_session_caps(session); + } + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); + mutex_lock(&mdsc->mutex); + kick_requests(mdsc, mds); + } + __wake_requests(mdsc, &mdsc->waiting_for_map); + mutex_unlock(&mdsc->mutex); +} + static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 762757e..f575eaf 100644 --- 
a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -366,6 +366,7 @@ extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, extern int ceph_mdsc_init(struct ceph_fs_client *fsc); extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); +extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc); extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d1c833c..69e6e19 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -708,6 +708,7 @@ static void ceph_umount_begin(struct super_block *sb) if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; + ceph_mdsc_force_umount(fsc->mdsc); return; } -- cgit v0.10.2 From a341d4df87487ae68189e0be869c39a2b0cb9aaa Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 1 Jul 2015 17:03:23 +0800 Subject: ceph: invalidate dirty pages after forced umount After forced umount, ceph_writepages_start() skips flushing dirty pages. To make sure the inode's reference count gets dropped to zero, we need to invalidate dirty pages. Signed-off-by: Yan, Zheng diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1594f2c..9893335 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -719,6 +719,8 @@ static int ceph_writepages_start(struct address_space *mapping, if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { pr_warn("writepage_start %p on forced umount\n", inode); + truncate_pagecache(inode, 0); + mapping_set_error(mapping, -EIO); return -EIO; /* we're in a forced umount, don't write! */ } if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) -- cgit v0.10.2 From b79b23682a1649f30960fb5bd920ba46c89a1b14 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Sun, 5 Jul 2015 06:34:05 +0000 Subject: libceph: remove the unused macro AES_KEY_SIZE This removes the macro AES_KEY_SIZE; no functions use it anymore, so it is no longer required. Signed-off-by: Nicholas Krause Signed-off-by: Ilya Dryomov diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 790fe89..4440edc 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -79,10 +79,6 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) return 0; } - - -#define AES_KEY_SIZE 16 - static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) { return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); -- cgit v0.10.2 From d920ff6fc7c1ec3d7bd80432bff5575c0ebe426c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Canet?= Date: Thu, 25 Jun 2015 21:02:57 +0200 Subject: libceph: Avoid holding the zero page on ceph_msgr_slab_init errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ceph_msgr_slab_init may fail due to a temporary ENOMEM. Slightly delay the initialization of zero_page in ceph_msgr_init, and reorder its cleanup in _ceph_msgr_exit so it is done in the reverse order of setup. Postponing the BUG_ON() checks does no harm should they ever trigger.
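The rule the patch applies is the usual one: perform the fallible step first, and tear down in the reverse order of setup. In outline, using the same calls as the diff below:

int msgr_init_sketch(void)
{
	if (ceph_msgr_slab_init())	/* step 1: the step that can fail */
		return -ENOMEM;

	BUG_ON(zero_page != NULL);	/* postponing this check is harmless */
	zero_page = ZERO_PAGE(0);	/* step 2: cannot fail */
	page_cache_get(zero_page);
	return 0;
}

void msgr_exit_sketch(void)
{
	BUG_ON(zero_page == NULL);
	page_cache_release(zero_page);	/* undo step 2 first */
	zero_page = NULL;

	ceph_msgr_slab_exit();		/* then undo step 1 */
}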
Signed-off-by: Benoît Canet Reviewed-by: Alex Elder Signed-off-by: Ilya Dryomov diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e3be1d2..0f9ea60 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -276,22 +276,22 @@ static void _ceph_msgr_exit(void) ceph_msgr_wq = NULL; } - ceph_msgr_slab_exit(); - BUG_ON(zero_page == NULL); page_cache_release(zero_page); zero_page = NULL; + + ceph_msgr_slab_exit(); } int ceph_msgr_init(void) { + if (ceph_msgr_slab_init()) + return -ENOMEM; + BUG_ON(zero_page != NULL); zero_page = ZERO_PAGE(0); page_cache_get(zero_page); - if (ceph_msgr_slab_init()) - return -ENOMEM; - /* * The number of active work items is limited by the number of * connections, so leave @max_active at default. -- cgit v0.10.2 From 6893162215d7bf08a4273247ec1fc7dedee5135c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 3 Jul 2015 15:44:41 +0300 Subject: libceph: rename con_work() to ceph_con_workfn() Even though it's static, con_work(), being a work func, shows up in various stacktraces a lot. Prefix it with ceph_. Signed-off-by: Ilya Dryomov diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0f9ea60..101ab62 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -176,7 +176,7 @@ static struct lock_class_key socket_class; static void queue_con(struct ceph_connection *con); static void cancel_con(struct ceph_connection *con); -static void con_work(struct work_struct *); +static void ceph_con_workfn(struct work_struct *); static void con_fault(struct ceph_connection *con); /* @@ -749,7 +749,7 @@ void ceph_con_init(struct ceph_connection *con, void *private, mutex_init(&con->mutex); INIT_LIST_HEAD(&con->out_queue); INIT_LIST_HEAD(&con->out_sent); - INIT_DELAYED_WORK(&con->work, con_work); + INIT_DELAYED_WORK(&con->work, ceph_con_workfn); con->state = CON_STATE_CLOSED; } @@ -2799,7 +2799,7 @@ static void con_fault_finish(struct ceph_connection *con) /* * Do some work on a connection. Drop a connection ref when we're done. */ -static void con_work(struct work_struct *work) +static void ceph_con_workfn(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); -- cgit v0.10.2 From 23078637e05460428f803be7d0f46908df8a970a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 20 Jul 2015 10:14:06 +0800 Subject: ceph: fix queuing inode to mdsdir's snaprealm During MDS failovers, an MClientSnap message may cause the kclient to move some inodes from the root directory's snaprealm to mdsdir's snaprealm and queue snapshots for these inodes. For an FS that has never created any snapshot, both the root directory's snaprealm and mdsdir's snaprealm share the same snapshot context (both are ceph_empty_snapc). This confuses ceph_put_wrbuffer_cap_refs(), making it unable to distinguish snapshot buffers from head buffers. The fix is to not use ceph_empty_snapc as the snaprealm's cached context.
Signed-off-by: Yan, Zheng diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 233d906a..4aa7122 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -338,12 +338,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } - if (num == 0 && realm->seq == ceph_empty_snapc->seq) { - ceph_get_snap_context(ceph_empty_snapc); - snapc = ceph_empty_snapc; - goto done; - } - /* alloc new snap context */ err = -ENOMEM; if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) @@ -381,7 +375,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); -done: ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; return 0; -- cgit v0.10.2 From 1550d34e5626a20a2e12c73bdc1e6e217a0ba897 Mon Sep 17 00:00:00 2001 From: Brad Hubbard Date: Tue, 18 Aug 2015 10:18:53 +0800 Subject: ceph: remove redundant test of head->safe and silence static analysis warnings Signed-off-by: Brad Hubbard Signed-off-by: Yan, Zheng diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ddc471c..d4eaa84 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2415,7 +2415,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_unlock(&mdsc->mutex); goto out; } - if (req->r_got_safe && !head->safe) { + if (req->r_got_safe) { pr_warn("got unsafe after safe on %llu from mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); -- cgit v0.10.2 From a43137f7b0f1467cf3005b6ff6574d978642d247 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Tue, 18 Aug 2015 10:23:50 +0800 Subject: ceph: remove the useless judgement The err != 0 case is already handled just above, so this check is redundant. Skip it. Signed-off-by: Jianpeng Ma Signed-off-by: Yan, Zheng diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8b79d87..2ebcbd4 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -279,7 +279,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (err) goto out_req; - if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); if (d_unhashed(dentry)) { -- cgit v0.10.2 From e36d571d70c7f46b20c28d81025fd5fc044a8e22 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Tue, 18 Aug 2015 10:25:35 +0800 Subject: ceph: no need to get parent inode in ceph_open The parent inode is only needed when creating a new inode. For ceph_open, the target inode already exists.
Signed-off-by: Jianpeng Ma Signed-off-by: Yan, Zheng diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 2ebcbd4..90ec110 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -136,7 +136,6 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *cf = file->private_data; - struct inode *parent_inode = NULL; int err; int flags, fmode, wanted; @@ -210,10 +209,7 @@ int ceph_open(struct inode *inode, struct file *file) ihold(inode); req->r_num_caps = 1; - if (flags & O_CREAT) - parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry); - err = ceph_mdsc_do_request(mdsc, parent_inode, req); - iput(parent_inode); + err = ceph_mdsc_do_request(mdsc, NULL, req); if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); -- cgit v0.10.2 From 5fdb1389e1399d6801a8c5d10952ef4153039fb2 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Tue, 18 Aug 2015 10:30:38 +0800 Subject: ceph: cleanup use of ceph_msg_get Signed-off-by: Jianpeng Ma Signed-off-by: Yan, Zheng diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d4eaa84..51cb02d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2524,8 +2524,7 @@ out_err: if (err) { req->r_err = err; } else { - req->r_reply = msg; - ceph_msg_get(msg); + req->r_reply = ceph_msg_get(msg); req->r_got_result = true; } } else { -- cgit v0.10.2 From 6dd74e44dc1df85f125982a8d6591bc4a76c9f5d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 28 Aug 2015 17:59:35 +0800 Subject: libceph: set 'exists' flag for newly up osd Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 4a31258..7d8f581 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1300,7 +1300,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ceph_decode_addr(&addr); pr_info("osd%d up\n", osd); BUG_ON(osd >= map->max_osd); - map->osd_state[osd] |= CEPH_OSD_UP; + map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS; map->osd_addr[osd] = addr; } -- cgit v0.10.2 From 3ebe138ac642a195c7f2efdb918f464734421fd6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 31 Aug 2015 15:21:39 +0300 Subject: rbd: fix double free on rbd_dev->header_name If rbd_dev_image_probe() in rbd_dev_probe_parent() fails, header_name is freed twice: once in rbd_dev_probe_parent() and then in its caller rbd_dev_image_probe() (rbd_dev_image_probe() is called recursively to handle parent images). rbd_dev_probe_parent() is responsible for probing the parent, so it shouldn't muck with clone's fields. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bc67a93..324bf35 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5201,7 +5201,6 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) out_err: if (parent) { rbd_dev_unparent(rbd_dev); - kfree(rbd_dev->header_name); rbd_dev_destroy(parent); } else { rbd_put_client(rbdc); -- cgit v0.10.2 From d194cd1dd1be61249b08e5461ae8a9c05d1072c9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 31 Aug 2015 18:22:10 +0300 Subject: rbd: plug rbd_dev->header.object_prefix memory leak Need to free object_prefix when rbd_dev_v2_snap_context() fails, but only if this is the first time we are reading in the header. 
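The underlying ownership rule: an error path may free only what the current invocation allocated, never state a previous successful call still owns. A self-contained userspace sketch with hypothetical names:

        #include <errno.h>
        #include <stdlib.h>
        #include <string.h>

        struct hdr { char *object_prefix; };

        static int read_snap_context(struct hdr *h) { (void)h; return -EIO; } /* stub: pretend it fails */

        static int header_refresh(struct hdr *h, int first_time)
        {
                int ret;

                if (first_time) {
                        h->object_prefix = strdup("rbd_data.");  /* ours to unwind */
                        if (!h->object_prefix)
                                return -ENOMEM;
                }

                ret = read_snap_context(h);
                if (ret && first_time) {
                        free(h->object_prefix);  /* undo only our own allocation */
                        h->object_prefix = NULL;
                }
                return ret;
        }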
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 324bf35..69d03aa 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4720,7 +4720,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) } ret = rbd_dev_v2_snap_context(rbd_dev); - dout("rbd_dev_v2_snap_context returned %d\n", ret); + if (ret && first_time) { + kfree(rbd_dev->header.object_prefix); + rbd_dev->header.object_prefix = NULL; + } return ret; } -- cgit v0.10.2 From 8b9558aab853e98ba6e3fee0dd8545544966958c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 1 Sep 2015 17:19:38 +0800 Subject: libceph: use keepalive2 to verify the mon session is alive Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53..397c5cd 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -46,6 +46,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long monc_ping_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -66,6 +67,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 3775327..7e1252e 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -248,6 +248,8 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ + struct timespec last_keepalive_ack; + struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ }; @@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); extern void ceph_con_keepalive(struct ceph_connection *con); +extern bool ceph_con_keepalive_expired(struct ceph_connection *con, + unsigned long interval); extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, size_t length, size_t alignment); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c18872..0fe2656 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -84,10 +84,12 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_MSG 7 /* message */ #define CEPH_MSGR_TAG_ACK 8 /* message ack */ #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! 
*/ -#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ +#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ +#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ +#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ /* diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index f30329f..3f56eef 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name, opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; + opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 101ab62..36757d4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache; static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; +static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; #ifdef CONFIG_LOCKDEP static struct lock_class_key socket_class; @@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); - con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); + if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { + struct timespec ts = CURRENT_TIME; + struct ceph_timespec ceph_ts; + ceph_encode_timespec(&ceph_ts, &ts); + con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); + con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); + } else { + con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); + } con_flag_set(con, CON_FLAG_WRITE_PENDING); } @@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con) con->in_tag = CEPH_MSGR_TAG_READY; } +static void prepare_read_keepalive_ack(struct ceph_connection *con) +{ + dout("prepare_read_keepalive_ack %p\n", con); + con->in_base_pos = 0; +} + /* * Prepare to read a message. */ @@ -2457,6 +2472,17 @@ static void process_message(struct ceph_connection *con) mutex_lock(&con->mutex); } +static int read_keepalive_ack(struct ceph_connection *con) +{ + struct ceph_timespec ceph_ts; + size_t size = sizeof(ceph_ts); + int ret = read_partial(con, size, size, &ceph_ts); + if (ret <= 0) + return ret; + ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); + prepare_read_tag(con); + return 1; +} /* * Write something to the socket. Called in a worker thread when the @@ -2526,6 +2552,10 @@ more_kvec: do_next: if (con->state == CON_STATE_OPEN) { + if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { + prepare_write_keepalive(con); + goto more; + } /* is anything else pending? */ if (!list_empty(&con->out_queue)) { prepare_write_message(con); @@ -2535,10 +2565,6 @@ do_next: prepare_write_ack(con); goto more; } - if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { - prepare_write_keepalive(con); - goto more; - } } /* Nothing to do! 
*/ @@ -2641,6 +2667,9 @@ more: case CEPH_MSGR_TAG_ACK: prepare_read_ack(con); break; + case CEPH_MSGR_TAG_KEEPALIVE2_ACK: + prepare_read_keepalive_ack(con); + break; case CEPH_MSGR_TAG_CLOSE: con_close_socket(con); con->state = CON_STATE_CLOSED; @@ -2684,6 +2713,12 @@ more: process_ack(con); goto more; } + if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { + ret = read_keepalive_ack(con); + if (ret <= 0) + goto out; + goto more; + } out: dout("try_read done on %p ret %d\n", con, ret); @@ -3101,6 +3136,20 @@ void ceph_con_keepalive(struct ceph_connection *con) } EXPORT_SYMBOL(ceph_con_keepalive); +bool ceph_con_keepalive_expired(struct ceph_connection *con, + unsigned long interval) +{ + if (interval > 0 && + (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { + struct timespec now = CURRENT_TIME; + struct timespec ts; + jiffies_to_timespec(interval, &ts); + ts = timespec_add(con->last_keepalive_ack, ts); + return timespec_compare(&now, &ts) >= 0; + } + return false; +} + static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) { struct ceph_msg_data *data; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9d6ff12..edda016 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc) CEPH_ENTITY_TYPE_MON, monc->cur_mon, &monc->monmap->mon_inst[monc->cur_mon].addr); + /* send an initial keepalive to ensure our timestamp is + * valid by the time we are in an OPENED state */ + ceph_con_keepalive(&monc->con); + /* initiatiate authentication handshake */ ret = ceph_auth_build_hello(monc->auth, monc->m_auth->front.iov_base, @@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc) */ static void __schedule_delayed(struct ceph_mon_client *monc) { - unsigned int delay; + struct ceph_options *opt = monc->client->options; + unsigned long delay; - if (monc->cur_mon < 0 || __sub_expired(monc)) + if (monc->cur_mon < 0 || __sub_expired(monc)) { delay = 10 * HZ; - else + } else { delay = 20 * HZ; - dout("__schedule_delayed after %u\n", delay); - schedule_delayed_work(&monc->delayed_work, delay); + if (opt->monc_ping_timeout > 0) + delay = min(delay, opt->monc_ping_timeout / 3); + } + dout("__schedule_delayed after %lu\n", delay); + schedule_delayed_work(&monc->delayed_work, + round_jiffies_relative(delay)); } /* @@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work) __close_session(monc); __open_session(monc); /* continue hunting */ } else { - ceph_con_keepalive(&monc->con); + struct ceph_options *opt = monc->client->options; + int is_auth = ceph_auth_is_authenticated(monc->auth); + if (ceph_con_keepalive_expired(&monc->con, + opt->monc_ping_timeout)) { + dout("monc keepalive timeout\n"); + is_auth = 0; + __close_session(monc); + monc->hunting = true; + __open_session(monc); + } - __validate_auth(monc); + if (!monc->hunting) { + ceph_con_keepalive(&monc->con); + __validate_auth(monc); + } - if (ceph_auth_is_authenticated(monc->auth)) + if (is_auth) __send_subscribe(monc); } __schedule_delayed(monc); -- cgit v0.10.2 From 7179385afef252cd3f52c0a06cc0c405ae6d66bc Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Fri, 12 Jun 2015 09:44:52 -0400 Subject: sb_edac: look harder for DDRIO on Haswell systems In case the memory banks are populated so the first channel isn't used, the DDRIO PCI device won't be visible and it won't be possible to determine the memory type. 
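Distilled, "look harder" means treating several possible PCI functions as one logical resource and latching whichever shows up first. A sketch (device IDs taken from the patch, surrounding structure hypothetical):

        struct pci_dev;
        struct pvt { struct pci_dev *pci_ddrio; };

        static const unsigned short ddrio_ids[] = { 0x2fbd, 0x2fbf, 0x2fb9, 0x2fbb };

        static void match_ddrio(struct pvt *pvt, unsigned short dev_id,
                                struct pci_dev *pdev)
        {
                unsigned int i;

                for (i = 0; i < sizeof(ddrio_ids) / sizeof(ddrio_ids[0]); i++) {
                        if (dev_id == ddrio_ids[i] && !pvt->pci_ddrio) {
                                pvt->pci_ddrio = pdev;  /* first populated channel wins */
                                break;
                        }
                }
        }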
Acked-by: Tony Luck Signed-off-by: Aristeu Rozanski Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index ca78311..84b740d 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -471,6 +471,9 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = { #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2 0x2f6c #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3 0x2f6d #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0 0x2fbd +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1 0x2fbf +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2 0x2fb9 +#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb static const struct pci_id_descr pci_dev_descr_haswell[] = { /* first item must be the HA */ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0) }, @@ -488,6 +491,9 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = { { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1) }, { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1) }, + { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1) }, { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1) }, { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1) }, @@ -1869,7 +1875,11 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci, } break; case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0: - pvt->pci_ddrio = pdev; + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2: + case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3: + if (!pvt->pci_ddrio) + pvt->pci_ddrio = pdev; break; case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1: pvt->pci_ha1 = pdev; -- cgit v0.10.2 From 12f0721c5a70408e86257c5c99605cf743cd44c6 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Fri, 12 Jun 2015 15:08:17 -0400 Subject: sb_edac: correctly fetch DIMM width on Ivy Bridge and Haswell dimm_dev_type has been determined incorrectly in sb_edac. This patch fixes it for Ivy Bridge and Haswell only, since no equivalent information exists for Sandy Bridge. We tested this patch on multiple systems, matching the results against the installed memory modules.
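The decode itself is a two-bit field lookup; only the field's position differs per generation (bits 7:8 on Ivy Bridge/Haswell, 8:9 on Broadwell, per the patch below; Sandy Bridge has no such field). A standalone sketch of the mapping:

        /* mtr width field: 0 -> x4, 1 -> x8, 2 -> x16, 3 -> unknown */
        static int width_from_mtr(unsigned int mtr, int lo_bit)
        {
                switch ((mtr >> lo_bit) & 0x3) {
                case 0: return 4;
                case 1: return 8;
                case 2: return 16;
                default: return -1;  /* unknown */
                }
        }

An ibridge/haswell caller would use width_from_mtr(mtr, 7) and a broadwell caller width_from_mtr(mtr, 8), mirroring the two GET_BITFIELD() ranges in the patch below.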
Acked-by: Tony Luck Signed-off-by: Aristeu Rozanski Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 84b740d..cf1268d 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -280,6 +280,7 @@ struct sbridge_info { u8 max_interleave; u8 (*get_node_id)(struct sbridge_pvt *pvt); enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt); + enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr); struct pci_dev *pci_vtd; }; @@ -768,6 +769,49 @@ out: return mtype; } +static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* there's no way to figure out */ + return DEV_UNKNOWN; +} + +static enum dev_type __ibridge_get_width(u32 mtr) +{ + enum dev_type type; + + switch (mtr) { + case 3: + type = DEV_UNKNOWN; + break; + case 2: + type = DEV_X16; + break; + case 1: + type = DEV_X8; + break; + case 0: + type = DEV_X4; + break; + } + + return type; +} + +static enum dev_type ibridge_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* + * ddr3_width on the documentation but also valid for DDR4 on + * Haswell + */ + return __ibridge_get_width(GET_BITFIELD(mtr, 7, 8)); +} + +static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr) +{ + /* ddr3_width on the documentation but also valid for DDR4 */ + return __ibridge_get_width(GET_BITFIELD(mtr, 8, 9)); +} + static u8 get_node_id(struct sbridge_pvt *pvt) { u32 reg; @@ -972,17 +1016,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) dimm->nr_pages = npages; dimm->grain = 32; - switch (banks) { - case 16: - dimm->dtype = DEV_X16; - break; - case 8: - dimm->dtype = DEV_X8; - break; - case 4: - dimm->dtype = DEV_X4; - break; - } + dimm->dtype = pvt->info.get_width(pvt, mtr); dimm->mtype = mtype; dimm->edac_mode = mode; snprintf(dimm->label, sizeof(dimm->label), @@ -2371,6 +2405,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.interleave_list = ibridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = ibridge_get_width; mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx); /* Store pci devices at mci for faster access */ @@ -2390,6 +2425,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.interleave_list = sbridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list); pvt->info.interleave_pkg = sbridge_interleave_pkg; + pvt->info.get_width = sbridge_get_width; mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); /* Store pci devices at mci for faster access */ @@ -2409,6 +2445,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.interleave_list = ibridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = ibridge_get_width; mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx); /* Store pci devices at mci for faster access */ @@ -2428,6 +2465,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.interleave_list = ibridge_interleave_list; pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); pvt->info.interleave_pkg = ibridge_interleave_pkg; + pvt->info.get_width = broadwell_get_width; mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx); /* Store 
pci devices at mci for faster access */ -- cgit v0.10.2 From f33b77408a91d4427374010897b90af678dc47be Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 8 Sep 2015 19:06:03 +0200 Subject: staging: board: Migrate away from __pm_genpd_name_add_device() The named genpd APIs are deprecated. Hence convert the board staging code from using genpd names to DT node paths. For now this supports PM domains with "#power-domain-cells = <0>" only. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulf Hansson Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/staging/board/armadillo800eva.c b/drivers/staging/board/armadillo800eva.c index 81df77b..9c41652 100644 --- a/drivers/staging/board/armadillo800eva.c +++ b/drivers/staging/board/armadillo800eva.c @@ -91,7 +91,7 @@ static const struct board_staging_dev armadillo800eva_devices[] __initconst = { .pdev = &lcdc0_device, .clocks = lcdc0_clocks, .nclocks = ARRAY_SIZE(lcdc0_clocks), - .domain = "a4lc", + .domain = "/system-controller@e6180000/pm-domains/c5/a4lc@1" }, }; diff --git a/drivers/staging/board/board.c b/drivers/staging/board/board.c index 29d456e..3eb5eb8 100644 --- a/drivers/staging/board/board.c +++ b/drivers/staging/board/board.c @@ -135,6 +135,40 @@ int __init board_staging_register_clock(const struct board_staging_clk *bsc) return error; } +#ifdef CONFIG_PM_GENERIC_DOMAINS_OF +static int board_staging_add_dev_domain(struct platform_device *pdev, + const char *domain) +{ + struct of_phandle_args pd_args; + struct generic_pm_domain *pd; + struct device_node *np; + + np = of_find_node_by_path(domain); + if (!np) { + pr_err("Cannot find domain node %s\n", domain); + return -ENOENT; + } + + pd_args.np = np; + pd_args.args_count = 0; + pd = of_genpd_get_from_provider(&pd_args); + if (IS_ERR(pd)) { + pr_err("Cannot find genpd %s (%ld)\n", domain, PTR_ERR(pd)); + return PTR_ERR(pd); + + } + pr_debug("Found genpd %s for device %s\n", pd->name, pdev->name); + + return pm_genpd_add_device(pd, &pdev->dev); +} +#else +static inline int board_staging_add_dev_domain(struct platform_device *pdev, + const char *domain) +{ + return 0; +} +#endif + int __init board_staging_register_device(const struct board_staging_dev *dev) { struct platform_device *pdev = dev->pdev; @@ -161,7 +195,7 @@ int __init board_staging_register_device(const struct board_staging_dev *dev) } if (dev->domain) - __pm_genpd_name_add_device(dev->domain, &pdev->dev, NULL); + board_staging_add_dev_domain(pdev, dev->domain); return error; } -- cgit v0.10.2 From 4eafbd15b6c84cd3f6c76022c8a6c27f7cc076e1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 8 Sep 2015 18:41:01 +0200 Subject: PM / OPP: add dev_pm_opp_get_suspend_opp() helper Add dev_pm_opp_get_suspend_opp() helper to obtain suspend opp. Acked-by: Viresh Kumar Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index bb703b5..3df62db 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -341,6 +341,36 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); /** + * dev_pm_opp_get_suspend_opp() - Get suspend opp + * @dev: device for which we do this operation + * + * Return: This function returns pointer to the suspend opp if it is + * defined, otherwise it returns NULL. + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. 
The reason for the same is that the opp pointer which is + * returned will remain valid for use with opp_get_{voltage, freq} only while + * under the locked area. The pointer returned must be used prior to unlocking + * with rcu_read_unlock() to maintain the integrity of the pointer. + */ +struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) +{ + struct device_opp *dev_opp; + struct dev_pm_opp *opp; + + opp_rcu_lockdep_assert(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) + opp = NULL; + else + opp = dev_opp->suspend_opp; + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); + +/** * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list * @dev: device for which we do this operation * diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cab7ba5..e817722 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); +struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, @@ -80,6 +81,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) return 0; } +static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) +{ + return NULL; +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { -- cgit v0.10.2 From 201f3716575781b83259ed026845a213c2355035 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 8 Sep 2015 18:41:02 +0200 Subject: cpufreq: allow cpufreq_generic_suspend() to work without suspend frequency Some cpufreq drivers may set suspend frequency only for selected setups but still would like to use the generic suspend handler. Thus don't treat !policy->suspend_freq condition as an incorrect one. Acked-by: Viresh Kumar Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8b6bf38..ca96333 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1628,8 +1628,8 @@ int cpufreq_generic_suspend(struct cpufreq_policy *policy) int ret; if (!policy->suspend_freq) { - pr_err("%s: suspend_freq can't be zero\n", __func__); - return -EINVAL; + pr_debug("%s: suspend_freq not defined\n", __func__); + return 0; } pr_debug("%s: Setting suspend-freq: %u\n", __func__, -- cgit v0.10.2 From 953ba9ff77f3d08635712eaeffb218d46889b58a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 8 Sep 2015 18:41:03 +0200 Subject: cpufreq-dt: add suspend frequency support Add suspend frequency support and if needed set it to the frequency obtained from the suspend opp (can be defined using opp-v2 bindings and is optional). Acked-by: Viresh Kumar Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 3b64c20..7c0d70e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -196,6 +196,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct device *cpu_dev; struct regulator *cpu_reg; struct clk *cpu_clk; + struct dev_pm_opp *suspend_opp; unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; bool need_update = false; @@ -333,6 +334,13 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->driver_data = priv; policy->clk = cpu_clk; + + rcu_read_lock(); + suspend_opp = dev_pm_opp_get_suspend_opp(cpu_dev); + if (suspend_opp) + policy->suspend_freq = dev_pm_opp_get_freq(suspend_opp) / 1000; + rcu_read_unlock(); + ret = cpufreq_table_validate_and_show(policy, freq_table); if (ret) { dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__, @@ -423,6 +431,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { .ready = cpufreq_ready, .name = "cpufreq-dt", .attr = cpufreq_dt_attr, + .suspend = cpufreq_generic_suspend, }; static int dt_cpufreq_probe(struct platform_device *pdev) -- cgit v0.10.2 From b855d45dc3175eb3e602b945805c7b6aa8c04559 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 Sep 2015 15:16:09 +1000 Subject: powerpc: Wire up sys_userfaultfd() The selftest passes on 64-bit LE and BE. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 71f2b3f..4d65499 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -368,3 +368,4 @@ SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) COMPAT_SYS(execveat) PPC64ONLY(switch_endian) +SYSCALL_SPU(userfaultfd) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f4f8b66..4a055b6 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 364 +#define __NR_syscalls 365 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e4aa173..6ad58d4 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -386,5 +386,6 @@ #define __NR_bpf 361 #define __NR_execveat 362 #define __NR_switch_endian 363 +#define __NR_userfaultfd 364 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v0.10.2 From d15f9d694b77fe5e4ea12b3031ecaa13b5aa2b10 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 2 Sep 2015 11:37:09 +0300 Subject: libceph: check data_len in ->alloc_msg() Only ->alloc_msg() should check data_len of the incoming message against the preallocated ceph_msg, doing it in the messenger is not right. The contract is that either ->alloc_msg() returns a ceph_msg which will fit all of the portions of the incoming message, or it returns NULL and possibly sets skip, signaling whether NULL is due to an -ENOMEM. ->alloc_msg() should be the only place where we make the skip/no-skip decision. I stumbled upon this while looking at con/osd ref counting. Right now, if we get a non-extent message with a larger data portion than we are prepared for, ->alloc_msg() returns a ceph_msg, and then, when we skip it in the messenger, we don't put the con/osd ref acquired in ceph_con_in_msg_alloc() (which is normally put in process_message()), so this also fixes a memory leak. 
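Stated as code, the ->alloc_msg() contract described above reads roughly like this (self-contained sketch; the types and helper are hypothetical stand-ins, not the libceph API):

        struct msg_hdr { unsigned int data_len; };
        struct msg     { unsigned int capacity; };

        static struct msg *lookup_prealloc(const struct msg_hdr *hdr) { (void)hdr; return 0; } /* stub */

        /* Return a message that fits the whole payload, or NULL.
         * NULL + *skip = 1 means "drop this message on purpose";
         * NULL + *skip = 0 means allocation failure (-ENOMEM path). */
        static struct msg *example_alloc_msg(const struct msg_hdr *hdr, int *skip)
        {
                struct msg *m = lookup_prealloc(hdr);

                if (!m || hdr->data_len > m->capacity) {
                        *skip = 1;  /* the decision is made here, nowhere else */
                        return NULL;
                }
                *skip = 0;
                return m;
        }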
An existing BUG_ON in ceph_msg_data_cursor_init() ensures we don't corrupt random memory should a buggy ->alloc_msg() return an unfit ceph_msg. While at it, I changed the "unknown tid" dout() to a pr_warn() to make sure all skips are seen and unified format strings. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 36757d4..525f454 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2337,13 +2337,6 @@ static int read_partial_message(struct ceph_connection *con) return ret; BUG_ON(!con->in_msg ^ skip); - if (con->in_msg && data_len > con->in_msg->data_length) { - pr_warn("%s skipping long message (%u > %zd)\n", - __func__, data_len, con->in_msg->data_length); - ceph_msg_put(con->in_msg); - con->in_msg = NULL; - skip = 1; - } if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5003367..80b94e3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2817,8 +2817,9 @@ out: } /* - * lookup and return message for incoming reply. set up reply message - * pages. + * Lookup and return message for incoming reply. Don't try to do + * anything about a larger than preallocated data portion of the + * message at the moment - for now, just skip the message. */ static struct ceph_msg *get_reply(struct ceph_connection *con, struct ceph_msg_header *hdr, @@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); if (!req) { - *skip = 1; + pr_warn("%s osd%d tid %llu unknown, skipping\n", + __func__, osd->o_osd, tid); m = NULL; - dout("get_reply unknown tid %llu from osd%d\n", tid, - osd->o_osd); + *skip = 1; goto out; } @@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ceph_msg_revoke_incoming(req->r_reply); if (front_len > req->r_reply->front_alloc_len) { - pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", - front_len, req->r_reply->front_alloc_len, - (unsigned int)con->peer_name.type, - le64_to_cpu(con->peer_name.num)); + pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", + __func__, osd->o_osd, req->r_tid, front_len, + req->r_reply->front_alloc_len); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, false); if (!m) @@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ceph_msg_put(req->r_reply); req->r_reply = m; } - m = ceph_msg_get(req->r_reply); - - if (data_len > 0) { - struct ceph_osd_data *osd_data; - /* - * XXX This is assuming there is only one op containing - * XXX page data. Probably OK for reads, but this - * XXX ought to be done more generally. 
- */ - osd_data = osd_req_op_extent_osd_data(req, 0); - if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { - if (osd_data->pages && - unlikely(osd_data->length < data_len)) { - - pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n", - tid, data_len, osd_data->length); - *skip = 1; - ceph_msg_put(m); - m = NULL; - goto out; - } - } + if (data_len > req->r_reply->data_length) { + pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", + __func__, osd->o_osd, req->r_tid, data_len, + req->r_reply->data_length); + m = NULL; + *skip = 1; + goto out; } - *skip = 0; + + m = ceph_msg_get(req->r_reply); dout("get_reply tid %lld %p\n", tid, m); out: mutex_unlock(&osdc->request_mutex); return m; - } static struct ceph_msg *alloc_msg(struct ceph_connection *con, -- cgit v0.10.2 From 55b0b31cbc09f80db384671e22cdc94b2aa26b29 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 7 Sep 2015 11:35:01 +0800 Subject: ceph: get inode size for each append write Signed-off-by: Yan, Zheng diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 90ec110..0c62868 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -952,6 +952,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); + if (iocb->ki_flags & IOCB_APPEND) { + err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); + if (err < 0) + goto out; + } + err = generic_write_checks(iocb, from); if (err <= 0) goto out; -- cgit v0.10.2 From 438386853d4c0c48fe73bf05a7d61c70ca5a3bfb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 7 Sep 2015 15:46:24 +0800 Subject: ceph: improve readahead for file holes When readahead encounters file holes, the osd reply returns -ENOENT and finish_read() skips adding pages to the page cache, so readahead does not work for file holes. The fix is to add zero pages to the page cache when -ENOENT is returned. Signed-off-by: Yan, Zheng diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9893335..6471e28 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -276,7 +276,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) for (i = 0; i < num_pages; i++) { struct page *page = osd_data->pages[i]; - if (rc < 0) + if (rc < 0 && rc != -ENOENT) goto unlock; if (bytes < (int)PAGE_CACHE_SIZE) { /* zero (remainder of) page */ -- cgit v0.10.2 From 721b51fcf91898299d96f4b72cb9434cda29dce6 Mon Sep 17 00:00:00 2001 From: John Lin Date: Wed, 9 Sep 2015 16:47:48 +0800 Subject: ASoC: rt5645: Add struct dmi_system_id "Google Ultima" for chrome platform Signed-off-by: John Lin Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 610eacd..e620da1 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3236,6 +3236,13 @@ static struct dmi_system_id dmi_platform_intel_braswell[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Celes"), }, }, + { + .ident = "Google Ultima", + .callback = strago_quirk_cb, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Ultima"), + }, + }, { } }; -- cgit v0.10.2 From f1ec5ec7a94ba8138f9cbdc1e9e3b03aee29c4df Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 9 Sep 2015 12:10:31 +0100 Subject: ASoC: intel: Fix SSP port configuration after RTD3 resume. Currently the SSP port settings are being clobbered as part of the DSP RTD3 restore logic. Make sure we save the correct params and restore them at resume. The FW sadly does not save SSP settings as part of the PM context.
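The shape of the fix is cache-and-replay (sketch with invented names; the driver itself stores the values in struct sst_hsw rather than a global):

        struct ssp_cfg { int dev, mclk, mode, clk_div; };
        static struct ssp_cfg last_cfg;  /* what was last programmed */

        static int hw_program_ssp(const struct ssp_cfg *c) { (void)c; return 0; } /* stub */

        static int ssp_set_config(struct ssp_cfg c)
        {
                last_cfg = c;  /* remember for resume */
                return hw_program_ssp(&last_cfg);
        }

        static int ssp_runtime_resume(void)
        {
                /* the firmware forgets SSP settings across D3: replay the
                 * cached values rather than re-sending hardcoded defaults */
                return hw_program_ssp(&last_cfg);
        }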
Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/intel/haswell/sst-haswell-ipc.c b/sound/soc/intel/haswell/sst-haswell-ipc.c index f95f271..5bc9855 100644 --- a/sound/soc/intel/haswell/sst-haswell-ipc.c +++ b/sound/soc/intel/haswell/sst-haswell-ipc.c @@ -302,6 +302,10 @@ struct sst_hsw { struct sst_hsw_ipc_dx_reply dx; void *dx_context; dma_addr_t dx_context_paddr; + enum sst_hsw_device_id dx_dev; + enum sst_hsw_device_mclk dx_mclk; + enum sst_hsw_device_mode dx_mode; + u32 dx_clock_divider; /* boot */ wait_queue_head_t boot_wait; @@ -1400,10 +1404,10 @@ int sst_hsw_device_set_config(struct sst_hsw *hsw, trace_ipc_request("set device config", dev); - config.ssp_interface = dev; - config.clock_frequency = mclk; - config.mode = mode; - config.clock_divider = clock_divider; + hsw->dx_dev = config.ssp_interface = dev; + hsw->dx_mclk = config.clock_frequency = mclk; + hsw->dx_mode = config.mode = mode; + hsw->dx_clock_divider = config.clock_divider = clock_divider; if (mode == SST_HSW_DEVICE_TDM_CLOCK_MASTER) config.channels = 4; else @@ -1704,10 +1708,10 @@ int sst_hsw_dsp_runtime_resume(struct sst_hsw *hsw) return -EIO; } - /* Set ADSP SSP port settings */ - ret = sst_hsw_device_set_config(hsw, SST_HSW_DEVICE_SSP_0, - SST_HSW_DEVICE_MCLK_FREQ_24_MHZ, - SST_HSW_DEVICE_CLOCK_MASTER, 9); + /* Set ADSP SSP port settings - sadly the FW does not store SSP port + settings as part of the PM context. */ + ret = sst_hsw_device_set_config(hsw, hsw->dx_dev, hsw->dx_mclk, + hsw->dx_mode, hsw->dx_clock_divider); if (ret < 0) dev_err(dev, "error: SSP re-initialization failed\n"); -- cgit v0.10.2 From ddab2c0eaf57b2de0b2516a477e3ce0f7554509b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 9 Sep 2015 22:03:30 +0300 Subject: tools/virtio: fix build after 4.2 changes more stubs, mostly Signed-off-by: Michael S. Tsirkin diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index aff61e1..26b7926 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -3,6 +3,8 @@ #define mb() __sync_synchronize() #define smp_mb() mb() +# define dma_rmb() barrier() +# define dma_wmb() barrier() # define smp_rmb() barrier() # define smp_wmb() barrier() /* Weak barriers should be used. If not - it's a bug */ diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h new file mode 100644 index 0000000..416875e --- /dev/null +++ b/tools/virtio/linux/export.h @@ -0,0 +1,3 @@ +#define EXPORT_SYMBOL_GPL(sym) extern typeof(sym) sym +#define EXPORT_SYMBOL(sym) extern typeof(sym) sym + diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 1e8ce69..0a3da64 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -22,6 +22,7 @@ typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; +typedef unsigned int __wsum; struct page { unsigned long long dummy; @@ -47,6 +48,13 @@ static inline void *kmalloc(size_t s, gfp_t gfp) return __kmalloc_fake; return malloc(s); } +static inline void *kzalloc(size_t s, gfp_t gfp) +{ + void *p = kmalloc(s, gfp); + + memset(p, 0, s); + return p; +} static inline void kfree(void *p) { -- cgit v0.10.2 From f15d7114bbdd594f1d8b644a44f647d2b6e9772e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 29 Jun 2015 11:46:17 -0500 Subject: Documentation/watchdog: add timeout and ping rate control to watchdog-test.c The watchdog test program is much more useful if it can configure the timeout value and ping rate. This will allow you to test actual timeouts. 
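For example, with the new options (invocations illustrative; the flags themselves are described next):

        ./watchdog-test -t 60    # set the watchdog timeout to 60 seconds
        ./watchdog-test -p 10    # ping every 10 seconds instead of every second
        ./watchdog-test -p 90    # ping slower than the timeout, which should
                                 # let the watchdog expire and reset the machine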
Adds the -t parameter to set the timeout value (in seconds), and -p to set the ping rate (number of seconds between pings). Signed-off-by: Timur Tabi Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c index 3da8229..fcdde8f 100644 --- a/Documentation/watchdog/src/watchdog-test.c +++ b/Documentation/watchdog/src/watchdog-test.c @@ -41,6 +41,7 @@ static void term(int sig) int main(int argc, char *argv[]) { int flags; + unsigned int ping_rate = 1; fd = open("/dev/watchdog", O_WRONLY); @@ -63,22 +64,33 @@ int main(int argc, char *argv[]) fprintf(stderr, "Watchdog card enabled.\n"); fflush(stderr); goto end; + } else if (!strncasecmp(argv[1], "-t", 2) && argv[2]) { + flags = atoi(argv[2]); + ioctl(fd, WDIOC_SETTIMEOUT, &flags); + fprintf(stderr, "Watchdog timeout set to %u seconds.\n", flags); + fflush(stderr); + goto end; + } else if (!strncasecmp(argv[1], "-p", 2) && argv[2]) { + ping_rate = strtoul(argv[2], NULL, 0); + fprintf(stderr, "Watchdog ping rate set to %u seconds.\n", ping_rate); + fflush(stderr); } else { - fprintf(stderr, "-d to disable, -e to enable.\n"); + fprintf(stderr, "-d to disable, -e to enable, -t to set " \ + "the timeout,\n-p to set the ping rate, and \n"); fprintf(stderr, "run by itself to tick the card.\n"); fflush(stderr); goto end; } - } else { - fprintf(stderr, "Watchdog Ticking Away!\n"); - fflush(stderr); } + fprintf(stderr, "Watchdog Ticking Away!\n"); + fflush(stderr); + signal(SIGINT, term); while(1) { keep_alive(); - sleep(1); + sleep(ping_rate); } end: close(fd); -- cgit v0.10.2 From 8a340dbbc4b10fe07a924e91979bfc93e966dd65 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 23 Jul 2015 17:21:16 -0300 Subject: watchdog: imgpdc: Unregister restart handler on remove Commit c631f20068 ("watchdog: imgpdc: Add reboot support") introduced a restart handler but forgot to unregister it on driver removal. Fix it. 
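The hazard is a stale notifier: once the driver's memory is freed, a reboot would call through a dangling pointer. A sketch of the symmetric lifetime rule against the real kernel APIs (struct my_wdt and the probe/remove pair are hypothetical):

        static int my_wdt_probe(struct my_wdt *w)
        {
                register_restart_handler(&w->restart_handler);
                return watchdog_register_device(&w->wdd);
        }

        static int my_wdt_remove(struct my_wdt *w)
        {
                /* mirror probe: nothing registered may outlive the driver */
                unregister_restart_handler(&w->restart_handler);
                watchdog_unregister_device(&w->wdd);
                return 0;
        }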
Fixes: c631f20068 ("watchdog: imgpdc: Add reboot support") Reported-by: Ariel D'Alessandro Signed-off-by: Ezequiel Garcia Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c index 0f73621..15ab072 100644 --- a/drivers/watchdog/imgpdc_wdt.c +++ b/drivers/watchdog/imgpdc_wdt.c @@ -316,6 +316,7 @@ static int pdc_wdt_remove(struct platform_device *pdev) { struct pdc_wdt_dev *pdc_wdt = platform_get_drvdata(pdev); + unregister_restart_handler(&pdc_wdt->restart_handler); pdc_wdt_stop(&pdc_wdt->wdt_dev); watchdog_unregister_device(&pdc_wdt->wdt_dev); clk_disable_unprepare(pdc_wdt->wdt_clk); -- cgit v0.10.2 From 9fab06920c9207aca8ff4e2416bb10cabc19294a Mon Sep 17 00:00:00 2001 From: Greta Zhang Date: Fri, 24 Jul 2015 15:28:45 +0800 Subject: watchdog: mtk_wdt: add suspend/resume support add mediatek watchdog driver suspend/resume support Signed-off-by: Greta Zhang Signed-off-by: Roger Lu Signed-off-by: Eddie Huang Acked-by: Matthias Brugger Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index 938b987..056412c 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -221,17 +221,47 @@ static int mtk_wdt_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int mtk_wdt_suspend(struct device *dev) +{ + struct mtk_wdt_dev *mtk_wdt = dev_get_drvdata(dev); + + if (watchdog_active(&mtk_wdt->wdt_dev)) + mtk_wdt_stop(&mtk_wdt->wdt_dev); + + return 0; +} + +static int mtk_wdt_resume(struct device *dev) +{ + struct mtk_wdt_dev *mtk_wdt = dev_get_drvdata(dev); + + if (watchdog_active(&mtk_wdt->wdt_dev)) { + mtk_wdt_start(&mtk_wdt->wdt_dev); + mtk_wdt_ping(&mtk_wdt->wdt_dev); + } + + return 0; +} +#endif + static const struct of_device_id mtk_wdt_dt_ids[] = { { .compatible = "mediatek,mt6589-wdt" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, mtk_wdt_dt_ids); +static const struct dev_pm_ops mtk_wdt_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mtk_wdt_suspend, + mtk_wdt_resume) +}; + static struct platform_driver mtk_wdt_driver = { .probe = mtk_wdt_probe, .remove = mtk_wdt_remove, .driver = { .name = DRV_NAME, + .pm = &mtk_wdt_pm_ops, .of_match_table = mtk_wdt_dt_ids, }, }; -- cgit v0.10.2 From 5724485b185a4ac4bb96149718ff736c5ef5c169 Mon Sep 17 00:00:00 2001 From: Greta Zhang Date: Fri, 24 Jul 2015 15:28:46 +0800 Subject: watchdog: mtk_wdt: add wdt shutdown callback to disable wdt if enabled Without .shutdown(), watchdog might reset the system during power off. For example, if watchdog's timeout is set to 30s, then it is reset to zero by mtk_wdt_ping(). During power off, no app will ping watchdog, but watchdog is still running and may trigger reset. 
Signed-off-by: Greta Zhang Signed-off-by: Eddie Huang Acked-by: Matthias Brugger Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index 056412c..6ad9df9 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -210,6 +210,14 @@ static int mtk_wdt_probe(struct platform_device *pdev) return 0; } +static void mtk_wdt_shutdown(struct platform_device *pdev) +{ + struct mtk_wdt_dev *mtk_wdt = platform_get_drvdata(pdev); + + if (watchdog_active(&mtk_wdt->wdt_dev)) + mtk_wdt_stop(&mtk_wdt->wdt_dev); +} + static int mtk_wdt_remove(struct platform_device *pdev) { struct mtk_wdt_dev *mtk_wdt = platform_get_drvdata(pdev); @@ -259,6 +267,7 @@ static const struct dev_pm_ops mtk_wdt_pm_ops = { static struct platform_driver mtk_wdt_driver = { .probe = mtk_wdt_probe, .remove = mtk_wdt_remove, + .shutdown = mtk_wdt_shutdown, .driver = { .name = DRV_NAME, .pm = &mtk_wdt_pm_ops, -- cgit v0.10.2 From 0919e4445190da18496d31aac08b90828a47d45f Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Sat, 25 Jul 2015 08:25:18 +0200 Subject: watchdog: sunxi: fix activation of system reset Commit f2147de33470 ("watchdog: sunxi: support parameterized compatible strings") introduced a regression in sunxi_wdt_start(), by which the system reset function of the watchdog is not enabled upon starting the watchdog. As a result, the system is not reset when the watchdog expires. Fix it. Fixes: f2147de33470 ("watchdog: sunxi: support parameterized compatible strings") Signed-off-by: Francesco Lavra Acked-by: Maxime Ripard Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Cc: stable@vger.kernel.org diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index a29afb3..47bd8a1 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -184,7 +184,7 @@ static int sunxi_wdt_start(struct watchdog_device *wdt_dev) /* Set system reset function */ reg = readl(wdt_base + regs->wdt_cfg); reg &= ~(regs->wdt_reset_mask); - reg |= ~(regs->wdt_reset_val); + reg |= regs->wdt_reset_val; writel(reg, wdt_base + regs->wdt_cfg); /* Enable watchdog */ -- cgit v0.10.2 From 64307b48f79f35d28ed6b44e20b773bc00a0152e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 27 Jul 2015 12:03:30 -0400 Subject: watchdog: (nv_tco) add support for MCP79 Tested on the Nvidia chipset with an SMBus controller PCI ID 0x0AA2 (as shown in the PCI listing during the boot sequence). Signed-off-by: Vivien Didelot Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index c028454..bd917bb 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -294,6 +294,8 @@ static const struct pci_device_id tco_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SMBUS, PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS, + PCI_ANY_ID, PCI_ANY_ID, }, { 0, }, /* End of list */ }; MODULE_DEVICE_TABLE(pci, tco_pci_tbl); -- cgit v0.10.2 From fa928ee8d4af1f70eee77a5ac77c084a0715eb9e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 29 Jul 2015 09:45:36 -0700 Subject: watchdog: booke_wdt: Use infrastructure to check timeout limits The watchdog infrastructure checks the maximum timeout for us. Use it. 
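What the core does with those limits, roughly (simplified from the framework's watchdog_timeout_invalid() check; the struct is trimmed to the two relevant fields):

        struct watchdog_device { unsigned int min_timeout, max_timeout; };

        /* timeout t is rejected before the driver's set_timeout op runs */
        static int timeout_invalid(const struct watchdog_device *wdd, unsigned int t)
        {
                return wdd->max_timeout != 0 &&
                       (t < wdd->min_timeout || t > wdd->max_timeout);
        }

So a driver that fills in min_timeout/max_timeout before registration can drop its own range checks entirely, which is exactly what the patch does.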
Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index e96b09b..04da4b6 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -186,8 +186,6 @@ static int booke_wdt_stop(struct watchdog_device *wdog) static int booke_wdt_set_timeout(struct watchdog_device *wdt_dev, unsigned int timeout) { - if (timeout > MAX_WDT_TIMEOUT) - return -EINVAL; wdt_dev->timeout = timeout; booke_wdt_set(wdt_dev); @@ -211,7 +209,6 @@ static struct watchdog_device booke_wdt_dev = { .info = &booke_wdt_info, .ops = &booke_wdt_ops, .min_timeout = 1, - .max_timeout = 0xFFFF }; static void __exit booke_wdt_exit(void) @@ -229,6 +226,7 @@ static int __init booke_wdt_init(void) booke_wdt_set_timeout(&booke_wdt_dev, period_to_sec(booke_wdt_period)); watchdog_set_nowayout(&booke_wdt_dev, nowayout); + booke_wdt_dev.max_timeout = MAX_WDT_TIMEOUT; if (booke_wdt_enabled) booke_wdt_start(&booke_wdt_dev); -- cgit v0.10.2 From ab54d7f017772e89964d4040937a83cd4468562a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 31 Jul 2015 11:39:39 +0200 Subject: Documentation: watchdog: at91sam9_wdt: add clocks property The watchdog has an input clock, the slow clock. It is required as it will not function without it. Signed-off-by: Alexandre Belloni Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt index a4d8697..86fa6de 100644 --- a/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt @@ -6,6 +6,7 @@ Required properties: - compatible: must be "atmel,at91sam9260-wdt". - reg: physical base address of the controller and length of memory mapped region. +- clocks: phandle to input clock. Optional properties: - timeout-sec: contains the watchdog timeout in seconds. @@ -39,6 +40,7 @@ Example: compatible = "atmel,at91sam9260-wdt"; reg = <0xfffffd40 0x10>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + clocks = <&clk32k>; timeout-sec = <15>; atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; -- cgit v0.10.2 From 0a0a542f6bbb2ebe956f0117c842302442ef52da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 30 Jul 2015 11:32:23 +0200 Subject: watchdog: gpio-wdt: be more strict about hw_algo matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strncmp(algo, "toggle", 6) doesn't compare the trailing '\0' byte, so using hw_algo = "toggleboggle" is recognized the same way as hw_algo = "toggle" . While this doesn't introduce any problems for a device tree that sticks to the documented settings it's still ugly. Fix this by using strcmp to only match on "toggle" and "level". Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index 1687cc2..57d30f1 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -182,10 +182,10 @@ static int gpio_wdt_probe(struct platform_device *pdev) ret = of_property_read_string(pdev->dev.of_node, "hw_algo", &algo); if (ret) return ret; - if (!strncmp(algo, "toggle", 6)) { + if (!strcmp(algo, "toggle")) { priv->hw_algo = HW_ALGO_TOGGLE; f = GPIOF_IN; - } else if (!strncmp(algo, "level", 5)) { + } else if (!strcmp(algo, "level")) { priv->hw_algo = HW_ALGO_LEVEL; f = priv->active_low ? 
GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW; } else { -- cgit v0.10.2 From 4f2d0b2d1b31cbe704c8f94e74e46cb64187ab0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 31 Jul 2015 09:21:36 +0200 Subject: watchdog: gpio-wdt: ping already at startup for always running devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During probe for an always-running watchdog a timer is setup to constantly ping the watchdog while the device is not open. The gpio to ping the watchdog is setup to inactive. For a watchdog with hw_algo = "toggle" this results in a ping depending on the initial state of the gpio, for hw_algo = "level" no ping is generated. Make sure that the first automatic ping is sent immediately and not only when the timer expires the first time. This makes the machine survive in case more than half of the watchdog timeout is already elapsed. (Which is very probable for the chip I'm faced with that has a timeout of one second.) Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index 57d30f1..5e16b09 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -50,12 +50,41 @@ static void gpio_wdt_disable(struct gpio_wdt_priv *priv) gpio_direction_input(priv->gpio); } +static void gpio_wdt_hwping(unsigned long data) +{ + struct watchdog_device *wdd = (struct watchdog_device *)data; + struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd); + + if (priv->armed && time_after(jiffies, priv->last_jiffies + + msecs_to_jiffies(wdd->timeout * 1000))) { + dev_crit(wdd->dev, "Timer expired. System will reboot soon!\n"); + return; + } + + /* Restart timer */ + mod_timer(&priv->timer, jiffies + priv->hw_margin); + + switch (priv->hw_algo) { + case HW_ALGO_TOGGLE: + /* Toggle output pin */ + priv->state = !priv->state; + gpio_set_value_cansleep(priv->gpio, priv->state); + break; + case HW_ALGO_LEVEL: + /* Pulse */ + gpio_set_value_cansleep(priv->gpio, !priv->active_low); + udelay(1); + gpio_set_value_cansleep(priv->gpio, priv->active_low); + break; + } +} + static void gpio_wdt_start_impl(struct gpio_wdt_priv *priv) { priv->state = priv->active_low; gpio_direction_output(priv->gpio, priv->state); priv->last_jiffies = jiffies; - mod_timer(&priv->timer, priv->last_jiffies + priv->hw_margin); + gpio_wdt_hwping((unsigned long)&priv->wdd); } static int gpio_wdt_start(struct watchdog_device *wdd) @@ -97,35 +126,6 @@ static int gpio_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t) return gpio_wdt_ping(wdd); } -static void gpio_wdt_hwping(unsigned long data) -{ - struct watchdog_device *wdd = (struct watchdog_device *)data; - struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd); - - if (priv->armed && time_after(jiffies, priv->last_jiffies + - msecs_to_jiffies(wdd->timeout * 1000))) { - dev_crit(wdd->dev, "Timer expired. 
System will reboot soon!\n"); - return; - } - - /* Restart timer */ - mod_timer(&priv->timer, jiffies + priv->hw_margin); - - switch (priv->hw_algo) { - case HW_ALGO_TOGGLE: - /* Toggle output pin */ - priv->state = !priv->state; - gpio_set_value_cansleep(priv->gpio, priv->state); - break; - case HW_ALGO_LEVEL: - /* Pulse */ - gpio_set_value_cansleep(priv->gpio, !priv->active_low); - udelay(1); - gpio_set_value_cansleep(priv->gpio, priv->active_low); - break; - } -} - static int gpio_wdt_notify_sys(struct notifier_block *nb, unsigned long code, void *unused) { -- cgit v0.10.2 From 7c25f8c9f67708e6464d2221bc311cbd99e950dc Mon Sep 17 00:00:00 2001 From: Ariel D'Alessandro Date: Sat, 1 Aug 2015 15:37:16 -0300 Subject: watchdog: NXP LPC18xx Watchdog Timer Driver This commit adds support for the watchdog timer found in NXP LPC SoCs family, which includes LPC18xx/LPC43xx. Other SoCs in that family may share the same watchdog hardware. Watchdog driver registers a restart handler that will restart the system by performing an incorrect feed after ensuring the watchdog is enabled in reset mode. As watchdog cannot be disabled in hardware, driver's stop routine will regularly send a keepalive ping using a timer. Signed-off-by: Ariel D'Alessandro Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 241fafd..b69bd67 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -558,6 +558,17 @@ config DIGICOLOR_WATCHDOG To compile this driver as a module, choose M here: the module will be called digicolor_wdt. +config LPC18XX_WATCHDOG + tristate "LPC18xx/43xx Watchdog" + depends on ARCH_LPC18XX || COMPILE_TEST + select WATCHDOG_CORE + help + Say Y here if to include support for the watchdog timer + in NXP LPC SoCs family, which includes LPC18xx/LPC43xx + processors. + To compile this driver as a module, choose M here: the + module will be called lpc18xx_wdt. + # AVR32 Architecture config AT32AP700X_WDT diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 59ea9a1..1b0ef48 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_TEGRA_WATCHDOG) += tegra_wdt.o obj-$(CONFIG_MESON_WATCHDOG) += meson_wdt.o obj-$(CONFIG_MEDIATEK_WATCHDOG) += mtk_wdt.o obj-$(CONFIG_DIGICOLOR_WATCHDOG) += digicolor_wdt.o +obj-$(CONFIG_LPC18XX_WATCHDOG) += lpc18xx_wdt.o # AVR32 Architecture obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c new file mode 100644 index 0000000..00ff5bd --- /dev/null +++ b/drivers/watchdog/lpc18xx_wdt.c @@ -0,0 +1,340 @@ +/* + * NXP LPC18xx Watchdog Timer (WDT) + * + * Copyright (c) 2015 Ariel D'Alessandro + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * Notes + * ----- + * The Watchdog consists of a fixed divide-by-4 clock pre-scaler and a 24-bit + * counter which decrements on every clock cycle. 
+ */ + +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/reboot.h> +#include <linux/watchdog.h> + +/* Registers */ +#define LPC18XX_WDT_MOD 0x00 +#define LPC18XX_WDT_MOD_WDEN BIT(0) +#define LPC18XX_WDT_MOD_WDRESET BIT(1) + +#define LPC18XX_WDT_TC 0x04 +#define LPC18XX_WDT_TC_MIN 0xff +#define LPC18XX_WDT_TC_MAX 0xffffff + +#define LPC18XX_WDT_FEED 0x08 +#define LPC18XX_WDT_FEED_MAGIC1 0xaa +#define LPC18XX_WDT_FEED_MAGIC2 0x55 + +#define LPC18XX_WDT_TV 0x0c + +/* Clock pre-scaler */ +#define LPC18XX_WDT_CLK_DIV 4 + +/* Timeout values in seconds */ +#define LPC18XX_WDT_DEF_TIMEOUT 30U + +static int heartbeat; +module_param(heartbeat, int, 0); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds (default=" + __MODULE_STRING(LPC18XX_WDT_DEF_TIMEOUT) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +struct lpc18xx_wdt_dev { + struct watchdog_device wdt_dev; + struct clk *reg_clk; + struct clk *wdt_clk; + unsigned long clk_rate; + void __iomem *base; + struct timer_list timer; + struct notifier_block restart_handler; + spinlock_t lock; +}; + +static int lpc18xx_wdt_feed(struct watchdog_device *wdt_dev) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); + unsigned long flags; + + /* + * An abort condition will occur if an interrupt happens during the feed + * sequence. + */ + spin_lock_irqsave(&lpc18xx_wdt->lock, flags); + writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED); + writel(LPC18XX_WDT_FEED_MAGIC2, lpc18xx_wdt->base + LPC18XX_WDT_FEED); + spin_unlock_irqrestore(&lpc18xx_wdt->lock, flags); + + return 0; +} + +static void lpc18xx_wdt_timer_feed(unsigned long data) +{ + struct watchdog_device *wdt_dev = (struct watchdog_device *)data; + struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); + + lpc18xx_wdt_feed(wdt_dev); + + /* Use safe value (1/2 of real timeout) */ + mod_timer(&lpc18xx_wdt->timer, jiffies + + msecs_to_jiffies((wdt_dev->timeout * MSEC_PER_SEC) / 2)); +} + +/* + * Since LPC18xx Watchdog cannot be disabled in hardware, we must keep feeding + * it with a timer until userspace watchdog software takes over.
+ */ +static int lpc18xx_wdt_stop(struct watchdog_device *wdt_dev) +{ + lpc18xx_wdt_timer_feed((unsigned long)wdt_dev); + + return 0; +} + +static void __lpc18xx_wdt_set_timeout(struct lpc18xx_wdt_dev *lpc18xx_wdt) +{ + unsigned int val; + + val = DIV_ROUND_UP(lpc18xx_wdt->wdt_dev.timeout * lpc18xx_wdt->clk_rate, + LPC18XX_WDT_CLK_DIV); + writel(val, lpc18xx_wdt->base + LPC18XX_WDT_TC); +} + +static int lpc18xx_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int new_timeout) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); + + lpc18xx_wdt->wdt_dev.timeout = new_timeout; + __lpc18xx_wdt_set_timeout(lpc18xx_wdt); + + return 0; +} + +unsigned int lpc18xx_wdt_get_timeleft(struct watchdog_device *wdt_dev) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); + unsigned int val; + + val = readl(lpc18xx_wdt->base + LPC18XX_WDT_TV); + return (val * LPC18XX_WDT_CLK_DIV) / lpc18xx_wdt->clk_rate; +} + +static int lpc18xx_wdt_start(struct watchdog_device *wdt_dev) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); + unsigned int val; + + if (timer_pending(&lpc18xx_wdt->timer)) + del_timer(&lpc18xx_wdt->timer); + + val = readl(lpc18xx_wdt->base + LPC18XX_WDT_MOD); + val |= LPC18XX_WDT_MOD_WDEN; + val |= LPC18XX_WDT_MOD_WDRESET; + writel(val, lpc18xx_wdt->base + LPC18XX_WDT_MOD); + + /* + * Setting the WDEN bit in the WDMOD register is not sufficient to + * enable the Watchdog. A valid feed sequence must be completed after + * setting WDEN before the Watchdog is capable of generating a reset. + */ + lpc18xx_wdt_feed(wdt_dev); + + return 0; +} + +static struct watchdog_info lpc18xx_wdt_info = { + .identity = "NXP LPC18xx Watchdog", + .options = WDIOF_SETTIMEOUT | + WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE, +}; + +static const struct watchdog_ops lpc18xx_wdt_ops = { + .owner = THIS_MODULE, + .start = lpc18xx_wdt_start, + .stop = lpc18xx_wdt_stop, + .ping = lpc18xx_wdt_feed, + .set_timeout = lpc18xx_wdt_set_timeout, + .get_timeleft = lpc18xx_wdt_get_timeleft, +}; + +static int lpc18xx_wdt_restart(struct notifier_block *this, unsigned long mode, + void *cmd) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = container_of(this, + struct lpc18xx_wdt_dev, restart_handler); + unsigned long flags; + int val; + + /* + * Incorrect feed sequence causes immediate watchdog reset if enabled. 
+ */ + spin_lock_irqsave(&lpc18xx_wdt->lock, flags); + + val = readl(lpc18xx_wdt->base + LPC18XX_WDT_MOD); + val |= LPC18XX_WDT_MOD_WDEN; + val |= LPC18XX_WDT_MOD_WDRESET; + writel(val, lpc18xx_wdt->base + LPC18XX_WDT_MOD); + + writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED); + writel(LPC18XX_WDT_FEED_MAGIC2, lpc18xx_wdt->base + LPC18XX_WDT_FEED); + + writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED); + writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED); + + spin_unlock_irqrestore(&lpc18xx_wdt->lock, flags); + + return NOTIFY_OK; +} + +static int lpc18xx_wdt_probe(struct platform_device *pdev) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt; + struct device *dev = &pdev->dev; + struct resource *res; + int ret; + + lpc18xx_wdt = devm_kzalloc(dev, sizeof(*lpc18xx_wdt), GFP_KERNEL); + if (!lpc18xx_wdt) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + lpc18xx_wdt->base = devm_ioremap_resource(dev, res); + if (IS_ERR(lpc18xx_wdt->base)) + return PTR_ERR(lpc18xx_wdt->base); + + lpc18xx_wdt->reg_clk = devm_clk_get(dev, "reg"); + if (IS_ERR(lpc18xx_wdt->reg_clk)) { + dev_err(dev, "failed to get the reg clock\n"); + return PTR_ERR(lpc18xx_wdt->reg_clk); + } + + lpc18xx_wdt->wdt_clk = devm_clk_get(dev, "wdtclk"); + if (IS_ERR(lpc18xx_wdt->wdt_clk)) { + dev_err(dev, "failed to get the wdt clock\n"); + return PTR_ERR(lpc18xx_wdt->wdt_clk); + } + + ret = clk_prepare_enable(lpc18xx_wdt->reg_clk); + if (ret) { + dev_err(dev, "could not prepare or enable sys clock\n"); + return ret; + } + + ret = clk_prepare_enable(lpc18xx_wdt->wdt_clk); + if (ret) { + dev_err(dev, "could not prepare or enable wdt clock\n"); + goto disable_reg_clk; + } + + /* We use the clock rate to calculate timeouts */ + lpc18xx_wdt->clk_rate = clk_get_rate(lpc18xx_wdt->wdt_clk); + if (lpc18xx_wdt->clk_rate == 0) { + dev_err(dev, "failed to get clock rate\n"); + ret = -EINVAL; + goto disable_wdt_clk; + } + + lpc18xx_wdt->wdt_dev.info = &lpc18xx_wdt_info; + lpc18xx_wdt->wdt_dev.ops = &lpc18xx_wdt_ops; + + lpc18xx_wdt->wdt_dev.min_timeout = DIV_ROUND_UP(LPC18XX_WDT_TC_MIN * + LPC18XX_WDT_CLK_DIV, lpc18xx_wdt->clk_rate); + + lpc18xx_wdt->wdt_dev.max_timeout = (LPC18XX_WDT_TC_MAX * + LPC18XX_WDT_CLK_DIV) / lpc18xx_wdt->clk_rate; + + lpc18xx_wdt->wdt_dev.timeout = min(lpc18xx_wdt->wdt_dev.max_timeout, + LPC18XX_WDT_DEF_TIMEOUT); + + spin_lock_init(&lpc18xx_wdt->lock); + + lpc18xx_wdt->wdt_dev.parent = dev; + watchdog_set_drvdata(&lpc18xx_wdt->wdt_dev, lpc18xx_wdt); + + ret = watchdog_init_timeout(&lpc18xx_wdt->wdt_dev, heartbeat, dev); + + __lpc18xx_wdt_set_timeout(lpc18xx_wdt); + + setup_timer(&lpc18xx_wdt->timer, lpc18xx_wdt_timer_feed, + (unsigned long)&lpc18xx_wdt->wdt_dev); + + watchdog_set_nowayout(&lpc18xx_wdt->wdt_dev, nowayout); + + platform_set_drvdata(pdev, lpc18xx_wdt); + + ret = watchdog_register_device(&lpc18xx_wdt->wdt_dev); + if (ret) + goto disable_wdt_clk; + + lpc18xx_wdt->restart_handler.notifier_call = lpc18xx_wdt_restart; + lpc18xx_wdt->restart_handler.priority = 128; + ret = register_restart_handler(&lpc18xx_wdt->restart_handler); + if (ret) + dev_warn(dev, "failed to register restart handler: %d\n", ret); + + return 0; + +disable_wdt_clk: + clk_disable_unprepare(lpc18xx_wdt->wdt_clk); +disable_reg_clk: + clk_disable_unprepare(lpc18xx_wdt->reg_clk); + return ret; +} + +static void lpc18xx_wdt_shutdown(struct platform_device *pdev) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = platform_get_drvdata(pdev); + + 
lpc18xx_wdt_stop(&lpc18xx_wdt->wdt_dev); +} + +static int lpc18xx_wdt_remove(struct platform_device *pdev) +{ + struct lpc18xx_wdt_dev *lpc18xx_wdt = platform_get_drvdata(pdev); + + unregister_restart_handler(&lpc18xx_wdt->restart_handler); + + dev_warn(&pdev->dev, "I quit now, hardware will probably reboot!\n"); + del_timer(&lpc18xx_wdt->timer); + + watchdog_unregister_device(&lpc18xx_wdt->wdt_dev); + clk_disable_unprepare(lpc18xx_wdt->wdt_clk); + clk_disable_unprepare(lpc18xx_wdt->reg_clk); + + return 0; +} + +static const struct of_device_id lpc18xx_wdt_match[] = { + { .compatible = "nxp,lpc1850-wwdt" }, + {} +}; +MODULE_DEVICE_TABLE(of, lpc18xx_wdt_match); + +static struct platform_driver lpc18xx_wdt_driver = { + .driver = { + .name = "lpc18xx-wdt", + .of_match_table = lpc18xx_wdt_match, + }, + .probe = lpc18xx_wdt_probe, + .remove = lpc18xx_wdt_remove, + .shutdown = lpc18xx_wdt_shutdown, +}; +module_platform_driver(lpc18xx_wdt_driver); + +MODULE_AUTHOR("Ariel D'Alessandro "); +MODULE_DESCRIPTION("NXP LPC18xx Watchdog Timer Driver"); +MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From cfde37e1ec18dc68a52a1b882390c0f9c52b5f10 Mon Sep 17 00:00:00 2001 From: Ariel D'Alessandro Date: Sat, 1 Aug 2015 15:37:17 -0300 Subject: DT: watchdog: Add NXP LPC18xx Watchdog Timer binding documentation Add the devicetree binding document for NXP LPC18xx Watchdog Timer. Signed-off-by: Ariel D'Alessandro Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt b/Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt new file mode 100644 index 0000000..09f6b24 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt @@ -0,0 +1,19 @@ +* NXP LPC18xx Watchdog Timer (WDT) + +Required properties: +- compatible: Should be "nxp,lpc1850-wwdt" +- reg: Should contain WDT registers location and length +- clocks: Must contain an entry for each entry in clock-names. +- clock-names: Should contain "wdtclk" and "reg"; the watchdog counter + clock and register interface clock respectively. 
+- interrupts: Should contain WDT interrupt + +Example: + +watchdog@40080000 { + compatible = "nxp,lpc1850-wwdt"; + reg = <0x40080000 0x24>; + clocks = <&cgu BASE_SAFE_CLK>, <&ccu1 CLK_CPU_WWDT>; + clock-names = "wdtclk", "reg"; + interrupts = <49>; +}; -- cgit v0.10.2 From 6cd8a1b9f71290d58099101e65e7e2279bcdb212 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Fri, 7 Aug 2015 10:28:40 -0700 Subject: watchdog: lpc18xx_wdt_get_timeleft() can be static Signed-off-by: Fengguang Wu Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c index 00ff5bd..ab7b8b1 100644 --- a/drivers/watchdog/lpc18xx_wdt.c +++ b/drivers/watchdog/lpc18xx_wdt.c @@ -123,7 +123,7 @@ static int lpc18xx_wdt_set_timeout(struct watchdog_device *wdt_dev, return 0; } -unsigned int lpc18xx_wdt_get_timeleft(struct watchdog_device *wdt_dev) +static unsigned int lpc18xx_wdt_get_timeleft(struct watchdog_device *wdt_dev) { struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); unsigned int val; -- cgit v0.10.2 From a57e06f7c6365f7d6e13f35933d3ef329205c1af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:52 +0200 Subject: watchdog: mpc8xxx: remove dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 689381a..8ad42b8 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -68,12 +68,6 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -/* - * We always prescale, but if someone really doesn't want to they can set this - * to 0 - */ -static int prescale = 1; - static DEFINE_SPINLOCK(wdt_spinlock); static void mpc8xxx_wdt_keepalive(void) @@ -101,11 +95,9 @@ static void mpc8xxx_wdt_timer_ping(unsigned long arg) static int mpc8xxx_wdt_start(struct watchdog_device *w) { - u32 tmp = SWCRR_SWEN; + u32 tmp = SWCRR_SWEN | SWCRR_SWPR; /* Good, fire up the show */ - if (prescale) - tmp |= SWCRR_SWPR; if (reset) tmp |= SWCRR_SWRI; @@ -179,10 +171,7 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) } /* Calculate the timeout in seconds */ - if (prescale) - timeout_sec = (timeout * wdt_type->prescaler) / freq; - else - timeout_sec = timeout / freq; + timeout_sec = (timeout * wdt_type->prescaler) / freq; mpc8xxx_wdt_dev.timeout = timeout_sec; #ifdef MODULE -- cgit v0.10.2 From 50ffb53ef2311e4631086d27783b00db6859c60e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:53 +0200 Subject: watchdog: mpc8xxx: simplify registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ef90174f8210 ("watchdog: watchdog_core: Add watchdog registration deferral mechanism") there is no need to delay the call to watchdog_register_device any more. So simplify the registration code accordingly. Resetting wd_base to NULL can then also be dropped because nothing depends on it being NULL to signal probe failure any more. (The matching wd_base = NULL in .remove was missing, too.)
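For a feel for the numbers in the mpc8xxx timeout calculation above (timeout_sec = (timeout * wdt_type->prescaler) / freq), a sketch; the divide-by-65536 prescaler and the 266 MHz clock are illustrative assumptions, not values taken from these patches:

	u16 timeout = 0xffff;		/* default SWTC count used by the driver */
	u32 prescaler = 0x10000;	/* assumed divide-by-65536 prescaler */
	u32 freq = 266666666;		/* assumed 266 MHz system clock */
	unsigned int timeout_sec = ((u64)timeout * prescaler) / freq;	/* ~16 s */

With these example numbers the hardware watchdog bites roughly 16 seconds after the last ping, which is also why the driver's timer pings at twice the needed rate.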
Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 8ad42b8..e0acda3 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -51,7 +51,6 @@ struct mpc8xxx_wdt_type { }; static struct mpc8xxx_wdt __iomem *wd_base; -static int mpc8xxx_wdt_init_late(void); static u16 timeout = 0xffff; module_param(timeout, ushort, 0); @@ -174,11 +173,14 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) timeout_sec = (timeout * wdt_type->prescaler) / freq; mpc8xxx_wdt_dev.timeout = timeout_sec; -#ifdef MODULE - ret = mpc8xxx_wdt_init_late(); - if (ret) + + watchdog_set_nowayout(&mpc8xxx_wdt_dev, nowayout); + + ret = watchdog_register_device(&mpc8xxx_wdt_dev); + if (ret) { + pr_err("cannot register watchdog device (err=%d)\n", ret); goto err_unmap; -#endif + } pr_info("WDT driver for MPC8xxx initialized. mode:%s timeout=%d (%d seconds)\n", reset ? "reset" : "interrupt", timeout, timeout_sec); @@ -193,7 +195,6 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) return 0; err_unmap: iounmap(wd_base); - wd_base = NULL; return ret; } @@ -242,31 +243,6 @@ static struct platform_driver mpc8xxx_wdt_driver = { }, }; -/* - * We do wdt initialization in two steps: arch_initcall probes the wdt - * very early to start pinging the watchdog (misc devices are not yet - * available), and later module_init() just registers the misc device. - */ -static int mpc8xxx_wdt_init_late(void) -{ - int ret; - - if (!wd_base) - return -ENODEV; - - watchdog_set_nowayout(&mpc8xxx_wdt_dev, nowayout); - - ret = watchdog_register_device(&mpc8xxx_wdt_dev); - if (ret) { - pr_err("cannot register watchdog device (err=%d)\n", ret); - return ret; - } - return 0; -} -#ifndef MODULE -module_init(mpc8xxx_wdt_init_late); -#endif - static int __init mpc8xxx_wdt_init(void) { return platform_driver_register(&mpc8xxx_wdt_driver); -- cgit v0.10.2 From f0ded83b96d27b27db0015e701cfd916330e3c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:54 +0200 Subject: watchdog: mpc8xxx: make use of of_device_get_match_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is new in v4.2-rc1 and makes a forward declaration of the match table superfluous, which can thus be removed.
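The same conversion applies wherever a driver keeps per-SoC data in its match table; a minimal sketch of the idiom (the foo_wdt names and the "acme,foo-wdt" compatible are hypothetical):

	struct foo_wdt_type {
		unsigned int prescaler;
	};

	static const struct foo_wdt_type foo_wdt_type_v1 = {
		.prescaler = 0x10000,
	};

	static const struct of_device_id foo_wdt_match[] = {
		{ .compatible = "acme,foo-wdt", .data = &foo_wdt_type_v1 },
		{ /* sentinel */ }
	};

	static int foo_wdt_probe(struct platform_device *pdev)
	{
		/* one call replaces of_match_device() plus the ->data dereference */
		const struct foo_wdt_type *type = of_device_get_match_data(&pdev->dev);

		if (!type)
			return -EINVAL;

		return 0;
	}

Since the lookup goes through the driver's own of_match_table, a NULL result simply means the device was bound without an OF match, so returning -EINVAL is sufficient.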
Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index e0acda3..fb1fe967 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -139,7 +139,6 @@ static struct watchdog_device mpc8xxx_wdt_dev = { .ops = &mpc8xxx_wdt_ops, }; -static const struct of_device_id mpc8xxx_wdt_match[]; static int mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; @@ -150,10 +149,9 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) bool enabled; unsigned int timeout_sec; - match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev); - if (!match) + wdt_type = of_device_get_match_data(&ofdev->dev); + if (!wdt_type) return -EINVAL; - wdt_type = match->data; if (!freq || freq == -1) return -EINVAL; -- cgit v0.10.2 From de5f71222bd544558d81701454eb457b295de96e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:55 +0200 Subject: watchdog: mpc8xxx: use devm_ioremap_resource to map memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simplifies the error paths and device unbinding. Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index fb1fe967..a6790fc 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -142,8 +142,7 @@ static struct watchdog_device mpc8xxx_wdt_dev = { static int mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; - const struct of_device_id *match; - struct device_node *np = ofdev->dev.of_node; + struct resource *res; const struct mpc8xxx_wdt_type *wdt_type; u32 freq = fsl_get_sys_freq(); bool enabled; @@ -156,15 +155,15 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) if (!freq || freq == -1) return -EINVAL; - wd_base = of_iomap(np, 0); - if (!wd_base) - return -ENOMEM; + res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); + wd_base = devm_ioremap_resource(&ofdev->dev, res); + if (IS_ERR(wd_base)) + return PTR_ERR(wd_base); enabled = in_be32(&wd_base->swcrr) & SWCRR_SWEN; if (!enabled && wdt_type->hw_enabled) { pr_info("could not be enabled in software\n"); - ret = -ENOSYS; - goto err_unmap; + return -ENOSYS; } /* Calculate the timeout in seconds */ @@ -177,7 +176,7 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) ret = watchdog_register_device(&mpc8xxx_wdt_dev); if (ret) { pr_err("cannot register watchdog device (err=%d)\n", ret); - goto err_unmap; + return ret; } pr_info("WDT driver for MPC8xxx initialized. mode:%s timeout=%d (%d seconds)\n", @@ -191,9 +190,6 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) if (enabled) mod_timer(&wdt_timer, jiffies); return 0; -err_unmap: - iounmap(wd_base); - return ret; } static int mpc8xxx_wdt_remove(struct platform_device *ofdev) @@ -202,7 +198,6 @@ static int mpc8xxx_wdt_remove(struct platform_device *ofdev) reset ? 
"reset" : "machine check exception"); del_timer_sync(&wdt_timer); watchdog_unregister_device(&mpc8xxx_wdt_dev); - iounmap(wd_base); return 0; } -- cgit v0.10.2 From 7997ebad4d747ff5561cb5ec0c7423e0d4e628d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:56 +0200 Subject: watchdog: mpc8xxx: use dynamic memory for device specific data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of relying on global static memory dynamically allocate the needed data. This has the benefit of some saved bytes if the driver is not in use and making it possible to bind more than one device (even though this has no known use case). Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index a6790fc..0b69797 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -50,7 +50,12 @@ struct mpc8xxx_wdt_type { bool hw_enabled; }; -static struct mpc8xxx_wdt __iomem *wd_base; +struct mpc8xxx_wdt_ddata { + struct mpc8xxx_wdt __iomem *base; + struct watchdog_device wdd; + struct timer_list timer; + spinlock_t lock; +}; static u16 timeout = 0xffff; module_param(timeout, ushort, 0); @@ -67,33 +72,29 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -static DEFINE_SPINLOCK(wdt_spinlock); - -static void mpc8xxx_wdt_keepalive(void) +static void mpc8xxx_wdt_keepalive(struct mpc8xxx_wdt_ddata *ddata) { /* Ping the WDT */ - spin_lock(&wdt_spinlock); - out_be16(&wd_base->swsrr, 0x556c); - out_be16(&wd_base->swsrr, 0xaa39); - spin_unlock(&wdt_spinlock); + spin_lock(&ddata->lock); + out_be16(&ddata->base->swsrr, 0x556c); + out_be16(&ddata->base->swsrr, 0xaa39); + spin_unlock(&ddata->lock); } -static struct watchdog_device mpc8xxx_wdt_dev; -static void mpc8xxx_wdt_timer_ping(unsigned long arg); -static DEFINE_TIMER(wdt_timer, mpc8xxx_wdt_timer_ping, 0, - (unsigned long)&mpc8xxx_wdt_dev); - static void mpc8xxx_wdt_timer_ping(unsigned long arg) { - struct watchdog_device *w = (struct watchdog_device *)arg; + struct mpc8xxx_wdt_ddata *ddata = (void *)arg; - mpc8xxx_wdt_keepalive(); + mpc8xxx_wdt_keepalive(ddata); /* We're pinging it twice faster than needed, just to be sure. 
*/ - mod_timer(&wdt_timer, jiffies + HZ * w->timeout / 2); + mod_timer(&ddata->timer, jiffies + HZ * ddata->wdd.timeout / 2); } static int mpc8xxx_wdt_start(struct watchdog_device *w) { + struct mpc8xxx_wdt_ddata *ddata = + container_of(w, struct mpc8xxx_wdt_ddata, wdd); + u32 tmp = SWCRR_SWEN | SWCRR_SWPR; /* Good, fire up the show */ @@ -102,22 +103,28 @@ static int mpc8xxx_wdt_start(struct watchdog_device *w) tmp |= timeout << 16; - out_be32(&wd_base->swcrr, tmp); + out_be32(&ddata->base->swcrr, tmp); - del_timer_sync(&wdt_timer); + del_timer_sync(&ddata->timer); return 0; } static int mpc8xxx_wdt_ping(struct watchdog_device *w) { - mpc8xxx_wdt_keepalive(); + struct mpc8xxx_wdt_ddata *ddata = + container_of(w, struct mpc8xxx_wdt_ddata, wdd); + + mpc8xxx_wdt_keepalive(ddata); return 0; } static int mpc8xxx_wdt_stop(struct watchdog_device *w) { - mod_timer(&wdt_timer, jiffies); + struct mpc8xxx_wdt_ddata *ddata = + container_of(w, struct mpc8xxx_wdt_ddata, wdd); + + mod_timer(&ddata->timer, jiffies); return 0; } @@ -134,16 +141,12 @@ static struct watchdog_ops mpc8xxx_wdt_ops = { .stop = mpc8xxx_wdt_stop, }; -static struct watchdog_device mpc8xxx_wdt_dev = { - .info = &mpc8xxx_wdt_info, - .ops = &mpc8xxx_wdt_ops, -}; - static int mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; struct resource *res; const struct mpc8xxx_wdt_type *wdt_type; + struct mpc8xxx_wdt_ddata *ddata; u32 freq = fsl_get_sys_freq(); bool enabled; unsigned int timeout_sec; @@ -155,25 +158,36 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) if (!freq || freq == -1) return -EINVAL; + ddata = devm_kzalloc(&ofdev->dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); - wd_base = devm_ioremap_resource(&ofdev->dev, res); - if (IS_ERR(wd_base)) - return PTR_ERR(wd_base); + ddata->base = devm_ioremap_resource(&ofdev->dev, res); + if (IS_ERR(ddata->base)) + return PTR_ERR(ddata->base); - enabled = in_be32(&wd_base->swcrr) & SWCRR_SWEN; + enabled = in_be32(&ddata->base->swcrr) & SWCRR_SWEN; if (!enabled && wdt_type->hw_enabled) { pr_info("could not be enabled in software\n"); return -ENOSYS; } + spin_lock_init(&ddata->lock); + setup_timer(&ddata->timer, mpc8xxx_wdt_timer_ping, + (unsigned long)ddata); + + ddata->wdd.info = &mpc8xxx_wdt_info, + ddata->wdd.ops = &mpc8xxx_wdt_ops, + /* Calculate the timeout in seconds */ timeout_sec = (timeout * wdt_type->prescaler) / freq; - mpc8xxx_wdt_dev.timeout = timeout_sec; + ddata->wdd.timeout = timeout_sec; - watchdog_set_nowayout(&mpc8xxx_wdt_dev, nowayout); + watchdog_set_nowayout(&ddata->wdd, nowayout); - ret = watchdog_register_device(&mpc8xxx_wdt_dev); + ret = watchdog_register_device(&ddata->wdd); if (ret) { pr_err("cannot register watchdog device (err=%d)\n", ret); return ret; @@ -188,16 +202,20 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) * userspace handles it. */ if (enabled) - mod_timer(&wdt_timer, jiffies); + mod_timer(&ddata->timer, jiffies); + + platform_set_drvdata(ofdev, ddata); return 0; } static int mpc8xxx_wdt_remove(struct platform_device *ofdev) { + struct mpc8xxx_wdt_ddata *ddata = platform_get_drvdata(ofdev); + pr_crit("Watchdog removed, expect the %s soon!\n", reset ? 
"reset" : "machine check exception"); - del_timer_sync(&wdt_timer); - watchdog_unregister_device(&mpc8xxx_wdt_dev); + del_timer_sync(&ddata->timer); + watchdog_unregister_device(&ddata->wdd); return 0; } -- cgit v0.10.2 From 72cd501e6a9b578cf99ae062477d9c7a938a3238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:57 +0200 Subject: watchdog: mpc8xxx: use better error code when watchdog cannot be enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit checkpatch warns about ENOSYS, telling "ENOSYS means 'invalid syscall nr' and nothing else". So use ENODEV instead. Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 0b69797..5f2273a 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -170,7 +170,7 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) enabled = in_be32(&ddata->base->swcrr) & SWCRR_SWEN; if (!enabled && wdt_type->hw_enabled) { pr_info("could not be enabled in software\n"); - return -ENOSYS; + return -ENODEV; } spin_lock_init(&ddata->lock); -- cgit v0.10.2 From f8c33e9717150f7b8063d6d77a8ea90adad434e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Aug 2015 10:15:58 +0200 Subject: watchdog: mpc8xxx: allow to compile for MPC512x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MPC5125 processor features a watchdog device that is identical to the MPC8610 one. So allow to enable the driver for MPC512x kernel configurations. Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b69bd67..cd99fcc 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1344,7 +1344,7 @@ config MPC5200_WDT config 8xxx_WDT tristate "MPC8xxx Platform Watchdog Timer" - depends on PPC_8xx || PPC_83xx || PPC_86xx + depends on PPC_8xx || PPC_83xx || PPC_86xx || PPC_MPC512x select WATCHDOG_CORE help This driver is for a SoC level watchdog that exists on some -- cgit v0.10.2 From 76534860f108b812926a4fbfbdadbfa9cdec89d0 Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Thu, 6 Aug 2015 18:16:46 +0800 Subject: watchdog: add a driver to support SAMA5D4 watchdog timer From SAMA5D4, the watchdog timer is upgrated with a new feature, which is describled as in the datasheet, "WDT_MR can be written until a LOCKMR command is issued in WDT_CR". That is to say, as long as the bootstrap and u-boot don't issue a LOCKMR command, WDT_MR can be written more than once in the driver. So the SAMA5D4 watchdog driver's implementation is different from the at91sam9260 watchdog driver implemented in file at91sam9_wdt.c. The user application open the device file to enable the watchdog timer hardware, and close to disable it, and set the watchdog timer timeout by seting WDV and WDD fields of WDT_MR register, and ping the watchdog by issuing WDRSTT command to WDT_CR register with hard-coded key. Signed-off-by: Wenyou Yang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index cd99fcc..ce0658c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -188,6 +188,15 @@ config AT91SAM9X_WATCHDOG Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. 
This will reboot your system when the timeout is reached. +config SAMA5D4_WATCHDOG + tristate "Atmel SAMA5D4 Watchdog Timer" + depends on ARCH_AT91 + select WATCHDOG_CORE + help + Atmel SAMA5D4 watchdog timer is embedded into SAMA5D4 chips. + Its Watchdog Timer Mode Register can be written more than once. + This will reboot your system when the timeout is reached. + config CADENCE_WATCHDOG tristate "Cadence Watchdog Timer" depends on HAS_IOMEM diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 1b0ef48..0c616e3 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o +obj-$(CONFIG_SAMA5D4_WATCHDOG) += sama5d4_wdt.o obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o diff --git a/drivers/watchdog/at91sam9_wdt.h b/drivers/watchdog/at91sam9_wdt.h index c6fbb2e6..b79a83b 100644 --- a/drivers/watchdog/at91sam9_wdt.h +++ b/drivers/watchdog/at91sam9_wdt.h @@ -22,11 +22,13 @@ #define AT91_WDT_MR 0x04 /* Watchdog Mode Register */ #define AT91_WDT_WDV (0xfff << 0) /* Counter Value */ +#define AT91_WDT_SET_WDV(x) ((x) & AT91_WDT_WDV) #define AT91_WDT_WDFIEN (1 << 12) /* Fault Interrupt Enable */ #define AT91_WDT_WDRSTEN (1 << 13) /* Reset Processor */ #define AT91_WDT_WDRPROC (1 << 14) /* Timer Restart */ #define AT91_WDT_WDDIS (1 << 15) /* Watchdog Disable */ #define AT91_WDT_WDD (0xfff << 16) /* Delta Value */ +#define AT91_WDT_SET_WDD(x) (((x) << 16) & AT91_WDT_WDD) #define AT91_WDT_WDDBGHLT (1 << 28) /* Debug Halt */ #define AT91_WDT_WDIDLEHLT (1 << 29) /* Idle Halt */ diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c new file mode 100644 index 0000000..a49634c --- /dev/null +++ b/drivers/watchdog/sama5d4_wdt.c @@ -0,0 +1,280 @@ +/* + * Driver for Atmel SAMA5D4 Watchdog Timer + * + * Copyright (C) 2015 Atmel Corporation + * + * Licensed under GPLv2. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/reboot.h> +#include <linux/watchdog.h> + +#include "at91sam9_wdt.h" + +/* minimum and maximum watchdog timeout, in seconds */ +#define MIN_WDT_TIMEOUT 1 +#define MAX_WDT_TIMEOUT 16 +#define WDT_DEFAULT_TIMEOUT MAX_WDT_TIMEOUT + +#define WDT_SEC2TICKS(s) ((s) ? (((s) << 8) - 1) : 0) + +struct sama5d4_wdt { + struct watchdog_device wdd; + void __iomem *reg_base; + u32 config; +}; + +static int wdt_timeout = WDT_DEFAULT_TIMEOUT; +static bool nowayout = WATCHDOG_NOWAYOUT; + +module_param(wdt_timeout, int, 0); +MODULE_PARM_DESC(wdt_timeout, + "Watchdog timeout in seconds.
(default = " + __MODULE_STRING(WDT_DEFAULT_TIMEOUT) ")"); + +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define wdt_read(wdt, field) \ + readl_relaxed((wdt)->reg_base + (field)) + +#define wdt_write(wtd, field, val) \ + writel_relaxed((val), (wdt)->reg_base + (field)) + +static int sama5d4_wdt_start(struct watchdog_device *wdd) +{ + struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); + u32 reg; + + reg = wdt_read(wdt, AT91_WDT_MR); + reg &= ~AT91_WDT_WDDIS; + wdt_write(wdt, AT91_WDT_MR, reg); + + return 0; +} + +static int sama5d4_wdt_stop(struct watchdog_device *wdd) +{ + struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); + u32 reg; + + reg = wdt_read(wdt, AT91_WDT_MR); + reg |= AT91_WDT_WDDIS; + wdt_write(wdt, AT91_WDT_MR, reg); + + return 0; +} + +static int sama5d4_wdt_ping(struct watchdog_device *wdd) +{ + struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); + + wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT); + + return 0; +} + +static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd); + u32 value = WDT_SEC2TICKS(timeout); + u32 reg; + + reg = wdt_read(wdt, AT91_WDT_MR); + reg &= ~AT91_WDT_WDV; + reg &= ~AT91_WDT_WDD; + reg |= AT91_WDT_SET_WDV(value); + reg |= AT91_WDT_SET_WDD(value); + wdt_write(wdt, AT91_WDT_MR, reg); + + wdd->timeout = timeout; + + return 0; +} + +static const struct watchdog_info sama5d4_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING, + .identity = "Atmel SAMA5D4 Watchdog", +}; + +static struct watchdog_ops sama5d4_wdt_ops = { + .owner = THIS_MODULE, + .start = sama5d4_wdt_start, + .stop = sama5d4_wdt_stop, + .ping = sama5d4_wdt_ping, + .set_timeout = sama5d4_wdt_set_timeout, +}; + +static irqreturn_t sama5d4_wdt_irq_handler(int irq, void *dev_id) +{ + struct sama5d4_wdt *wdt = platform_get_drvdata(dev_id); + + if (wdt_read(wdt, AT91_WDT_SR)) { + pr_crit("Atmel Watchdog Software Reset\n"); + emergency_restart(); + pr_crit("Reboot didn't succeed\n"); + } + + return IRQ_HANDLED; +} + +static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt) +{ + const char *tmp; + + wdt->config = AT91_WDT_WDDIS; + + if (!of_property_read_string(np, "atmel,watchdog-type", &tmp) && + !strcmp(tmp, "software")) + wdt->config |= AT91_WDT_WDFIEN; + else + wdt->config |= AT91_WDT_WDRSTEN; + + if (of_property_read_bool(np, "atmel,idle-halt")) + wdt->config |= AT91_WDT_WDIDLEHLT; + + if (of_property_read_bool(np, "atmel,dbg-halt")) + wdt->config |= AT91_WDT_WDDBGHLT; + + return 0; +} + +static int sama5d4_wdt_init(struct sama5d4_wdt *wdt) +{ + struct watchdog_device *wdd = &wdt->wdd; + u32 value = WDT_SEC2TICKS(wdd->timeout); + u32 reg; + + /* + * Because the fields WDV and WDD must not be modified when the WDDIS + * bit is set, so clear the WDDIS bit before writing the WDT_MR. 
+ */ + reg = wdt_read(wdt, AT91_WDT_MR); + reg &= ~AT91_WDT_WDDIS; + wdt_write(wdt, AT91_WDT_MR, reg); + + reg = wdt->config; + reg |= AT91_WDT_SET_WDD(value); + reg |= AT91_WDT_SET_WDV(value); + + wdt_write(wdt, AT91_WDT_MR, reg); + + return 0; +} + +static int sama5d4_wdt_probe(struct platform_device *pdev) +{ + struct watchdog_device *wdd; + struct sama5d4_wdt *wdt; + struct resource *res; + void __iomem *regs; + u32 irq = 0; + int ret; + + wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); + if (!wdt) + return -ENOMEM; + + wdd = &wdt->wdd; + wdd->timeout = wdt_timeout; + wdd->info = &sama5d4_wdt_info; + wdd->ops = &sama5d4_wdt_ops; + wdd->min_timeout = MIN_WDT_TIMEOUT; + wdd->max_timeout = MAX_WDT_TIMEOUT; + + watchdog_set_drvdata(wdd, wdt); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + wdt->reg_base = regs; + + if (pdev->dev.of_node) { + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (!irq) + dev_warn(&pdev->dev, "failed to get IRQ from DT\n"); + + ret = of_sama5d4_wdt_init(pdev->dev.of_node, wdt); + if (ret) + return ret; + } + + if ((wdt->config & AT91_WDT_WDFIEN) && irq) { + ret = devm_request_irq(&pdev->dev, irq, sama5d4_wdt_irq_handler, + IRQF_SHARED | IRQF_IRQPOLL | + IRQF_NO_SUSPEND, pdev->name, pdev); + if (ret) { + dev_err(&pdev->dev, + "cannot register interrupt handler\n"); + return ret; + } + } + + ret = watchdog_init_timeout(wdd, wdt_timeout, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "unable to set timeout value\n"); + return ret; + } + + ret = sama5d4_wdt_init(wdt); + if (ret) + return ret; + + watchdog_set_nowayout(wdd, nowayout); + + ret = watchdog_register_device(wdd); + if (ret) { + dev_err(&pdev->dev, "failed to register watchdog device\n"); + return ret; + } + + platform_set_drvdata(pdev, wdt); + + dev_info(&pdev->dev, "initialized (timeout = %d sec, nowayout = %d)\n", + wdt_timeout, nowayout); + + return 0; +} + +static int sama5d4_wdt_remove(struct platform_device *pdev) +{ + struct sama5d4_wdt *wdt = platform_get_drvdata(pdev); + + sama5d4_wdt_stop(&wdt->wdd); + + watchdog_unregister_device(&wdt->wdd); + + return 0; +} + +static const struct of_device_id sama5d4_wdt_of_match[] = { + { .compatible = "atmel,sama5d4-wdt", }, + { } +}; +MODULE_DEVICE_TABLE(of, sama5d4_wdt_of_match); + +static struct platform_driver sama5d4_wdt_driver = { + .probe = sama5d4_wdt_probe, + .remove = sama5d4_wdt_remove, + .driver = { + .name = "sama5d4_wdt", + .of_match_table = sama5d4_wdt_of_match, + } +}; +module_platform_driver(sama5d4_wdt_driver); + +MODULE_AUTHOR("Atmel Corporation"); +MODULE_DESCRIPTION("Atmel SAMA5D4 Watchdog Timer driver"); +MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From f4fff94e3e3a712ef062c44b64ecf8f552f48ea4 Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Thu, 6 Aug 2015 18:17:05 +0800 Subject: Documentation: dt: binding: atmel-sama5d4-wdt: for SAMA5D4 watchdog driver The compatible "atmel,sama5d4-wdt" supports the SAMA5D4 watchdog driver and the watchdog's WDT_MR register can be written more than once. 
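As a worked example of the tick conversion used by the SAMA5D4 driver above: WDT_SEC2TICKS() maps seconds to the 12-bit WDV/WDD fields, which count at 256 Hz (on this chip the watchdog counter runs from the 32.768 kHz slow clock divided by 128), so (s << 8) - 1 is exactly s * 256 - 1:

	#define WDT_SEC2TICKS(s)	((s) ? (((s) << 8) - 1) : 0)

	u32 ticks = WDT_SEC2TICKS(10);	/* (10 << 8) - 1 = 2559 */
	u32 max   = WDT_SEC2TICKS(16);	/* (16 << 8) - 1 = 4095 = 0xfff */

The 16-second MAX_WDT_TIMEOUT is therefore not arbitrary: 16 seconds is the largest timeout whose tick count still fits in a 12-bit field.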
Signed-off-by: Wenyou Yang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt new file mode 100644 index 0000000..f7cc7c0 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt @@ -0,0 +1,35 @@ +* Atmel SAMA5D4 Watchdog Timer (WDT) Controller + +Required properties: +- compatible: "atmel,sama5d4-wdt" +- reg: base physical address and length of memory mapped region. + +Optional properties: +- timeout-sec: watchdog timeout value (in seconds). +- interrupts: interrupt number to the CPU. +- atmel,watchdog-type: should be "hardware" or "software". + "hardware": enable watchdog fault reset. A watchdog fault triggers + watchdog reset. + "software": enable watchdog fault interrupt. A watchdog fault asserts + watchdog interrupt. +- atmel,idle-halt: present if you want to stop the watchdog when the CPU is + in idle state. + CAUTION: This property should be used with care; it actually makes the + watchdog not count when the CPU is in idle state, therefore the + watchdog reset time depends on mean CPU usage and the watchdog will + not reset at all if the CPU stops working while in idle state, which + is probably not what you want. +- atmel,dbg-halt: present if you want to stop the watchdog when the CPU is + in debug state. + +Example: + watchdog@fc068640 { + compatible = "atmel,sama5d4-wdt"; + reg = <0xfc068640 0x10>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH 5>; + timeout-sec = <10>; + atmel,watchdog-type = "hardware"; + atmel,dbg-halt; + atmel,idle-halt; + status = "okay"; + }; -- cgit v0.10.2 From a97a09bd119fbdf4ba8c634fed8f4148d1def1e0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 16 Aug 2015 11:23:43 +0200 Subject: watchdog: at91sam9: get and use slow clock Commit dca1a4b5ff6e ("clk: at91: keep slow clk enabled to prevent system hang") added a workaround for the slow clock as it is not properly handled by its users. Get and use the slow clock as it is necessary for the at91sam9 watchdog. Signed-off-by: Alexandre Belloni Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index e4698f7..7e6acaf 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -17,6 +17,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/clk.h> #include #include #include @@ -90,6 +91,7 @@ struct at91wdt { unsigned long heartbeat; /* WDT heartbeat in jiffies */ bool nowayout; unsigned int irq; + struct clk *sclk; }; /* .........................................................................
 */ @@ -352,15 +354,25 @@ static int __init at91wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); + wdt->sclk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(wdt->sclk)) + return PTR_ERR(wdt->sclk); + + err = clk_prepare_enable(wdt->sclk); + if (err) { + dev_err(&pdev->dev, "Could not enable slow clock\n"); + return err; + } + if (pdev->dev.of_node) { err = of_at91wdt_init(pdev->dev.of_node, wdt); if (err) - return err; + goto err_clk; } err = at91_wdt_init(pdev, wdt); if (err) - return err; + goto err_clk; platform_set_drvdata(pdev, wdt); @@ -368,6 +380,11 @@ static int __init at91wdt_probe(struct platform_device *pdev) wdt->wdd.timeout, wdt->nowayout); return 0; + +err_clk: + clk_disable_unprepare(wdt->sclk); + + return err; } static int __exit at91wdt_remove(struct platform_device *pdev) @@ -377,6 +394,7 @@ static int __exit at91wdt_remove(struct platform_device *pdev) pr_warn("I quit now, hardware will probably reboot!\n"); del_timer(&wdt->timer); + clk_disable_unprepare(wdt->sclk); return 0; } -- cgit v0.10.2 From bf5125d5e0759a0f513b1bcd33c15edc0cf4c17b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 17 Aug 2015 09:19:03 -0700 Subject: watchdog: at91rm9200: Correct check for syscon_node_to_regmap() errors syscon_node_to_regmap() returns a regmap or an ERR_PTR(). Signed-off-by: Bjorn Andersson Acked-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 41cecb5..464bf76 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -244,7 +244,7 @@ static int at91wdt_probe(struct platform_device *pdev) } regmap_st = syscon_node_to_regmap(parent->of_node); - if (!regmap_st) + if (IS_ERR(regmap_st)) return -ENODEV; res = misc_register(&at91wdt_miscdev); -- cgit v0.10.2 From 6551881c86c791237a3bebf11eb3bd70b60ea782 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Thu, 20 Aug 2015 14:05:01 +0530 Subject: Watchdog: Fix parent of watchdog_devices /sys/class/watchdog/watchdogn/device/modalias can help to identify the driver/module for a given watchdog node. However, many wdt devices do not set their parent, so we do not see a device entry in sysfs for them. This patch fixes the parent of the watchdog_device so that /sys/class/watchdog/watchdogn/device is populated. Exceptions: booke, diag288, octeon, softdog and w83627hf -- they do not have any parent. Not sure how we can identify the driver for these devices.
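The per-driver change that follows is mechanical; a minimal sketch of the pattern (the foo_wdt names are hypothetical):

	static struct watchdog_device foo_wdt_dev;

	static int foo_wdt_probe(struct platform_device *pdev)
	{
		/* expose /sys/class/watchdog/watchdogN/device in sysfs */
		foo_wdt_dev.parent = &pdev->dev;

		return watchdog_register_device(&foo_wdt_dev);
	}

With the parent set, userspace can read /sys/class/watchdog/watchdog0/device/modalias and feed it to modprobe to identify the responsible driver.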
Signed-off-by: Pratyush Anand Reviewed-by: Johannes Thumshirn Acked-by: Guenter Roeck Acked-by: H Hartley Sweeten Acked-by: Lee Jones Acked-by: Lubomir Rintel Acked-by: Maxime Coquelin Acked-by: Thierry Reding Acked-by: Viresh Kumar Acked-by: Linus Walleij Signed-off-by: Wim Van Sebroeck diff --git a/drivers/misc/mei/wd.c b/drivers/misc/mei/wd.c index 2bc0f50..b346638 100644 --- a/drivers/misc/mei/wd.c +++ b/drivers/misc/mei/wd.c @@ -364,6 +364,7 @@ int mei_watchdog_register(struct mei_device *dev) int ret; + amt_wd_dev.parent = dev->dev; /* unlock to perserve correct locking order */ mutex_unlock(&dev->device_lock); ret = watchdog_register_device(&amt_wd_dev); diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c index 7116968..66c3e65 100644 --- a/drivers/watchdog/bcm2835_wdt.c +++ b/drivers/watchdog/bcm2835_wdt.c @@ -182,6 +182,7 @@ static int bcm2835_wdt_probe(struct platform_device *pdev) watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt); watchdog_init_timeout(&bcm2835_wdt_wdd, heartbeat, dev); watchdog_set_nowayout(&bcm2835_wdt_wdd, nowayout); + bcm2835_wdt_wdd.parent = &pdev->dev; err = watchdog_register_device(&bcm2835_wdt_wdd); if (err) { dev_err(dev, "Failed to register watchdog device"); diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index b28a072..4064a43 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -209,6 +209,7 @@ static int bcm47xx_wdt_probe(struct platform_device *pdev) wdt->wdd.info = &bcm47xx_wdt_info; wdt->wdd.timeout = WDT_DEFAULT_TIME; + wdt->wdd.parent = &pdev->dev; ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout); if (ret) goto err_timer; diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index 22d8ae6..e0c9842 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -319,6 +319,7 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) spin_lock_init(&wdt->lock); platform_set_drvdata(pdev, wdt); watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt); + bcm_kona_wdt_wdd.parent = &pdev->dev; ret = bcm_kona_wdt_set_timeout_reg(&bcm_kona_wdt_wdd, 0); if (ret) { diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c index ce12f43..a099b77 100644 --- a/drivers/watchdog/coh901327_wdt.c +++ b/drivers/watchdog/coh901327_wdt.c @@ -358,6 +358,7 @@ static int __init coh901327_probe(struct platform_device *pdev) if (ret < 0) coh901327_wdt.timeout = 60; + coh901327_wdt.parent = &pdev->dev; ret = watchdog_register_device(&coh901327_wdt); if (ret == 0) dev_info(&pdev->dev, diff --git a/drivers/watchdog/da9052_wdt.c b/drivers/watchdog/da9052_wdt.c index 2e95896..67e6797 100644 --- a/drivers/watchdog/da9052_wdt.c +++ b/drivers/watchdog/da9052_wdt.c @@ -195,6 +195,7 @@ static int da9052_wdt_probe(struct platform_device *pdev) da9052_wdt->timeout = DA9052_DEF_TIMEOUT; da9052_wdt->info = &da9052_wdt_info; da9052_wdt->ops = &da9052_wdt_ops; + da9052_wdt->parent = &pdev->dev; watchdog_set_drvdata(da9052_wdt, driver_data); kref_init(&driver_data->kref); diff --git a/drivers/watchdog/da9055_wdt.c b/drivers/watchdog/da9055_wdt.c index 495089d..04d1430 100644 --- a/drivers/watchdog/da9055_wdt.c +++ b/drivers/watchdog/da9055_wdt.c @@ -161,6 +161,7 @@ static int da9055_wdt_probe(struct platform_device *pdev) da9055_wdt->timeout = DA9055_DEF_TIMEOUT; da9055_wdt->info = &da9055_wdt_info; da9055_wdt->ops = &da9055_wdt_ops; + da9055_wdt->parent = &pdev->dev; watchdog_set_nowayout(da9055_wdt, nowayout); 
watchdog_set_drvdata(da9055_wdt, driver_data); diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index b3a870c..7386111 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -210,6 +210,7 @@ static int da9062_wdt_probe(struct platform_device *pdev) wdt->wdtdev.max_timeout = DA9062_WDT_MAX_TIMEOUT; wdt->wdtdev.timeout = DA9062_WDG_DEFAULT_TIMEOUT; wdt->wdtdev.status = WATCHDOG_NOWAYOUT_INIT_STATUS; + wdt->wdtdev.parent = &pdev->dev; watchdog_set_drvdata(&wdt->wdtdev, wdt); dev_set_drvdata(&pdev->dev, wdt); diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c index e2fe2eb..6bf130b 100644 --- a/drivers/watchdog/da9063_wdt.c +++ b/drivers/watchdog/da9063_wdt.c @@ -175,6 +175,7 @@ static int da9063_wdt_probe(struct platform_device *pdev) wdt->wdtdev.min_timeout = DA9063_WDT_MIN_TIMEOUT; wdt->wdtdev.max_timeout = DA9063_WDT_MAX_TIMEOUT; wdt->wdtdev.timeout = DA9063_WDG_TIMEOUT; + wdt->wdtdev.parent = &pdev->dev; wdt->wdtdev.status = WATCHDOG_NOWAYOUT_INIT_STATUS; diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c index cfdf8a4..17454ca 100644 --- a/drivers/watchdog/davinci_wdt.c +++ b/drivers/watchdog/davinci_wdt.c @@ -179,6 +179,7 @@ static int davinci_wdt_probe(struct platform_device *pdev) wdd->min_timeout = 1; wdd->max_timeout = MAX_HEARTBEAT; wdd->timeout = DEFAULT_HEARTBEAT; + wdd->parent = &pdev->dev; watchdog_init_timeout(wdd, heartbeat, dev); diff --git a/drivers/watchdog/digicolor_wdt.c b/drivers/watchdog/digicolor_wdt.c index 31d8e49..50abe1b 100644 --- a/drivers/watchdog/digicolor_wdt.c +++ b/drivers/watchdog/digicolor_wdt.c @@ -143,6 +143,7 @@ static int dc_wdt_probe(struct platform_device *pdev) } dc_wdt_wdd.max_timeout = U32_MAX / clk_get_rate(wdt->clk); dc_wdt_wdd.timeout = dc_wdt_wdd.max_timeout; + dc_wdt_wdd.parent = &pdev->dev; spin_lock_init(&wdt->lock); diff --git a/drivers/watchdog/ep93xx_wdt.c b/drivers/watchdog/ep93xx_wdt.c index 7a2cc71..0a4d7cc 100644 --- a/drivers/watchdog/ep93xx_wdt.c +++ b/drivers/watchdog/ep93xx_wdt.c @@ -132,6 +132,7 @@ static int ep93xx_wdt_probe(struct platform_device *pdev) val = readl(mmio_base + EP93XX_WATCHDOG); ep93xx_wdt_wdd.bootstatus = (val & 0x01) ? 
WDIOF_CARDRESET : 0; ep93xx_wdt_wdd.timeout = timeout; + ep93xx_wdt_wdd.parent = &pdev->dev; watchdog_set_nowayout(&ep93xx_wdt_wdd, nowayout); diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index 5e16b09..90d59d3 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -217,6 +217,7 @@ static int gpio_wdt_probe(struct platform_device *pdev) priv->wdd.ops = &gpio_wdt_ops; priv->wdd.min_timeout = SOFT_TIMEOUT_MIN; priv->wdd.max_timeout = SOFT_TIMEOUT_MAX; + priv->wdd.parent = &pdev->dev; if (watchdog_init_timeout(&priv->wdd, 0, &pdev->dev) < 0) priv->wdd.timeout = SOFT_TIMEOUT_DEF; diff --git a/drivers/watchdog/ie6xx_wdt.c b/drivers/watchdog/ie6xx_wdt.c index 9bc39ae..78c2541 100644 --- a/drivers/watchdog/ie6xx_wdt.c +++ b/drivers/watchdog/ie6xx_wdt.c @@ -267,6 +267,7 @@ static int ie6xx_wdt_probe(struct platform_device *pdev) ie6xx_wdt_dev.timeout = timeout; watchdog_set_nowayout(&ie6xx_wdt_dev, nowayout); + ie6xx_wdt_dev.parent = &pdev->dev; spin_lock_init(&ie6xx_wdt_data.unlock_sequence); diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c index 84f6701c..0a436b5 100644 --- a/drivers/watchdog/intel-mid_wdt.c +++ b/drivers/watchdog/intel-mid_wdt.c @@ -137,6 +137,7 @@ static int mid_wdt_probe(struct platform_device *pdev) wdt_dev->min_timeout = MID_WDT_TIMEOUT_MIN; wdt_dev->max_timeout = MID_WDT_TIMEOUT_MAX; wdt_dev->timeout = MID_WDT_DEFAULT_TIMEOUT; + wdt_dev->parent = &pdev->dev; watchdog_set_drvdata(wdt_dev, &pdev->dev); platform_set_drvdata(pdev, wdt_dev); diff --git a/drivers/watchdog/jz4740_wdt.c b/drivers/watchdog/jz4740_wdt.c index 4c2cc09..6a7d5c3 100644 --- a/drivers/watchdog/jz4740_wdt.c +++ b/drivers/watchdog/jz4740_wdt.c @@ -174,6 +174,7 @@ static int jz4740_wdt_probe(struct platform_device *pdev) jz4740_wdt->timeout = heartbeat; jz4740_wdt->min_timeout = 1; jz4740_wdt->max_timeout = MAX_HEARTBEAT; + jz4740_wdt->parent = &pdev->dev; watchdog_set_nowayout(jz4740_wdt, nowayout); watchdog_set_drvdata(jz4740_wdt, drvdata); diff --git a/drivers/watchdog/mena21_wdt.c b/drivers/watchdog/mena21_wdt.c index d193a5e..6901300 100644 --- a/drivers/watchdog/mena21_wdt.c +++ b/drivers/watchdog/mena21_wdt.c @@ -197,6 +197,7 @@ static int a21_wdt_probe(struct platform_device *pdev) watchdog_init_timeout(&a21_wdt, 30, &pdev->dev); watchdog_set_nowayout(&a21_wdt, nowayout); watchdog_set_drvdata(&a21_wdt, drv); + a21_wdt.parent = &pdev->dev; reset = a21_wdt_get_bootstatus(drv); if (reset == 2) diff --git a/drivers/watchdog/menf21bmc_wdt.c b/drivers/watchdog/menf21bmc_wdt.c index 59f0913..3aefdde 100644 --- a/drivers/watchdog/menf21bmc_wdt.c +++ b/drivers/watchdog/menf21bmc_wdt.c @@ -130,6 +130,7 @@ static int menf21bmc_wdt_probe(struct platform_device *pdev) drv_data->wdt.info = &menf21bmc_wdt_info; drv_data->wdt.min_timeout = BMC_WD_TIMEOUT_MIN; drv_data->wdt.max_timeout = BMC_WD_TIMEOUT_MAX; + drv_data->wdt.parent = &pdev->dev; drv_data->i2c_client = i2c_client; /* diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index de911c7..d96bee0 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -253,6 +253,7 @@ static int omap_wdt_probe(struct platform_device *pdev) wdev->wdog.ops = &omap_wdt_ops; wdev->wdog.min_timeout = TIMER_MARGIN_MIN; wdev->wdog.max_timeout = TIMER_MARGIN_MAX; + wdev->wdog.parent = &pdev->dev; if (watchdog_init_timeout(&wdev->wdog, timer_margin, &pdev->dev) < 0) wdev->wdog.timeout = TIMER_MARGIN_DEFAULT; diff --git a/drivers/watchdog/orion_wdt.c 
b/drivers/watchdog/orion_wdt.c index ef0c628..c6b8f4a 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -567,6 +567,7 @@ static int orion_wdt_probe(struct platform_device *pdev) dev->wdt.timeout = wdt_max_duration; dev->wdt.max_timeout = wdt_max_duration; + dev->wdt.parent = &pdev->dev; watchdog_init_timeout(&dev->wdt, heartbeat, &pdev->dev); platform_set_drvdata(pdev, &dev->wdt); diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c index b9c6049..4224b3e 100644 --- a/drivers/watchdog/pnx4008_wdt.c +++ b/drivers/watchdog/pnx4008_wdt.c @@ -167,6 +167,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) pnx4008_wdd.bootstatus = (readl(WDTIM_RES(wdt_base)) & WDOG_RESET) ? WDIOF_CARDRESET : 0; + pnx4008_wdd.parent = &pdev->dev; watchdog_set_nowayout(&pnx4008_wdd, nowayout); pnx4008_wdt_stop(&pnx4008_wdd); /* disable for now */ diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index aa03ca8..773dcfa 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -171,6 +171,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) wdt->wdd.ops = &qcom_wdt_ops; wdt->wdd.min_timeout = 1; wdt->wdd.max_timeout = 0x10000000U / wdt->rate; + wdt->wdd.parent = &pdev->dev; /* * If 'timeout-sec' unspecified in devicetree, assume a 30 second diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c index b7c68e27..39cd51d 100644 --- a/drivers/watchdog/retu_wdt.c +++ b/drivers/watchdog/retu_wdt.c @@ -127,6 +127,7 @@ static int retu_wdt_probe(struct platform_device *pdev) retu_wdt->timeout = RETU_WDT_MAX_TIMER; retu_wdt->min_timeout = 0; retu_wdt->max_timeout = RETU_WDT_MAX_TIMER; + retu_wdt->parent = &pdev->dev; watchdog_set_drvdata(retu_wdt, wdev); watchdog_set_nowayout(retu_wdt, nowayout); diff --git a/drivers/watchdog/rt2880_wdt.c b/drivers/watchdog/rt2880_wdt.c index a6f7e2e..1967919 100644 --- a/drivers/watchdog/rt2880_wdt.c +++ b/drivers/watchdog/rt2880_wdt.c @@ -161,6 +161,7 @@ static int rt288x_wdt_probe(struct platform_device *pdev) rt288x_wdt_dev.dev = &pdev->dev; rt288x_wdt_dev.bootstatus = rt288x_wdt_bootcause(); rt288x_wdt_dev.max_timeout = (0xfffful / rt288x_wdt_freq); + rt288x_wdt_dev.parent = &pdev->dev; watchdog_init_timeout(&rt288x_wdt_dev, rt288x_wdt_dev.max_timeout, &pdev->dev); diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index e89ae02..d781000 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -607,6 +607,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&wdt->wdt_device, nowayout); wdt->wdt_device.bootstatus = s3c2410wdt_get_bootstatus(wdt); + wdt->wdt_device.parent = &pdev->dev; ret = watchdog_register_device(&wdt->wdt_device); if (ret) { diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index 567458b..f908121 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -252,6 +252,7 @@ static int sh_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&sh_wdt_dev, nowayout); watchdog_set_drvdata(&sh_wdt_dev, wdt); + sh_wdt_dev.parent = &pdev->dev; spin_lock_init(&wdt->lock); diff --git a/drivers/watchdog/sirfsoc_wdt.c b/drivers/watchdog/sirfsoc_wdt.c index 42fa5c0c5..d0578ab 100644 --- a/drivers/watchdog/sirfsoc_wdt.c +++ b/drivers/watchdog/sirfsoc_wdt.c @@ -154,6 +154,7 @@ static int sirfsoc_wdt_probe(struct platform_device *pdev) watchdog_init_timeout(&sirfsoc_wdd, timeout, &pdev->dev); watchdog_set_nowayout(&sirfsoc_wdd, nowayout); 
+ sirfsoc_wdd.parent = &pdev->dev; ret = watchdog_register_device(&sirfsoc_wdd); if (ret) diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 4e7fec3..01d8162 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -226,6 +226,7 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id) wdt->adev = adev; wdt->wdd.info = &wdt_info; wdt->wdd.ops = &wdt_ops; + wdt->wdd.parent = &adev->dev; spin_lock_init(&wdt->lock); watchdog_set_nowayout(&wdt->wdd, nowayout); diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c index 6785afd..14e9bad 100644 --- a/drivers/watchdog/st_lpc_wdt.c +++ b/drivers/watchdog/st_lpc_wdt.c @@ -241,6 +241,7 @@ static int st_wdog_probe(struct platform_device *pdev) return -EINVAL; } st_wdog_dev.max_timeout = 0xFFFFFFFF / st_wdog->clkrate; + st_wdog_dev.parent = &pdev->dev; ret = clk_prepare_enable(clk); if (ret) { diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c index e7f0d5b..3ee6128 100644 --- a/drivers/watchdog/stmp3xxx_rtc_wdt.c +++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c @@ -76,6 +76,7 @@ static int stmp3xxx_wdt_probe(struct platform_device *pdev) watchdog_set_drvdata(&stmp3xxx_wdd, &pdev->dev); stmp3xxx_wdd.timeout = clamp_t(unsigned, heartbeat, 1, STMP3XXX_MAX_TIMEOUT); + stmp3xxx_wdd.parent = &pdev->dev; ret = watchdog_register_device(&stmp3xxx_wdd); if (ret < 0) { diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c index 30451ea..7f97cdd 100644 --- a/drivers/watchdog/tegra_wdt.c +++ b/drivers/watchdog/tegra_wdt.c @@ -218,6 +218,7 @@ static int tegra_wdt_probe(struct platform_device *pdev) wdd->ops = &tegra_wdt_ops; wdd->min_timeout = MIN_WDT_TIMEOUT; wdd->max_timeout = MAX_WDT_TIMEOUT; + wdd->parent = &pdev->dev; watchdog_set_drvdata(wdd, wdt); diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c index 2c1db6f..9bf3cc0 100644 --- a/drivers/watchdog/twl4030_wdt.c +++ b/drivers/watchdog/twl4030_wdt.c @@ -83,6 +83,7 @@ static int twl4030_wdt_probe(struct platform_device *pdev) wdt->timeout = 30; wdt->min_timeout = 1; wdt->max_timeout = 30; + wdt->parent = &pdev->dev; watchdog_set_nowayout(wdt, nowayout); platform_set_drvdata(pdev, wdt); diff --git a/drivers/watchdog/txx9wdt.c b/drivers/watchdog/txx9wdt.c index 7f61593..c2da880 100644 --- a/drivers/watchdog/txx9wdt.c +++ b/drivers/watchdog/txx9wdt.c @@ -131,6 +131,7 @@ static int __init txx9wdt_probe(struct platform_device *dev) txx9wdt.timeout = timeout; txx9wdt.min_timeout = 1; txx9wdt.max_timeout = WD_MAX_TIMEOUT; + txx9wdt.parent = &dev->dev; watchdog_set_nowayout(&txx9wdt, nowayout); ret = watchdog_register_device(&txx9wdt); diff --git a/drivers/watchdog/ux500_wdt.c b/drivers/watchdog/ux500_wdt.c index 9de09ab..37c0843 100644 --- a/drivers/watchdog/ux500_wdt.c +++ b/drivers/watchdog/ux500_wdt.c @@ -96,6 +96,7 @@ static int ux500_wdt_probe(struct platform_device *pdev) ux500_wdt.max_timeout = WATCHDOG_MAX28; } + ux500_wdt.parent = &pdev->dev; watchdog_set_nowayout(&ux500_wdt, nowayout); /* disable auto off on sleep */ diff --git a/drivers/watchdog/via_wdt.c b/drivers/watchdog/via_wdt.c index 56369c4..5f9cbc3 100644 --- a/drivers/watchdog/via_wdt.c +++ b/drivers/watchdog/via_wdt.c @@ -206,6 +206,7 @@ static int wdt_probe(struct pci_dev *pdev, timeout = WDT_TIMEOUT; wdt_dev.timeout = timeout; + wdt_dev.parent = &pdev->dev; watchdog_set_nowayout(&wdt_dev, nowayout); if (readl(wdt_mem) & VIA_WDT_FIRED) wdt_dev.bootstatus |= WDIOF_CARDRESET; diff --git 
a/drivers/watchdog/wm831x_wdt.c b/drivers/watchdog/wm831x_wdt.c index 2fa17e7..8d1184a 100644 --- a/drivers/watchdog/wm831x_wdt.c +++ b/drivers/watchdog/wm831x_wdt.c @@ -215,6 +215,7 @@ static int wm831x_wdt_probe(struct platform_device *pdev) wm831x_wdt->info = &wm831x_wdt_info; wm831x_wdt->ops = &wm831x_wdt_ops; + wm831x_wdt->parent = &pdev->dev; watchdog_set_nowayout(wm831x_wdt, nowayout); watchdog_set_drvdata(wm831x_wdt, driver_data); diff --git a/drivers/watchdog/wm8350_wdt.c b/drivers/watchdog/wm8350_wdt.c index 34d272a..4ab4b83 100644 --- a/drivers/watchdog/wm8350_wdt.c +++ b/drivers/watchdog/wm8350_wdt.c @@ -151,6 +151,7 @@ static int wm8350_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&wm8350_wdt, nowayout); watchdog_set_drvdata(&wm8350_wdt, wm8350); + wm8350_wdt.parent = &pdev->dev; /* Default to 4s timeout */ wm8350_wdt_set_timeout(&wm8350_wdt, 4); -- cgit v0.10.2 From 1b2b90cbea00670221f062814dc8bcecb3af7b90 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 9 Sep 2015 16:58:22 +0530 Subject: PM / OPP: Return suspend_opp only if it is enabled There is no point returning suspend_opp if it is disabled by the core, as we can't use it at all. Fix it. Fixes: 4eafbd15b6c8 ("PM / OPP: add dev_pm_opp_get_suspend_opp() helper") Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 3df62db..4eff4cd 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -345,7 +345,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); * @dev: device for which we do this operation * * Return: This function returns pointer to the suspend opp if it is - * defined, otherwise it returns NULL. + * defined and available, otherwise it returns NULL. * * Locking: This function must be called under rcu_read_lock(). opp is a rcu * protected pointer. The reason for the same is that the opp pointer which is @@ -356,17 +356,15 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { struct device_opp *dev_opp; - struct dev_pm_opp *opp; opp_rcu_lockdep_assert(); dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) - opp = NULL; - else - opp = dev_opp->suspend_opp; + if (IS_ERR(dev_opp) || !dev_opp->suspend_opp || + !dev_opp->suspend_opp->available) + return NULL; - return opp; + return dev_opp->suspend_opp; } EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); -- cgit v0.10.2 From 43717aadd2bc87fb10fbf1cd815c1cbae9bb95b3 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 9 Sep 2015 18:27:31 +0800 Subject: intel_pstate: Fix user input of min/max to legal policy region In the current code, max_perf_pct might be smaller than min_perf_pct due to improper user input: $ grep . /sys/devices/system/cpu/intel_pstate/m*_perf_pct /sys/devices/system/cpu/intel_pstate/max_perf_pct:100 /sys/devices/system/cpu/intel_pstate/min_perf_pct:100 $ echo 80 > /sys/devices/system/cpu/intel_pstate/max_perf_pct $ grep . /sys/devices/system/cpu/intel_pstate/m*_perf_pct /sys/devices/system/cpu/intel_pstate/max_perf_pct:80 /sys/devices/system/cpu/intel_pstate/min_perf_pct:100 Fix this problem in two steps: 1. Normalize the user input to [min_policy, max_policy]. 2. Make sure max_perf_pct>=min_perf_pct, suggested by Seiichi Ikarashi. Signed-off-by: Chen Yu Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J.
Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 31d0548..63075cc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -423,6 +423,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); + limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct); + limits.max_perf_pct = max(limits.min_perf_pct, limits.max_perf_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); if (hwp_active) @@ -442,6 +444,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits.min_sysfs_pct = clamp_t(int, input, 0 , 100); limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); + limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct); + limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); if (hwp_active) @@ -989,12 +993,19 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0 , 100); - limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); - limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); - limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); + + /* Normalize user input to [min_policy_pct, max_policy_pct] */ + limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); + limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct); limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); + limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct); + + /* Make sure min_perf_pct <= max_perf_pct */ + limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct); + + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); if (hwp_active) -- cgit v0.10.2 From 74da56ce5c6715630aed3ccc0fcb86a9210c1a56 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Wed, 9 Sep 2015 11:41:22 -0700 Subject: intel_pstate: fix PCT_TO_HWP macro PCT_TO_HWP does not take the actual range of pstates exported by HWP_CAPABILITIES into account, and is broken on most platforms. Remove the macro and set the min and max pstate for hwp by determining the range and adjusting by the min and max percent limit values. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J.
Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 63075cc..27faee5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -260,24 +260,31 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } -#define PCT_TO_HWP(x) (x * 255 / 100) static void intel_pstate_hwp_set(void) { - int min, max, cpu; - u64 value, freq; + int min, hw_min, max, hw_max, cpu, range, adj_range; + u64 value, cap; + + rdmsrl(MSR_HWP_CAPABILITIES, cap); + hw_min = HWP_LOWEST_PERF(cap); + hw_max = HWP_HIGHEST_PERF(cap); + range = hw_max - hw_min; get_online_cpus(); for_each_online_cpu(cpu) { rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - min = PCT_TO_HWP(limits.min_perf_pct); + adj_range = limits.min_perf_pct * range / 100; + min = hw_min + adj_range; value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = PCT_TO_HWP(limits.max_perf_pct); + adj_range = limits.max_perf_pct * range / 100; + max = hw_min + adj_range; if (limits.no_turbo) { - rdmsrl( MSR_HWP_CAPABILITIES, freq); - max = HWP_GUARANTEED_PERF(freq); + hw_max = HWP_GUARANTEED_PERF(cap); + if (hw_max < max) + max = hw_max; } value &= ~HWP_MAX_PERF(~0L); -- cgit v0.10.2 From 39dc53deff30d9b239ac36cfeb0ef2022d03a449 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2015 00:29:15 +0100 Subject: ARM: swpan: fix nwfpe for uaccess changes NWFPE needs to access userspace to check whether the next instruction is another FP instruction. Allow userspace access for this read. Signed-off-by: Russell King diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 71df435..39c20af 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -95,9 +95,10 @@ emulate: reteq r4 @ no, return failure next: + uaccess_enable r3 .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @ increment PC - + uaccess_disable r3 and r2, r6, #0x0F000000 @ test for FP insns teq r2, #0x0C000000 teqne r2, #0x0D000000 -- cgit v0.10.2 From 296254f3223d201f2aa53f5f717eedfdc63f3db8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2015 00:30:06 +0100 Subject: ARM: uaccess: remove unneeded uaccess_save_and_disable macro This macro is never referenced, remove it. Signed-off-by: Russell King diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 3ae0eda..9007c51 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -487,11 +487,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm - .macro uaccess_save_and_disable, tmp - uaccess_save \tmp - uaccess_disable \tmp - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 -- cgit v0.10.2 From af4cb25df93d2e7a97d65db2bfacaa4400988dea Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 9 Sep 2015 21:19:49 +0100 Subject: ARM: uaccess: fix undefined instruction on ARMv7M/noMMU The use of get_domain() in copy_thread() results in an oops on ARMv7M/noMMU systems. The thread cpu_domain value is only used when CONFIG_CPU_USE_DOMAINS is enabled, so there's no need to save the value in copy_thread() except when this is enabled, and this option will never be enabled on these platforms. 
Unhandled exception: IPSR = 00000006 LR = fffffff1 CPU: 0 PID: 0 Comm: swapper Not tainted 4.2.0-next-20150909-00001-gb8ec5ad #41 Hardware name: NXP LPC18xx/43xx (Device Tree) task: 2823fbe0 ti: 2823c000 task.ti: 2823c000 PC is at copy_thread+0x18/0x92 LR is at copy_thread+0x19/0x92 pc : [<2800a46e>] lr : [<2800a46f>] psr: 4100000b sp : 2823df00 ip : 00000000 fp : 287c81c0 r10: 00000000 r9 : 00800300 r8 : 287c8000 r7 : 287c8000 r6 : 2818908d r5 : 00000000 r4 : 287ca000 r3 : 00000000 r2 : 00000000 r1 : fffffff0 r0 : 287ca048 xPSR: 4100000b Reported-by: Ariel D'Alessandro Signed-off-by: Russell King diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3f18098..e550a45 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -233,6 +233,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); +#ifdef CONFIG_CPU_USE_DOMAINS /* * Copy the initial value of the domain access control register * from the current thread: thread->addr_limit will have been @@ -240,6 +241,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, * kernel/fork.c */ thread->cpu_domain = get_domain(); +#endif if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); -- cgit v0.10.2 From e297c939b745e420ef0b9dc989cb87bda617b399 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 10 Sep 2015 14:36:21 +1000 Subject: powerpc/MSI: Fix race condition in tearing down MSI interrupts This fixes a race which can result in the same virtual IRQ number being assigned to two different MSI interrupts. The most visible consequence of that is usually a warning and stack trace from the sysfs code about an attempt to create a duplicate entry in sysfs. The race happens when one CPU (say CPU 0) is disposing of an MSI while another CPU (say CPU 1) is setting up an MSI. CPU 0 calls (for example) pnv_teardown_msi_irqs(), which calls msi_bitmap_free_hwirqs() to indicate that the MSI (i.e. its hardware IRQ number) is no longer in use. Then, before CPU 0 gets to calling irq_dispose_mapping() to free up the virtual IRQ number, CPU 1 comes in and calls msi_bitmap_alloc_hwirqs() to allocate an MSI, and gets the same hardware IRQ number that CPU 0 just freed. CPU 1 then calls irq_create_mapping() to get a virtual IRQ number, which sees that there is currently a mapping for that hardware IRQ number and returns the corresponding virtual IRQ number (which is the same virtual IRQ number that CPU 0 was using). CPU 0 then calls irq_dispose_mapping() and frees that virtual IRQ number. Now, if another CPU comes along and calls irq_create_mapping(), it is likely to get the virtual IRQ number that was just freed, resulting in the same virtual IRQ number apparently being used for two different hardware interrupts. To fix this race, we just move the call to msi_bitmap_free_hwirqs() to after the call to irq_dispose_mapping(). Since virq_to_hw() doesn't work for the virtual IRQ number after irq_dispose_mapping() has been called, we need to call it before irq_dispose_mapping() and remember the result for the msi_bitmap_free_hwirqs() call. The pattern of calling msi_bitmap_free_hwirqs() before irq_dispose_mapping() appears in 5 places under arch/powerpc, and appears to have originated in commit 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") from 2007.
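The essence of the fix is an ordering rule: read the hwirq while the virq mapping still exists, destroy the mapping, and only then return the hwirq to the allocator. A minimal hedged sketch of the pattern (illustrative only; the hypothetical global bitmap stands in for each driver's real one, and the actual per-driver hunks follow below):

static struct msi_bitmap bmp;	/* stand-in for the driver's bitmap */

static void teardown_one_msi(struct msi_desc *entry)
{
	/* Must be read before irq_dispose_mapping() kills the mapping. */
	irq_hw_number_t hwirq = virq_to_hw(entry->irq);

	irq_set_msi_desc(entry->irq, NULL);
	/* Dispose of the virq first, so a concurrent allocator that is
	 * handed the same hwirq cannot find a stale mapping for it. */
	irq_dispose_mapping(entry->irq);
	msi_bitmap_free_hwirqs(&bmp, hwirq, 1);
}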
Fixes: 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") Cc: stable@vger.kernel.org # v2.6.22+ Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index e66ef19..b304a9f 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -63,6 +63,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -70,10 +71,10 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 9b2480b..f2dd772 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 5916da1..48a576a 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 70fbd56..2cbc7e2 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 24d0470..8fb8061 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct 
pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); for_each_pci_msi_entry(entry, dev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } -- cgit v0.10.2 From f1ab428711358fbb747ba392c3448462494e6c6a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Sep 2015 18:22:35 +1000 Subject: crypto: vmx - VMX crypto should depend on CONFIG_VSX This code uses FP (floating point), Altivec and VSX (Vector-Scalar Extension). It can just depend on CONFIG_VSX though, because that already depends on FP and Altivec. Otherwise we get lots of link errors such as: drivers/built-in.o: In function `.p8_aes_setkey': aes.c:(.text+0x2d325c): undefined reference to `.enable_kernel_altivec' aes.c:(.text+0x2d326c): undefined reference to `.enable_kernel_vsx' Signed-off-by: Michael Ellerman Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 07bc7aa..d234719 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -461,7 +461,7 @@ config CRYPTO_DEV_QCE config CRYPTO_DEV_VMX bool "Support for VMX cryptographic acceleration instructions" - depends on PPC64 + depends on PPC64 && VSX help Support for VMX cryptographic acceleration instructions. -- cgit v0.10.2 From 9da75de030bb6e49475ef37c8495d07e98cfeb33 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 9 Sep 2015 14:27:07 +0200 Subject: crypto: sunxi-ss - Fix a possible driver hang with ciphers The sun4i_ss_opti_poll function ciphers data until the output miter has a length of 0. If the crypto API client gives more SGs than necessary, this could result in an infinite loop. Fix it by checking for remaining bytes, just like sun4i_ss_cipher_poll(). Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index e070c31..a19ee12 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -104,7 +104,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq) sg_miter_next(&mo); oo = 0; } - } while (mo.length > 0); + } while (oleft > 0); if (areq->info) { for (i = 0; i < 4 && i < ivsize / 4; i++) { -- cgit v0.10.2 From 2a3fffd45822070309bcf0b1e1dae624d633824a Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Thu, 10 Sep 2015 09:32:14 +0000 Subject: spi: bcm2835: BUG: fix wrong use of PAGE_MASK There is a bug in the alignment checking of transfers that results in DMA not being used for unaligned transfers that do not cross page boundaries, which are valid. This is due to a misconception of the meaning of PAGE_MASK when implementing that check originally - (PAGE_SIZE - 1) should have been used instead. Also fixes a copy/paste error.
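The misconception is easy to demonstrate outside the kernel. In a hedged, self-contained sketch (using the conventional definitions, here renamed MY_PAGE_* to signal they are local stand-ins), masking with PAGE_MASK yields the page base address rather than the in-page offset, so the old check flagged nearly every buffer as crossing a page:

#include <stdio.h>
#include <stdint.h>

#define MY_PAGE_SIZE 4096UL
#define MY_PAGE_MASK (~(MY_PAGE_SIZE - 1))	/* selects the page base, not the offset */

int main(void)
{
	uintptr_t buf = 0x12345678;	/* unaligned, but stays inside one page */
	size_t len = 0x100;

	/* buggy: page base + len is almost always > PAGE_SIZE -> prints 1 */
	printf("buggy:   %d\n", (buf & MY_PAGE_MASK) + len > MY_PAGE_SIZE);
	/* intended: in-page offset + len -> prints 0, the transfer fits the page */
	printf("correct: %d\n", (buf & (MY_PAGE_SIZE - 1)) + len > MY_PAGE_SIZE);
	return 0;
}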
Reported-by: Signed-off-by: Martin Sperl Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 59705ab..ed74786 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -386,14 +386,14 @@ static bool bcm2835_spi_can_dma(struct spi_master *master, /* otherwise we only allow transfers within the same page * to avoid wasting time on dma_mapping when it is not practical */ - if (((size_t)tfr->tx_buf & PAGE_MASK) + tfr->len > PAGE_SIZE) { + if (((size_t)tfr->tx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) { dev_warn_once(&spi->dev, "Unaligned spi tx-transfer bridging page\n"); return false; } - if (((size_t)tfr->rx_buf & PAGE_MASK) + tfr->len > PAGE_SIZE) { + if (((size_t)tfr->rx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) { dev_warn_once(&spi->dev, - "Unaligned spi tx-transfer bridging page\n"); + "Unaligned spi rx-transfer bridging page\n"); return false; } -- cgit v0.10.2 From d630526d0aa6acc0868dae892b1febda72029a3e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 10 Sep 2015 10:19:52 +0200 Subject: spi: atmel: remove warning when !CONFIG_PM_SLEEP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_PM is defined but not CONFIG_PM_SLEEP (this happens when CONFIG_SUSPEND is not defined), there is the following warning: drivers/spi/spi-atmel.c:1723:12: warning: ‘atmel_spi_suspend’ defined but not used [-Wunused-function] drivers/spi/spi-atmel.c:1741:12: warning: ‘atmel_spi_resume’ defined but not used [-Wunused-function] Enclose both atmel_spi_suspend and atmel_spi_resume in #ifdef CONFIG_PM_SLEEP/#endif to solve that. Signed-off-by: Alexandre Belloni Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index c9eca34..709fb6f 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1721,6 +1721,7 @@ static int atmel_spi_runtime_resume(struct device *dev) return clk_prepare_enable(as->clk); } +#ifdef CONFIG_PM_SLEEP static int atmel_spi_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); @@ -1757,6 +1758,7 @@ static int atmel_spi_resume(struct device *dev) return ret; } +#endif static const struct dev_pm_ops atmel_spi_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(atmel_spi_suspend, atmel_spi_resume) -- cgit v0.10.2 From 3a0e27d84bb9abac5e39dc71706768a88c72cb71 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Thu, 10 Sep 2015 09:45:55 +0200 Subject: ASoC: sti: check return of of_property_read Add checks on the of_property_read return values so that an error is returned when a required DT property is not defined.
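The idiom being enforced is worth spelling out: of_property_read_*() leaves its output argument untouched on failure, so ignoring the return value silently keeps stale or uninitialized data. A hedged sketch of the pattern (hypothetical property name; 'np' and 'dev' obtained elsewhere):

u32 version = 0;

if (of_property_read_u32(np, "version", &version)) {
	/* Property missing or malformed: 'version' was never written,
	 * so report the error instead of consuming a bogus value. */
	dev_err(dev, "version property not defined\n");
	return -EINVAL;
}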
Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index f6eefe1..843f037 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -989,8 +989,8 @@ static int uni_player_parse_dt(struct platform_device *pdev, if (!info) return -ENOMEM; - of_property_read_u32(pnode, "version", &player->ver); - if (player->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) { + if (of_property_read_u32(pnode, "version", &player->ver) || + player->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) { dev_err(dev, "Unknown uniperipheral version "); return -EINVAL; } @@ -998,10 +998,16 @@ static int uni_player_parse_dt(struct platform_device *pdev, if (player->ver >= SND_ST_UNIPERIF_VERSION_UNI_PLR_TOP_1_0) info->underflow_enabled = 1; - of_property_read_u32(pnode, "uniperiph-id", &info->id); + if (of_property_read_u32(pnode, "uniperiph-id", &info->id)) { + dev_err(dev, "uniperipheral id not defined"); + return -EINVAL; + } /* Read the device mode property */ - of_property_read_string(pnode, "mode", &mode); + if (of_property_read_string(pnode, "mode", &mode)) { + dev_err(dev, "uniperipheral mode not defined"); + return -EINVAL; + } if (strcasecmp(mode, "hdmi") == 0) info->player_type = SND_ST_UNIPERIF_PLAYER_TYPE_HDMI; diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index c502626..f791239 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -316,7 +316,11 @@ static int uni_reader_parse_dt(struct platform_device *pdev, if (!info) return -ENOMEM; - of_property_read_u32(node, "version", &reader->ver); + if (of_property_read_u32(node, "version", &reader->ver) || + reader->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) { + dev_err(&pdev->dev, "Unknown uniperipheral version "); + return -EINVAL; + } /* Save the info structure */ reader->info = info; -- cgit v0.10.2 From 85e0a0f21a14bfd9145422a6a627c3df47101bd8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 10 Sep 2015 10:36:06 +0100 Subject: Btrfs: remove unnecessary locking of cleaner_mutex to avoid deadlock After commit e44163e17796 ("btrfs: explictly delete unused block groups in close_ctree and ro-remount"), added in the 4.3 merge window, we have calls to btrfs_delete_unused_bgs() while holding the cleaner_mutex. This can cause a deadlock with a concurrent block group relocation (when a filesystem balance or shrink operation is in progress for example) because btrfs_delete_unused_bgs() locks delete_unused_bgs_mutex and the relocation path locks first delete_unused_bgs_mutex and then it locks cleaner_mutex, resulting in a classic ABBA deadlock: CPU 0 CPU 1 lock fs_info->cleaner_mutex __btrfs_balance() || btrfs_shrink_device() lock fs_info->delete_unused_bgs_mutex btrfs_relocate_chunk() btrfs_relocate_block_group() lock fs_info->cleaner_mutex btrfs_delete_unused_bgs() lock fs_info->delete_unused_bgs_mutex Fix this by not taking the cleaner_mutex before calling btrfs_delete_unused_bgs() because it's no longer needed after commit 67c5e7d464bc ("Btrfs: fix race between balance and unused block group deletion"). The mutex fs_info->delete_unused_bgs_mutex, the spinlock fs_info->unused_bgs_lock and a block group's spinlock are enough to get correct serialization between tasks running relocation and unused block group deletion (as well as between multiple tasks concurrently calling btrfs_delete_unused_bgs()).
This issue was discussed (in the mailing list) during the review of the patch titled "btrfs: explictly delete unused block groups in close_ctree and ro-remount" and it was agreed that acquiring the cleaner mutex had to be dropped after the patch titled "Btrfs: fix race between balance and unused block group deletion" got merged (both patches were submitted at about the same time, but one landed in kernel 4.2 and the other in the 4.3 merge window). Signed-off-by: Filipe Manana diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0b658d0..aa59871 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3762,9 +3762,7 @@ void close_ctree(struct btrfs_root *root) * block groups queued for removal, the deletion will be * skipped when we quit the cleaner thread. */ - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_delete_unused_bgs(root->fs_info); - mutex_unlock(&root->fs_info->cleaner_mutex); ret = btrfs_commit_super(root); if (ret) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c389c13..5a186d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) * groups on disk until we're mounted read-write again * unless we clean them up here. */ - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_delete_unused_bgs(fs_info); - mutex_unlock(&root->fs_info->cleaner_mutex); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); -- cgit v0.10.2 From 85b4e4eb2f9ac29ee8ec47f1f055f251cb251a3c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 10 Sep 2015 00:08:45 +0200 Subject: wmi: Remove private %pUL implementation The work performed by wmi_gtoa is equivalent to simply sprintf(out, "%pUL", in), so one could replace its body by this. However, most users feed the result directly as a %s argument to some other function which also understands the %p extensions (they all ultimately use vsnprintf), so we can eliminate some stack buffers and quite a bit of code by just using %pUL directly. In wmi_dev_uevent I'm not sure whether there's room for a nul-terminator in env->buf, so I've just replaced wmi_gtoa with the equivalent sprintf call. 
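For context, %pU is the vsnprintf extension for 16-byte UUIDs/GUIDs; the trailing 'L' selects little-endian field order with upper-case hex digits, which is exactly the layout the removed wmi_gtoa() produced by hand. A small hedged sketch (made-up GUID bytes):

static void dump_guid(void)
{
	static const u8 guid[16] = {
		0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0xf0, 0xde,
		0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
	};

	/* prints: GUID: 12345678-9ABC-DEF0-1234-56789ABCDEF0 */
	pr_info("GUID: %pUL\n", guid);
}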
Signed-off-by: Rasmus Villemoes Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index aac4757..eb391a2 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -194,34 +194,6 @@ static bool wmi_parse_guid(const u8 *src, u8 *dest) return true; } -/* - * Convert a raw GUID to the ACII string representation - */ -static int wmi_gtoa(const char *in, char *out) -{ - int i; - - for (i = 3; i >= 0; i--) - out += sprintf(out, "%02X", in[i] & 0xFF); - - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[5] & 0xFF); - out += sprintf(out, "%02X", in[4] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[7] & 0xFF); - out += sprintf(out, "%02X", in[6] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[8] & 0xFF); - out += sprintf(out, "%02X", in[9] & 0xFF); - out += sprintf(out, "-"); - - for (i = 10; i <= 15; i++) - out += sprintf(out, "%02X", in[i] & 0xFF); - - *out = '\0'; - return 0; -} - static bool find_guid(const char *guid_string, struct wmi_block **out) { char tmp[16], guid_input[16]; @@ -457,11 +429,7 @@ EXPORT_SYMBOL_GPL(wmi_set_block); static void wmi_dump_wdg(const struct guid_block *g) { - char guid_string[37]; - - wmi_gtoa(g->guid, guid_string); - - pr_info("%s:\n", guid_string); + pr_info("%pUL:\n", g->guid); pr_info("\tobject_id: %c%c\n", g->object_id[0], g->object_id[1]); pr_info("\tnotify_id: %02X\n", g->notify_id); pr_info("\treserved: %02X\n", g->reserved); @@ -661,7 +629,6 @@ EXPORT_SYMBOL_GPL(wmi_has_guid); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - char guid_string[37]; struct wmi_block *wblock; wblock = dev_get_drvdata(dev); @@ -670,9 +637,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, return strlen(buf); } - wmi_gtoa(wblock->gblock.guid, guid_string); - - return sprintf(buf, "wmi:%s\n", guid_string); + return sprintf(buf, "wmi:%pUL\n", wblock->gblock.guid); } static DEVICE_ATTR_RO(modalias); @@ -695,7 +660,7 @@ static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) if (!wblock) return -ENOMEM; - wmi_gtoa(wblock->gblock.guid, guid_string); + sprintf(guid_string, "%pUL", wblock->gblock.guid); strcpy(&env->buf[env->buflen - 1], "wmi:"); memcpy(&env->buf[env->buflen - 1 + 4], guid_string, 36); @@ -721,12 +686,9 @@ static struct class wmi_class = { static int wmi_create_device(const struct guid_block *gblock, struct wmi_block *wblock, acpi_handle handle) { - char guid_string[37]; - wblock->dev.class = &wmi_class; - wmi_gtoa(gblock->guid, guid_string); - dev_set_name(&wblock->dev, "%s", guid_string); + dev_set_name(&wblock->dev, "%pUL", gblock->guid); dev_set_drvdata(&wblock->dev, wblock); @@ -877,7 +839,6 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) struct guid_block *block; struct wmi_block *wblock; struct list_head *p; - char guid_string[37]; list_for_each(p, &wmi_block_list) { wblock = list_entry(p, struct wmi_block, list); @@ -888,8 +849,8 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) if (wblock->handler) wblock->handler(event, wblock->handler_data); if (debug_event) { - wmi_gtoa(wblock->gblock.guid, guid_string); - pr_info("DEBUG Event GUID: %s\n", guid_string); + pr_info("DEBUG Event GUID: %pUL\n", + wblock->gblock.guid); } acpi_bus_generate_netlink_event( -- cgit v0.10.2 From aefc574bbbbe74bb891ba392d98f2d59a417c774 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 10 Sep 2015 10:00:55 +0200 
Subject: Revert "twl4030_charger: correctly handle -EPROBE_DEFER from devm_usb_get_phy_by_node" Revert commit 3fc3895e4fe17ee92ae1d1bb9f04da6069e8c370, since it introduced a boot failure on some OMAP platforms. Reported-by: Kevin Hilman Signed-off-by: Sebastian Reichel diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index f4f2c1f..54b0325 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -1057,13 +1057,9 @@ static int twl4030_bci_probe(struct platform_device *pdev) phynode = of_find_compatible_node(bci->dev->of_node->parent, NULL, "ti,twl4030-usb"); - if (phynode) { + if (phynode) bci->transceiver = devm_usb_get_phy_by_node( bci->dev, phynode, &bci->usb_nb); - if (IS_ERR(bci->transceiver) && - PTR_ERR(bci->transceiver) == -EPROBE_DEFER) - return -EPROBE_DEFER; - } } /* Enable interrupts now. */ -- cgit v0.10.2 From e11fc21e756e662e465cac3da6547d438d0b1446 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 5 Sep 2015 02:32:34 +0300 Subject: twl4030_charger: fix another compile error When CONFIG_CHARGER_TWL4030=y and CONFIG_TWL4030_MADC=m we get a compile error: drivers/built-in.o: In function `twl4030_charger_update_current': twl4030_charger.c:(.text+0x504681): undefined reference to `twl4030_get_madc_conversion' Use IS_REACHABLE to fix it. Cc: NeilBrown Reported-by: Randy Dunlap Signed-off-by: Grazvydas Ignotas Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index 54b0325..74f2d3f 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -91,7 +91,7 @@ #define TWL4030_MSTATEC_COMPLETE1 0x0b #define TWL4030_MSTATEC_COMPLETE4 0x0e -#if IS_ENABLED(CONFIG_TWL4030_MADC) +#if IS_REACHABLE(CONFIG_TWL4030_MADC) /* * If AC (Accessory Charger) voltage exceeds 4.5V (MADC 11) * then AC is available. -- cgit v0.10.2 From bb0f73616396e7929b68d3bdea70064003599d33 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 21 Aug 2015 19:51:41 +0200 Subject: clk: rockchip: handle critical clocks after registering all clocks Currently the registration of critical clocks is done in the function shared between rk3066 and rk3188 clock trees. That results in them getting handled maybe before all of them are registered. Therefore move the critical clock handling down to the end of the soc- specific clock registration function, so that all clocks are registered before they're maybe handled as critical clock. 
Signed-off-by: Heiko Stuebner Tested-by: Michael Niewoehner Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index ed02bbc..f26e3ed 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -744,8 +744,6 @@ static void __init rk3188_common_clk_init(struct device_node *np) rockchip_clk_register_branches(common_clk_branches, ARRAY_SIZE(common_clk_branches)); - rockchip_clk_protect_critical(rk3188_critical_clocks, - ARRAY_SIZE(rk3188_critical_clocks)); rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), ROCKCHIP_SOFTRST_HIWORD_MASK); @@ -765,6 +763,8 @@ static void __init rk3066a_clk_init(struct device_node *np) mux_armclk_p, ARRAY_SIZE(mux_armclk_p), &rk3066_cpuclk_data, rk3066_cpuclk_rates, ARRAY_SIZE(rk3066_cpuclk_rates)); + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init); @@ -801,6 +801,9 @@ static void __init rk3188a_clk_init(struct device_node *np) pr_warn("%s: missing clocks to reparent aclk_cpu_pre to gpll\n", __func__); } + + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init); -- cgit v0.10.2 From 1166160ab531198f7abc773992c0e04d0f9b7600 Mon Sep 17 00:00:00 2001 From: Michael Niewoehner Date: Tue, 25 Aug 2015 22:22:07 +0200 Subject: clk: rockchip: add pclk_cpu to the list of rk3188 critical clocks pclk_cpu needs to keep running because it is needed for devices like the act8865 regulator but with the recent gpio clock handling this is not always the case anymore. So add it to the list of critical clocks. Signed-off-by: Michael Niewoehner Reviewed-by: Heiko Stuebner Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index f26e3ed..fa2f36b 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -716,6 +716,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", "hclk_peri", + "pclk_cpu", }; static void __init rk3188_common_clk_init(struct device_node *np) -- cgit v0.10.2 From 3bba75a2ec32bd5fa7024a4de3b8cf9ee113a76a Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Sun, 23 Aug 2015 11:32:37 +0200 Subject: clk: rockchip: Add pclk_peri to critical clocks on RK3066/RK3188 Now that the rockchip clock subsystem does clock gating with GPIO banks, these are no longer enabled once during probe and no longer stay enabled for eternity. When all these clocks are disabled, the parent clock pclk_peri might be disabled too, as no other child claims it. So, we need to add pclk_peri to the critical clocks. 
Signed-off-by: Romain Perier Tested-by: Michael Niewoehner Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index fa2f36b..abb4760 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -717,6 +717,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_peri", "hclk_peri", "pclk_cpu", + "pclk_peri", }; static void __init rk3188_common_clk_init(struct device_node *np) -- cgit v0.10.2 From cf680eae34d26bd474c2ed3bd7d3aff59054aed5 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:44 -0600 Subject: toshiba_acpi: Fix USB Sleep and Music always disabled Commit e1a949c1b988 ("toshiba_acpi: Refactor *{get, set} functions return value") made changes to the return type of the HCI/SCI functions, but a typo in the USB Sleep and Music code always reports non-existent support for this feature. This patch corrects the typo, changing an assignment to a comparison, making laptops with actual support for this feature work again. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 6740c51..c434b53 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -938,7 +938,7 @@ static int toshiba_usb_sleep_music_get(struct toshiba_acpi_dev *dev, u32 *state) else if (result == TOS_NOT_SUPPORTED) return -ENODEV; - return result = TOS_SUCCESS ? 0 : -EIO; + return result == TOS_SUCCESS ? 0 : -EIO; } static int toshiba_usb_sleep_music_set(struct toshiba_acpi_dev *dev, u32 state) -- cgit v0.10.2 From 53147b6cabee5e8d1997b5682fcc0c3b72ddf9c2 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:45 -0600 Subject: toshiba_acpi: Fix hotkeys registration on some toshiba models Commit a2b3471b5b13 ("toshiba_acpi: Use the Hotkey Event Type function for keymap choosing") changed the *setup_keyboard function to query for the Hotkey Event Type to help choose the correct keymap, but it turns out that there are certain Toshiba models out there not implementing this feature, and thus failing to continue the input device registration and leaving such laptops without hotkey support. This patch changes that check, and instead of returning an error if the Hotkey Event Type is not present, we simply inform userspace about it, changing the message printed from err to notice, allowing the function responsible for registering the input device to continue. This issue was found on a Toshiba Portege Z30-B, but there might be some other models out there affected by this regression as well.
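The one-character bug fixed in the USB Sleep and Music hunk above is a classic precedence trap: the conditional operator binds tighter than assignment, so the buggy statement never tests 'result' at all. A minimal sketch (TOS_SUCCESS is defined as 0 in this driver):

int buggy(u32 result)
{
	/* Parses as: return result = (TOS_SUCCESS ? 0 : -EIO);
	 * With TOS_SUCCESS == 0 the ternary always yields -EIO, so the
	 * function returns -EIO for every input - the feature therefore
	 * always looks unsupported/disabled. */
	return result = TOS_SUCCESS ? 0 : -EIO;
}

int fixed(u32 result)
{
	return result == TOS_SUCCESS ? 0 : -EIO;	/* 0 only on success */
}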
Cc: # 4.1+ Signed-off-by: Azael Avalos Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index c434b53..f2372f4 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2398,11 +2398,9 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) if (error) return error; - error = toshiba_hotkey_event_type_get(dev, &events_type); - if (error) { - pr_err("Unable to query Hotkey Event Type\n"); - return error; - } + if (toshiba_hotkey_event_type_get(dev, &events_type)) + pr_notice("Unable to query Hotkey Event Type\n"); + dev->hotkey_event_type = events_type; dev->hotkey_dev = input_allocate_device(); -- cgit v0.10.2 From f47ff87f79a0f65db6af5b6bebd67b8f011994d1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 13 Jul 2015 08:58:51 +0200 Subject: PM / devfreq: event: Remove incorrect property in exynos-ppmu DT binding The exynos-ppmu driver is only a clock consumer and not a clock provider but its Device Tree binding listed #clock-cells as an optional property. Signed-off-by: Javier Martinez Canillas Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt index b54bf3a..aed4866 100644 --- a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt +++ b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt @@ -17,7 +17,6 @@ Required properties: Optional properties: - clock-names : the name of clock used by the PPMU, "ppmu" - clocks : phandles for clock specified in "clock-names" property -- #clock-cells: should be 1. Example1 : PPMU nodes in exynos3250.dtsi are listed below. -- cgit v0.10.2 From 77fe46a301a75795cf1d43ed2988a15c1600d1b5 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 24 Jul 2015 13:17:24 +0900 Subject: PM / devfreq: exynos-ppmu: Add the support of PPMUv2 for Exynos5433 This patch adds support for PPMU (Platform Performance Monitoring Unit) version 2.0 for the Exynos5433 SoC. The Exynos5433 SoC requires PPMUv2, which is quite different from PPMUv1.1. The exynos-ppmu.c driver supports both PPMUv1.1 and PPMUv2. Cc: MyungJoo Ham Cc: Kyungmin Park Signed-off-by: Chanwoo Choi diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 7d99d13..f9901f5 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -1,7 +1,7 @@ /* * exynos_ppmu.c - EXYNOS PPMU (Platform Performance Monitoring Unit) support * - * Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Author : Chanwoo Choi * * This program is free software; you can redistribute it and/or modify @@ -82,6 +82,15 @@ struct __exynos_ppmu_events { PPMU_EVENT(mscl), PPMU_EVENT(fimd0x), PPMU_EVENT(fimd1x), + + /* Only for Exynos5433 SoCs */ + PPMU_EVENT(d0-cpu), + PPMU_EVENT(d0-general), + PPMU_EVENT(d0-rt), + PPMU_EVENT(d1-cpu), + PPMU_EVENT(d1-general), + PPMU_EVENT(d1-rt), + { /* sentinel */ }, }; @@ -96,6 +105,9 @@ static int exynos_ppmu_find_ppmu_id(struct devfreq_event_dev *edev) return -EINVAL; } +/* + * The devfreq-event ops structure for PPMU v1.1 + */ static int exynos_ppmu_disable(struct devfreq_event_dev *edev) { struct exynos_ppmu *info = devfreq_event_get_drvdata(edev); @@ -200,10 +212,158 @@ static const struct devfreq_event_ops exynos_ppmu_ops = { .get_event = exynos_ppmu_get_event, }; +/* + * The devfreq-event ops structure for PPMU v2.0 + */ +static int exynos_ppmu_v2_disable(struct devfreq_event_dev *edev) +{ + struct exynos_ppmu *info = devfreq_event_get_drvdata(edev); + u32 pmnc, clear; + + /* Disable all counters */ + clear = (PPMU_CCNT_MASK | PPMU_PMCNT0_MASK | PPMU_PMCNT1_MASK + | PPMU_PMCNT2_MASK | PPMU_PMCNT3_MASK); + + __raw_writel(clear, info->ppmu.base + PPMU_V2_FLAG); + __raw_writel(clear, info->ppmu.base + PPMU_V2_INTENC); + __raw_writel(clear, info->ppmu.base + PPMU_V2_CNTENC); + __raw_writel(clear, info->ppmu.base + PPMU_V2_CNT_RESET); + + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CIG_CFG0); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CIG_CFG1); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CIG_CFG2); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CIG_RESULT); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CNT_AUTO); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CH_EV0_TYPE); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CH_EV1_TYPE); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CH_EV2_TYPE); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_CH_EV3_TYPE); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_SM_ID_V); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_SM_ID_A); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_SM_OTHERS_V); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_SM_OTHERS_A); + __raw_writel(0x0, info->ppmu.base + PPMU_V2_INTERRUPT_RESET); + + /* Disable PPMU */ + pmnc = __raw_readl(info->ppmu.base + PPMU_V2_PMNC); + pmnc &= ~PPMU_PMNC_ENABLE_MASK; + __raw_writel(pmnc, info->ppmu.base + PPMU_V2_PMNC); + + return 0; +} + +static int exynos_ppmu_v2_set_event(struct devfreq_event_dev *edev) +{ + struct exynos_ppmu *info = devfreq_event_get_drvdata(edev); + int id = exynos_ppmu_find_ppmu_id(edev); + u32 pmnc, cntens; + + /* Enable all counters */ + cntens = __raw_readl(info->ppmu.base + PPMU_V2_CNTENS); + cntens |= (PPMU_CCNT_MASK | (PPMU_ENABLE << id)); + __raw_writel(cntens, info->ppmu.base + PPMU_V2_CNTENS); + + /* Set the event of Read/Write data count */ + switch (id) { + case PPMU_PMNCNT0: + case PPMU_PMNCNT1: + case PPMU_PMNCNT2: + __raw_writel(PPMU_V2_RO_DATA_CNT | PPMU_V2_WO_DATA_CNT, + info->ppmu.base + PPMU_V2_CH_EVx_TYPE(id)); + break; + case PPMU_PMNCNT3: + __raw_writel(PPMU_V2_EVT3_RW_DATA_CNT, + info->ppmu.base + PPMU_V2_CH_EVx_TYPE(id)); + break; + } + + /* Reset cycle counter/performance counter and enable PPMU */ + pmnc = __raw_readl(info->ppmu.base + PPMU_V2_PMNC); + pmnc &= ~(PPMU_PMNC_ENABLE_MASK + | PPMU_PMNC_COUNTER_RESET_MASK + | PPMU_PMNC_CC_RESET_MASK + | PPMU_PMNC_CC_DIVIDER_MASK + | PPMU_V2_PMNC_START_MODE_MASK); + pmnc |= (PPMU_ENABLE << PPMU_PMNC_ENABLE_SHIFT); + pmnc |= (PPMU_ENABLE << PPMU_PMNC_COUNTER_RESET_SHIFT); + pmnc |= 
(PPMU_ENABLE << PPMU_PMNC_CC_RESET_SHIFT); + pmnc |= (PPMU_V2_MODE_MANUAL << PPMU_V2_PMNC_START_MODE_SHIFT); + __raw_writel(pmnc, info->ppmu.base + PPMU_V2_PMNC); + + return 0; +} + +static int exynos_ppmu_v2_get_event(struct devfreq_event_dev *edev, + struct devfreq_event_data *edata) +{ + struct exynos_ppmu *info = devfreq_event_get_drvdata(edev); + int id = exynos_ppmu_find_ppmu_id(edev); + u32 pmnc, cntenc; + u32 pmcnt_high, pmcnt_low; + u64 load_count = 0; + + /* Disable PPMU */ + pmnc = __raw_readl(info->ppmu.base + PPMU_V2_PMNC); + pmnc &= ~PPMU_PMNC_ENABLE_MASK; + __raw_writel(pmnc, info->ppmu.base + PPMU_V2_PMNC); + + /* Read cycle count and performance count */ + edata->total_count = __raw_readl(info->ppmu.base + PPMU_V2_CCNT); + + switch (id) { + case PPMU_PMNCNT0: + case PPMU_PMNCNT1: + case PPMU_PMNCNT2: + load_count = __raw_readl(info->ppmu.base + PPMU_V2_PMNCT(id)); + break; + case PPMU_PMNCNT3: + pmcnt_high = __raw_readl(info->ppmu.base + PPMU_V2_PMCNT3_HIGH); + pmcnt_low = __raw_readl(info->ppmu.base + PPMU_V2_PMCNT3_LOW); + load_count = (u64)((pmcnt_high & 0xff) << 32) + (u64)pmcnt_low; + break; + } + edata->load_count = load_count; + + /* Disable all counters */ + cntenc = __raw_readl(info->ppmu.base + PPMU_V2_CNTENC); + cntenc |= (PPMU_CCNT_MASK | (PPMU_ENABLE << id)); + __raw_writel(cntenc, info->ppmu.base + PPMU_V2_CNTENC); + + dev_dbg(&edev->dev, "%25s (load: %ld / %ld)\n", edev->desc->name, + edata->load_count, edata->total_count); + return 0; +} + +static const struct devfreq_event_ops exynos_ppmu_v2_ops = { + .disable = exynos_ppmu_v2_disable, + .set_event = exynos_ppmu_v2_set_event, + .get_event = exynos_ppmu_v2_get_event, +}; + +static const struct of_device_id exynos_ppmu_id_match[] = { + { + .compatible = "samsung,exynos-ppmu", + .data = (void *)&exynos_ppmu_ops, + }, { + .compatible = "samsung,exynos-ppmu-v2", + .data = (void *)&exynos_ppmu_v2_ops, + }, + { /* sentinel */ }, +}; + +static struct devfreq_event_ops *exynos_bus_get_ops(struct device_node *np) +{ + const struct of_device_id *match; + + match = of_match_node(exynos_ppmu_id_match, np); + return (struct devfreq_event_ops *)match->data; +} + static int of_get_devfreq_events(struct device_node *np, struct exynos_ppmu *info) { struct devfreq_event_desc *desc; + struct devfreq_event_ops *event_ops; struct device *dev = info->dev; struct device_node *events_np, *node; int i, j, count; @@ -214,6 +374,7 @@ static int of_get_devfreq_events(struct device_node *np, "failed to get child node of devfreq-event devices\n"); return -EINVAL; } + event_ops = exynos_bus_get_ops(np); count = of_get_child_count(events_np); desc = devm_kzalloc(dev, sizeof(*desc) * count, GFP_KERNEL); @@ -238,7 +399,7 @@ static int of_get_devfreq_events(struct device_node *np, continue; } - desc[j].ops = &exynos_ppmu_ops; + desc[j].ops = event_ops; desc[j].driver_data = info; of_property_read_string(node, "event-name", &desc[j].name); @@ -354,11 +515,6 @@ static int exynos_ppmu_remove(struct platform_device *pdev) return 0; } -static struct of_device_id exynos_ppmu_id_match[] = { - { .compatible = "samsung,exynos-ppmu", }, - { /* sentinel */ }, -}; - static struct platform_driver exynos_ppmu_driver = { .probe = exynos_ppmu_probe, .remove = exynos_ppmu_remove, diff --git a/drivers/devfreq/event/exynos-ppmu.h b/drivers/devfreq/event/exynos-ppmu.h index 4e831d4..05774c4 100644 --- a/drivers/devfreq/event/exynos-ppmu.h +++ b/drivers/devfreq/event/exynos-ppmu.h @@ -26,6 +26,9 @@ enum ppmu_counter { PPMU_PMNCNT_MAX, }; +/*** + * PPMUv1.1 
Definitions + */ enum ppmu_event_type { PPMU_RO_BUSY_CYCLE_CNT = 0x0, PPMU_WO_BUSY_CYCLE_CNT = 0x1, @@ -90,4 +93,71 @@ enum ppmu_reg { #define PPMU_PMNCT(x) (PPMU_PMCNT0 + (0x10 * x)) #define PPMU_BEVTxSEL(x) (PPMU_BEVT0SEL + (0x100 * x)) +/*** + * PPMU_V2.0 definitions + */ +enum ppmu_v2_mode { + PPMU_V2_MODE_MANUAL = 0, + PPMU_V2_MODE_AUTO = 1, + PPMU_V2_MODE_CIG = 2, /* CIG (Conditional Interrupt Generation) */ +}; + +enum ppmu_v2_event_type { + PPMU_V2_RO_DATA_CNT = 0x4, + PPMU_V2_WO_DATA_CNT = 0x5, + + PPMU_V2_EVT3_RW_DATA_CNT = 0x22, /* Only for Event3 */ +}; + +enum ppmu_V2_reg { + /* PPC control register */ + PPMU_V2_PMNC = 0x04, + PPMU_V2_CNTENS = 0x08, + PPMU_V2_CNTENC = 0x0c, + PPMU_V2_INTENS = 0x10, + PPMU_V2_INTENC = 0x14, + PPMU_V2_FLAG = 0x18, + + /* Cycle Counter and Performance Event Counter Register */ + PPMU_V2_CCNT = 0x48, + PPMU_V2_PMCNT0 = 0x34, + PPMU_V2_PMCNT1 = 0x38, + PPMU_V2_PMCNT2 = 0x3c, + PPMU_V2_PMCNT3_LOW = 0x40, + PPMU_V2_PMCNT3_HIGH = 0x44, + + /* Bus Event Generator */ + PPMU_V2_CIG_CFG0 = 0x1c, + PPMU_V2_CIG_CFG1 = 0x20, + PPMU_V2_CIG_CFG2 = 0x24, + PPMU_V2_CIG_RESULT = 0x28, + PPMU_V2_CNT_RESET = 0x2c, + PPMU_V2_CNT_AUTO = 0x30, + PPMU_V2_CH_EV0_TYPE = 0x200, + PPMU_V2_CH_EV1_TYPE = 0x204, + PPMU_V2_CH_EV2_TYPE = 0x208, + PPMU_V2_CH_EV3_TYPE = 0x20c, + PPMU_V2_SM_ID_V = 0x220, + PPMU_V2_SM_ID_A = 0x224, + PPMU_V2_SM_OTHERS_V = 0x228, + PPMU_V2_SM_OTHERS_A = 0x22c, + PPMU_V2_INTERRUPT_RESET = 0x260, +}; + +/* PMNC register */ +#define PPMU_V2_PMNC_START_MODE_SHIFT 20 +#define PPMU_V2_PMNC_START_MODE_MASK (0x3 << PPMU_V2_PMNC_START_MODE_SHIFT) + +#define PPMU_PMNC_CC_RESET_SHIFT 2 +#define PPMU_PMNC_COUNTER_RESET_SHIFT 1 +#define PPMU_PMNC_ENABLE_SHIFT 0 +#define PPMU_PMNC_START_MODE_MASK BIT(16) +#define PPMU_PMNC_CC_DIVIDER_MASK BIT(3) +#define PPMU_PMNC_CC_RESET_MASK BIT(2) +#define PPMU_PMNC_COUNTER_RESET_MASK BIT(1) +#define PPMU_PMNC_ENABLE_MASK BIT(0) + +#define PPMU_V2_PMNCT(x) (PPMU_V2_PMCNT0 + (0x4 * x)) +#define PPMU_V2_CH_EVx_TYPE(x) (PPMU_V2_CH_EV0_TYPE + (0x4 * x)) + #endif /* __EXYNOS_PPMU_H__ */ -- cgit v0.10.2 From d80f02231a1ee891f56f06ee6e4646db6ba28836 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 24 Jul 2015 13:17:25 +0900 Subject: PM / devfreq: exynos-ppmu: Update documentation to support PPMUv2 This patch updates the documentation to include the information of PPMUv2. The PPMUv2 is used for Exynos5433 and Exynos7420 to monitor the performance of each IP in Exynos SoC. Cc: MyungJoo Ham Cc: Kyungmin Park Signed-off-by: Chanwoo Choi diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt index aed4866..3e36c1d 100644 --- a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt +++ b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt @@ -11,14 +11,14 @@ to various devfreq devices. The devfreq devices would use the event data when derterming the current state of each IP. Required properties: -- compatible: Should be "samsung,exynos-ppmu". +- compatible: Should be "samsung,exynos-ppmu" or "samsung,exynos-ppmu-v2. - reg: physical base address of each PPMU and length of memory mapped region. Optional properties: - clock-names : the name of clock used by the PPMU, "ppmu" - clocks : phandles for clock specified in "clock-names" property -Example1 : PPMU nodes in exynos3250.dtsi are listed below. +Example1 : PPMUv1 nodes in exynos3250.dtsi are listed below. 
ppmu_dmc0: ppmu_dmc0@106a0000 { compatible = "samsung,exynos-ppmu"; @@ -107,3 +107,41 @@ Example2 : Events of each PPMU node in exynos3250-rinato.dts are listed below. }; }; }; + +Example3 : PPMUv2 nodes in exynos5433.dtsi are listed below. + + ppmu_d0_cpu: ppmu_d0_cpu@10480000 { + compatible = "samsung,exynos-ppmu-v2"; + reg = <0x10480000 0x2000>; + status = "disabled"; + }; + + ppmu_d0_general: ppmu_d0_general@10490000 { + compatible = "samsung,exynos-ppmu-v2"; + reg = <0x10490000 0x2000>; + status = "disabled"; + }; + + ppmu_d0_rt: ppmu_d0_rt@104a0000 { + compatible = "samsung,exynos-ppmu-v2"; + reg = <0x104a0000 0x2000>; + status = "disabled"; + }; + + ppmu_d1_cpu: ppmu_d1_cpu@104b0000 { + compatible = "samsung,exynos-ppmu-v2"; + reg = <0x104b0000 0x2000>; + status = "disabled"; + }; + + ppmu_d1_general: ppmu_d1_general@104c0000 { + compatible = "samsung,exynos-ppmu-v2"; + reg = <0x104c0000 0x2000>; + status = "disabled"; + }; + + ppmu_d1_rt: ppmu_d1_rt@104d0000 { + compatible = "samsung,exynos-ppmu-v2"; + reg = <0x104d0000 0x2000>; + status = "disabled"; + }; -- cgit v0.10.2 From 86fa4cdb0f1e16d7e27278e29e4f0b9ba22f1a9b Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Wed, 5 Aug 2015 10:55:10 +0900 Subject: PM / devfreq: exynos-ppmu: bit-wise operation bugfix. Make it u64 before left-shifting 32bits. Signed-off-by: MyungJoo Ham diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index f9901f5..f312485 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -319,7 +319,8 @@ static int exynos_ppmu_v2_get_event(struct devfreq_event_dev *edev, case PPMU_PMNCNT3: pmcnt_high = __raw_readl(info->ppmu.base + PPMU_V2_PMCNT3_HIGH); pmcnt_low = __raw_readl(info->ppmu.base + PPMU_V2_PMCNT3_LOW); - load_count = (u64)((pmcnt_high & 0xff) << 32) + (u64)pmcnt_low; + load_count = ((u64)((pmcnt_high & 0xff)) << 32) + + (u64)pmcnt_low; break; } edata->load_count = load_count; -- cgit v0.10.2 From 9348da2f1c2ca8e064d4907cdc5b3a19477df933 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 10 Aug 2015 11:42:25 +0530 Subject: PM / devfreq: Drop unlikely before IS_ERR(_OR_NULL) IS_ERR(_OR_NULL) already contain an 'unlikely' compiler flag and there is no need to do that again from its callers. Drop it. Signed-off-by: Viresh Kumar Signed-off-by: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index ca1b362..f28a1fa 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -53,7 +53,7 @@ static struct devfreq *find_device_devfreq(struct device *dev) { struct devfreq *tmp_devfreq; - if (unlikely(IS_ERR_OR_NULL(dev))) { + if (IS_ERR_OR_NULL(dev)) { pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); } @@ -133,7 +133,7 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) { struct devfreq_governor *tmp_governor; - if (unlikely(IS_ERR_OR_NULL(name))) { + if (IS_ERR_OR_NULL(name)) { pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); } -- cgit v0.10.2 From 08e75e754a6d9838e490b74551d19fc04d0fd6f9 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 14 Aug 2015 18:56:56 +0100 Subject: PM / devfreq: cache the last call to get_dev_status() The return value of get_dev_status() can be reused. Cache it so that other parts of the kernel can reuse it instead of having to call the same function again. 
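The caching idea is straightforward: refresh once per decision point, then let every consumer read the stored sample instead of re-querying the driver. A generic hedged sketch of the pattern (not the kernel code itself, which follows in the diff below):

struct dev_status {
	unsigned long busy_time;
	unsigned long total_time;
};

struct monitored_dev {
	int (*get_dev_status)(struct monitored_dev *d, struct dev_status *s);
	struct dev_status last_status;	/* cached copy of the last sample */
};

static int update_stats(struct monitored_dev *d)
{
	/* One driver query per update; later consumers reuse last_status
	 * and all see the same consistent sample. */
	return d->get_dev_status(d, &d->last_status);
}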
Cc: Kyungmin Park Signed-off-by: Javi Merino Signed-off-by: MyungJoo Ham diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index 0720ba8..ae72ba5 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -21,17 +21,20 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, unsigned long *freq) { - struct devfreq_dev_status stat; - int err = df->profile->get_dev_status(df->dev.parent, &stat); + int err; + struct devfreq_dev_status *stat; unsigned long long a, b; unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD; unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL; struct devfreq_simple_ondemand_data *data = df->data; unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX; + err = devfreq_update_stats(df); if (err) return err; + stat = &df->last_status; + if (data) { if (data->upthreshold) dfso_upthreshold = data->upthreshold; @@ -43,41 +46,41 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, return -EINVAL; /* Assume MAX if it is going to be divided by zero */ - if (stat.total_time == 0) { + if (stat->total_time == 0) { *freq = max; return 0; } /* Prevent overflow */ - if (stat.busy_time >= (1 << 24) || stat.total_time >= (1 << 24)) { - stat.busy_time >>= 7; - stat.total_time >>= 7; + if (stat->busy_time >= (1 << 24) || stat->total_time >= (1 << 24)) { + stat->busy_time >>= 7; + stat->total_time >>= 7; } /* Set MAX if it's busy enough */ - if (stat.busy_time * 100 > - stat.total_time * dfso_upthreshold) { + if (stat->busy_time * 100 > + stat->total_time * dfso_upthreshold) { *freq = max; return 0; } /* Set MAX if we do not know the initial frequency */ - if (stat.current_frequency == 0) { + if (stat->current_frequency == 0) { *freq = max; return 0; } /* Keep the current frequency */ - if (stat.busy_time * 100 > - stat.total_time * (dfso_upthreshold - dfso_downdifferential)) { - *freq = stat.current_frequency; + if (stat->busy_time * 100 > + stat->total_time * (dfso_upthreshold - dfso_downdifferential)) { + *freq = stat->current_frequency; return 0; } /* Set the desired frequency based on the load */ - a = stat.busy_time; - a *= stat.current_frequency; - b = div_u64(a, stat.total_time); + a = stat->busy_time; + a *= stat->current_frequency; + b = div_u64(a, stat->total_time); b *= 100; b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2)); *freq = (unsigned long) b; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index ce447f0..70a1c60 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -161,6 +161,7 @@ struct devfreq { struct delayed_work work; unsigned long previous_freq; + struct devfreq_dev_status last_status; void *data; /* private data for governors */ @@ -204,6 +205,15 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev, extern void devm_devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); +/** + * devfreq_update_stats() - update the last_status pointer in struct devfreq + * @df: the devfreq instance whose status needs updating + */ +static inline int devfreq_update_stats(struct devfreq *df) +{ + return df->profile->get_dev_status(df->dev.parent, &df->last_status); +} + #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq @@ -289,6 +299,11 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq) { } + +static inline int devfreq_update_stats(struct devfreq 
*df) +{ + return -EINVAL; +} #endif /* CONFIG_PM_DEVFREQ */ #endif /* __LINUX_DEVFREQ_H__ */ -- cgit v0.10.2 From d3b7e1745c0d1be2add1bb58065eef142c5a098f Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 14 Aug 2015 18:57:00 +0100 Subject: PM / devfreq: drop comment about thermal setting max_freq The thermal infrastructure should use the devfreq cooling device, which uses the OPP library to disable OPPs as necessary. Fix a couple of typos in the same comment while we are at it. Signed-off-by: Javi Merino Signed-off-by: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index f28a1fa..1f3e5ea 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -177,10 +177,10 @@ int update_devfreq(struct devfreq *devfreq) return err; /* - * Adjust the freuqency with user freq and QoS. + * Adjust the frequency with user freq and QoS. * - * List from the highest proiority - * max_freq (probably called by thermal when it's too hot) + * List from the highest priority + * max_freq * min_freq */ -- cgit v0.10.2 From d54cdf3fc91aae3780433471d15d73413a845bc0 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Tue, 18 Aug 2015 13:45:49 +0900 Subject: PM / devfreq: comments for get_dev_status usage updated With the introduction of devfreq_update_stats(), governors are not recommended to use get_dev_status() directly. Signed-off-by: MyungJoo Ham diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 70a1c60..68030e2 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -65,7 +65,10 @@ struct devfreq_dev_status { * The "flags" parameter's possible values are * explained above with "DEVFREQ_FLAG_*" macros. * @get_dev_status: The device should provide the current performance - * status to devfreq, which is used by governors. + * status to devfreq. Governors are recommended not to + * use this directly. Instead, governors are recommended + * to use devfreq_update_stats() along with + * devfreq.last_status. * @get_cur_freq: The device should provide the current frequency * at which it is operating. * @exit: An optional callback that is called when devfreq @@ -208,6 +211,10 @@ extern void devm_devfreq_unregister_opp_notifier(struct device *dev, /** * devfreq_update_stats() - update the last_status pointer in struct devfreq * @df: the devfreq instance whose status needs updating + * + * Governors are recommended to use this function along with last_status, + * which allows other entities to reuse the last_status without affecting + * the values fetched later by governors. */ static inline int devfreq_update_stats(struct devfreq *df) { -- cgit v0.10.2 From 14de3903181ca41dde73d57ad4f7a79a314ee6bf Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Tue, 18 Aug 2015 13:47:41 +0900 Subject: PM / devfreq: tegra: Update governor to use devfreq_update_stats() Direct invocation of get_dev_status() is no longer recommended.
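For illustration, the governor pattern that devfreq_update_stats() enables, together with the simple-ondemand decision logic shown in the first hunk above, can be reduced to a standalone userspace C sketch. The struct fields and threshold values mirror the kernel code, but every name here is invented for the example and this is not the kernel implementation:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's devfreq status fields. */
struct dev_status {
	unsigned long long busy_time;  /* time the device was busy */
	unsigned long long total_time; /* total observed time */
	unsigned long current_frequency;
};

/*
 * Sketch of the simple-ondemand policy: jump to max when load exceeds
 * the up-threshold, hold inside the hysteresis window, otherwise scale
 * the frequency proportionally to the measured load.
 */
static unsigned long pick_freq(const struct dev_status *st,
			       unsigned long max,
			       unsigned int upthreshold,      /* e.g. 90 */
			       unsigned int downdifferential) /* e.g. 5 */
{
	unsigned long long a, b;

	if (st->total_time == 0 || st->current_frequency == 0)
		return max; /* no usable data yet: assume maximum */

	if (st->busy_time * 100 > st->total_time * upthreshold)
		return max; /* busy enough: go to maximum */

	if (st->busy_time * 100 >
	    st->total_time * (upthreshold - downdifferential))
		return st->current_frequency; /* inside hysteresis window */

	/* freq = load% * cur_freq / (upthreshold - downdifferential / 2) */
	a = st->busy_time * st->current_frequency;
	b = (a / st->total_time) * 100;
	b /= upthreshold - downdifferential / 2;
	return (unsigned long)b;
}

int main(void)
{
	/* 40% load at a current frequency of 200000 (arbitrary units). */
	struct dev_status st = { 40, 100, 200000 };

	printf("target: %lu\n", pick_freq(&st, 400000, 90, 5));
	return 0;
}

Compiled on its own the sketch prints a proportional target of 90909 for the 40% load, which is the same scale-by-load arithmetic the governor performs after refreshing df->last_status.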
Signed-off-by: MyungJoo Ham diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 13a1a6e..848b93e 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -541,18 +541,20 @@ static struct devfreq_dev_profile tegra_devfreq_profile = { static int tegra_governor_get_target(struct devfreq *devfreq, unsigned long *freq) { - struct devfreq_dev_status stat; + struct devfreq_dev_status *stat; struct tegra_devfreq *tegra; struct tegra_devfreq_device *dev; unsigned long target_freq = 0; unsigned int i; int err; - err = devfreq->profile->get_dev_status(devfreq->dev.parent, &stat); + err = devfreq_update_stats(devfreq); if (err) return err; - tegra = stat.private_data; + stat = &devfreq->last_status; + + tegra = stat->private_data; for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { dev = &tegra->devices[i]; -- cgit v0.10.2 From 5f25f066f75a67835abb5e400471a27abd09395b Mon Sep 17 00:00:00 2001 From: Xiaolong Ye Date: Fri, 11 Sep 2015 11:05:23 +0800 Subject: PM / devfreq: Fix incorrect type issue. time_in_state in struct devfreq is defined as unsigned long, so devm_kzalloc should use sizeof(unsigned long) as argument instead of sizeof(unsigned int), otherwise it will cause unexpected result in 64bit system. Signed-off-by: Xiaolong Ye Signed-off-by: Kevin Liu Signed-off-by: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 1f3e5ea..3927ed9 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -482,7 +482,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->profile->max_state * devfreq->profile->max_state, GFP_KERNEL); - devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned int) * + devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) * devfreq->profile->max_state, GFP_KERNEL); devfreq->last_stat_updated = jiffies; -- cgit v0.10.2 From 43b3f02899f74ae9914a39547cc5492156f0027a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2015 17:25:23 +0200 Subject: locking/qspinlock/x86: Fix performance regression under unaccelerated VMs Dave ran into horrible performance on a VM without PARAVIRT_SPINLOCKS set and Linus noted that the test-and-set implementation was retarded. One should spin on the variable with a load, not a RMW. While there, remove 'queued' from the name, as the lock isn't queued at all, but a simple test-and-set. Suggested-by: Linus Torvalds Reported-by: Dave Chinner Tested-by: Dave Chinner Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: stable@vger.kernel.org # v4.2+ Link: http://lkml.kernel.org/r/20150904152523.GR18673@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae..8dde3bd 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,15 +39,23 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock +#define virt_spin_lock virt_spin_lock -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. 
+ */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); return true; } diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 83bfb87..e2aadbc 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,8 +111,8 @@ static inline void queued_spin_unlock_wait(struct qspinlock *lock) cpu_relax(); } -#ifndef virt_queued_spin_lock -static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifndef virt_spin_lock +static __always_inline bool virt_spin_lock(struct qspinlock *lock) { return false; } diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 337c881..87e9ce6a 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) if (pv_enabled()) goto queue; - if (virt_queued_spin_lock(lock)) + if (virt_spin_lock(lock)) return; /* -- cgit v0.10.2 From a6b277857fd2c990bc208ca1958d3f34d26052f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 5 Sep 2015 16:55:05 +0200 Subject: locking/qspinlock/x86: Only emit the test-and-set fallback when building guest support Only emit the test-and-set fallback for Hypervisors lacking PARAVIRT_SPINLOCKS support when building for guests. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 4.2 Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 8dde3bd..eaba080 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,8 +39,8 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif +#ifdef CONFIG_PARAVIRT #define virt_spin_lock virt_spin_lock - static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -59,6 +59,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock) return true; } +#endif /* CONFIG_PARAVIRT */ #include -- cgit v0.10.2 From 5473e0cc37c03c576adbda7591a6cc8e37c1bb7f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 28 Aug 2015 14:55:56 +0800 Subject: sched: 'Annotate' migrate_tasks() Kernel testing triggered this warning: | WARNING: CPU: 0 PID: 13 at kernel/sched/core.c:1156 do_set_cpus_allowed+0x7e/0x80() | Modules linked in: | CPU: 0 PID: 13 Comm: migration/0 Not tainted 4.2.0-rc1-00049-g25834c7 #2 | Call Trace: | dump_stack+0x4b/0x75 | warn_slowpath_common+0x8b/0xc0 | warn_slowpath_null+0x22/0x30 | do_set_cpus_allowed+0x7e/0x80 | cpuset_cpus_allowed_fallback+0x7c/0x170 | select_fallback_rq+0x221/0x280 | migration_call+0xe3/0x250 | notifier_call_chain+0x53/0x70 | __raw_notifier_call_chain+0x1e/0x30 | cpu_notify+0x28/0x50 | take_cpu_down+0x22/0x40 | multi_cpu_stop+0xd5/0x140 | cpu_stopper_thread+0xbc/0x170 | smpboot_thread_fn+0x174/0x2f0 | kthread+0xc4/0xe0 | ret_from_kernel_thread+0x21/0x30 As Peterz pointed out: | So the normal rules for changing task_struct::cpus_allowed are holding | both pi_lock and rq->lock, such that holding either stabilizes the mask. | | This is so that wakeup can happen without rq->lock and load-balance | without pi_lock. | | From this we already get the relaxation that we can omit acquiring | rq->lock if the task is not on the rq, because in that case | load-balancing will not apply to it. 
| | ** these are the rules currently tested in do_set_cpus_allowed() ** | | Now, since __set_cpus_allowed_ptr() uses task_rq_lock() which | unconditionally acquires both locks, we could get away with holding just | rq->lock when on_rq for modification because that'd still exclude | __set_cpus_allowed_ptr(), it would also work against | __kthread_bind_mask() because that assumes !on_rq. | | That said, this is all somewhat fragile. | | Now, I don't think dropping rq->lock is quite as disastrous as it | usually is because !cpu_active at this point, which means load-balance | will not interfere, but that too is somewhat fragile. | | So we end up with a choice of two fragile.. This patch fixes it by following the rules for changing task_struct::cpus_allowed with both pi_lock and rq->lock held. Reported-by: kernel test robot Reported-by: Sasha Levin Signed-off-by: Wanpeng Li [ Modified changelog and patch. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/BLU436-SMTP1660820490DE202E3934ED3806E0@phx.gbl Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0902e4d..9b78670 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5183,24 +5183,47 @@ static void migrate_tasks(struct rq *dead_rq) break; /* - * Ensure rq->lock covers the entire task selection - * until the migration. + * pick_next_task assumes pinned rq->lock. */ lockdep_pin_lock(&rq->lock); next = pick_next_task(rq, &fake_task); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); + /* + * Rules for changing task_struct::cpus_allowed are holding + * both pi_lock and rq->lock, such that holding either + * stabilizes the mask. + * + * Drop rq->lock is not quite as disastrous as it usually is + * because !cpu_active at this point, which means load-balance + * will not interfere. Also, stop-machine. + */ + lockdep_unpin_lock(&rq->lock); + raw_spin_unlock(&rq->lock); + raw_spin_lock(&next->pi_lock); + raw_spin_lock(&rq->lock); + + /* + * Since we're inside stop-machine, _nothing_ should have + * changed the task, WARN if weird stuff happened, because in + * that case the above rq->lock drop is a fail too. + */ + if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { + raw_spin_unlock(&next->pi_lock); + continue; + } + /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); - lockdep_unpin_lock(&rq->lock); rq = __migrate_task(rq, next, dest_cpu); if (rq != dead_rq) { raw_spin_unlock(&rq->lock); rq = dead_rq; raw_spin_lock(&rq->lock); } + raw_spin_unlock(&next->pi_lock); } rq->stop = stop; -- cgit v0.10.2 From 8f9b565482c537821588444e09ff732c7d65ed6e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 30 Jul 2015 18:28:13 -0700 Subject: target/qla2xxx: Honor max_data_sg_nents I/O transfer limit This patch adds an optional fabric driver provided SGL limit that target-core will honor as its own internal I/O maximum transfer length limit, as exposed by EVPD=0xb0 block limits parameters. This is required for handling cases when host I/O transfer length exceeds the requested EVPD block limits maximum transfer length. The initial user of this logic is qla2xxx, so that we can avoid having to reject I/Os from some legacy FC hosts where EVPD=0xb0 parameters are not honored.
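As a rough illustration of the limit arithmetic (assuming, as this patch does elsewhere, a single PAGE_SIZE per scatterlist entry), the translation from a fabric scatter-gather entry budget to an EVPD 0xb0 MAXIMUM TRANSFER LENGTH in logical blocks looks like the following standalone C sketch; the names are invented and this is not the target-core code:

#include <stdio.h>

#define PAGE_SIZE 4096UL /* assumed page size for the example */

/*
 * Illustrative only: convert a fabric scatter-gather entry limit into
 * a maximum transfer length in logical blocks, assuming a 1:1 mapping
 * of PAGE_SIZE per scatterlist entry.
 */
static unsigned long max_transfer_blocks(unsigned int max_sg_nents,
					 unsigned int block_size)
{
	return max_sg_nents * PAGE_SIZE / block_size;
}

int main(void)
{
	/* 1200 entries * 4 KiB / 512-byte blocks = 9600 blocks (~4.9 MB) */
	printf("MTL: %lu blocks\n", max_transfer_blocks(1200, 512));
	return 0;
}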
When se_cmd payload length exceeds the provided limit in target_check_max_data_sg_nents() code, se_cmd->data_length + se_cmd->prot_length are reset with se_cmd->residual_count plus underflow bit for outgoing TFO response callbacks. It also checks for existing CDB level underflow + overflow and recalculates final residual_count as necessary. Note this patch currently assumes 1:1 mapping of PAGE_SIZE per struct scatterlist entry. Reported-by: Craig Watson Cc: Craig Watson Tested-by: Himanshu Madhani Cc: Roland Dreier Cc: Arun Easi Cc: Giridhar Malavali Cc: Andrew Vasquez Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Martin K. Petersen Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index c621623..edeb3ae 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1808,6 +1808,11 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .module = THIS_MODULE, .name = "qla2xxx", .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), + /* + * XXX: Limit assumes single page per scatter-gather-list entry. + * Current maximum is ~4.9 MB per se_cmd->t_data_sg with PAGE_SIZE=4096 + */ + .max_data_sg_nents = 1200, .get_fabric_name = tcm_qla2xxx_get_fabric_name, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index a07d455..0e0456f 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -477,8 +477,8 @@ static sense_reason_t spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) { struct se_device *dev = cmd->se_dev; - int have_tp = 0; - int opt, min; + u32 mtl = 0; + int have_tp = 0, opt, min; /* * Following spc3r22 section 6.5.3 Block Limits VPD page, when @@ -509,8 +509,15 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set MAXIMUM TRANSFER LENGTH + * + * XXX: Currently assumes single PAGE_SIZE per scatterlist for fabrics + * enforcing maximum HW scatter-gather-list entry limit */ - put_unaligned_be32(dev->dev_attrib.hw_max_sectors, &buf[8]); + if (cmd->se_tfo->max_data_sg_nents) { + mtl = (cmd->se_tfo->max_data_sg_nents * PAGE_SIZE) / + dev->dev_attrib.block_size; + } + put_unaligned_be32(min_not_zero(mtl, dev->dev_attrib.hw_max_sectors), &buf[8]); /* * Set OPTIMAL TRANSFER LENGTH diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 3f0b500..62bafaa 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1075,6 +1075,55 @@ transport_set_vpd_ident(struct t10_vpd *vpd, unsigned char *page_83) } EXPORT_SYMBOL(transport_set_vpd_ident); +static sense_reason_t +target_check_max_data_sg_nents(struct se_cmd *cmd, struct se_device *dev, + unsigned int size) +{ + u32 mtl; + + if (!cmd->se_tfo->max_data_sg_nents) + return TCM_NO_SENSE; + /* + * Check if fabric enforced maximum SGL entries per I/O descriptor + * exceeds se_cmd->data_length. If true, set SCF_UNDERFLOW_BIT + + * residual_count and reduce original cmd->data_length to maximum + * length based on single PAGE_SIZE entry scatter-lists. + */ + mtl = (cmd->se_tfo->max_data_sg_nents * PAGE_SIZE); + if (cmd->data_length > mtl) { + /* + * If an existing CDB overflow is present, calculate new residual + * based on CDB size minus fabric maximum transfer length. 
+ * + * If an existing CDB underflow is present, calculate new residual + * based on original cmd->data_length minus fabric maximum transfer + * length. + * + * Otherwise, set the underflow residual based on cmd->data_length + * minus fabric maximum transfer length. + */ + if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + cmd->residual_count = (size - mtl); + } else if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + u32 orig_dl = size + cmd->residual_count; + cmd->residual_count = (orig_dl - mtl); + } else { + cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT; + cmd->residual_count = (cmd->data_length - mtl); + } + cmd->data_length = mtl; + /* + * Reset sbc_check_prot() calculated protection payload + * length based upon the new smaller MTL. + */ + if (cmd->prot_length) { + u32 sectors = (mtl / dev->dev_attrib.block_size); + cmd->prot_length = dev->prot_length * sectors; + } + } + return TCM_NO_SENSE; +} + sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size) { @@ -1120,7 +1169,7 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) } } - return 0; + return target_check_max_data_sg_nents(cmd, dev, size); } diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 69355fe..7fb2557 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -5,6 +5,19 @@ struct target_core_fabric_ops { struct module *module; const char *name; size_t node_acl_size; + /* + * Limits number of scatterlist entries per SCF_SCSI_DATA_CDB payload. + * Setting this value tells target-core to enforce this limit, and + * report as INQUIRY EVPD=b0 MAXIMUM TRANSFER LENGTH. + * + * target-core will currently reset se_cmd->data_length to this + * maximum size, and set UNDERFLOW residual count if length exceeds + * this limit. + * + * XXX: Not all initiator hosts honor this block-limit EVPD + * XXX: Currently assumes single PAGE_SIZE per scatterlist entry + */ + u32 max_data_sg_nents; char *(*get_fabric_name)(void); char *(*tpg_get_wwn)(struct se_portal_group *); u16 (*tpg_get_tag)(struct se_portal_group *); -- cgit v0.10.2 From 4416f89b8cfcb794d040fc3b68e5fb159b7d8d02 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 3 Sep 2015 06:30:45 +0000 Subject: target: Attach EXTENDED_COPY local I/O descriptors to xcopy_pt_sess This patch is a >= v4.1 regression bug-fix where control CDB emulation logic in commit 38b57f82 now expects a se_cmd->se_sess pointer to exist when determining T10-PI support is to be exposed for initiator host ports. To address this bug, go ahead and add locally generated se_cmd descriptors for copy-offload block-copy to its own stand-alone se_session nexus, while the parent EXTENDED_COPY se_cmd descriptor remains associated with its originating se_cmd->se_sess nexus. Note a valid se_cmd->se_sess is also required for future support of WRITE_INSERT and READ_STRIP software emulation when submitting backend I/O to se_device that exposes T10-PI support. Reported-by: Alex Gorbachev Tested-by: Alex Gorbachev Cc: "Martin K. 
Petersen" Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Doug Gilbert Cc: # v4.1+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 4515f52..47fe94e 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -450,6 +450,8 @@ int target_xcopy_setup_pt(void) memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); INIT_LIST_HEAD(&xcopy_pt_sess.sess_list); INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_cmd_list); + spin_lock_init(&xcopy_pt_sess.sess_cmd_lock); xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; @@ -644,7 +646,7 @@ static int target_xcopy_read_source( pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", (unsigned long long)src_lba, src_sectors, length); - transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->src_pt_cmd = xpt_cmd; @@ -704,7 +706,7 @@ static int target_xcopy_write_destination( pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n", (unsigned long long)dst_lba, dst_sectors, length); - transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->dst_pt_cmd = xpt_cmd; -- cgit v0.10.2 From 7dd03aca9d61a9b64cd2a8cf6f5ea6f1f5391e8d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 Sep 2015 06:14:18 -0700 Subject: target: Fix max_cmd_sn increment w/o cmdsn mutex regressions Current for-next iscsi target is broken: commit 109e2381749c1cfd94a0d22b2b54142539024973 Author: Roland Dreier Date: Thu Jul 23 14:53:32 2015 -0700 target: Drop iSCSI use of mutex around max_cmd_sn increment This patch fixes incorrect pr_debug() + atomic_inc_return() usage within iscsit_increment_maxcmdsn() code. Also fix funny iscsit_determine_maxcmdsn() usage and update iscsi_target_do_tx_login_io() code. Reported-by: Sagi Grimberg Cc: Sagi Grimberg Signed-off-by: Roland Dreier Cc: Roland Dreier Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 07d2ef6..0382fa2 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -47,16 +47,19 @@ void iscsit_determine_maxcmdsn(struct iscsi_session *sess) * core_set_queue_depth_for_node(). 
*/ sess->cmdsn_window = se_nacl->queue_depth; - atomic_set(&sess->max_cmd_sn, (u32) atomic_read(&sess->max_cmd_sn) + se_nacl->queue_depth - 1); + atomic_add(se_nacl->queue_depth - 1, &sess->max_cmd_sn); } void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess) { + u32 max_cmd_sn; + if (cmd->immediate_cmd || cmd->maxcmdsn_inc) return; cmd->maxcmdsn_inc = 1; - pr_debug("Updated MaxCmdSN to 0x%08x\n", atomic_inc_return(&sess->max_cmd_sn)); + max_cmd_sn = atomic_inc_return(&sess->max_cmd_sn); + pr_debug("Updated MaxCmdSN to 0x%08x\n", max_cmd_sn); } EXPORT_SYMBOL(iscsit_increment_maxcmdsn); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 74d041e..4d08afe 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -366,8 +366,6 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log return -1; login->rsp_length = 0; - login_rsp->exp_cmdsn = cpu_to_be32(login_rsp->exp_cmdsn); - login_rsp->max_cmdsn = cpu_to_be32(login_rsp->max_cmdsn); return 0; } -- cgit v0.10.2 From 4824640ec3fc84337cb2baa9fb780e95864feb88 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 3 Sep 2015 16:03:42 -0700 Subject: target/user: Remove unused variable We don't use it any more. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c448ef4..9fd50be 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -577,7 +577,6 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) { struct tcmu_mailbox *mb; - LIST_HEAD(cpl_cmds); unsigned long flags; int handled = 0; -- cgit v0.10.2 From 06b967e429cfb76494badb9ffdd69e934ba72c77 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 3 Sep 2015 16:03:43 -0700 Subject: target: Remove no-op conditional This does nothing, and there are many other places where transport_cmd_check_stop_to_fabric()'s retval is not checked. If we wanted to check it here, we should probably do it in those other places too. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 62bafaa..5bacc7b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1741,8 +1741,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, check_stop: transport_lun_remove_cmd(cmd); - if (!transport_cmd_check_stop_to_fabric(cmd)) - ; + transport_cmd_check_stop_to_fabric(cmd); return; queue_full: -- cgit v0.10.2 From ed97d0cd78a337450e17eb613bdeec15e729af46 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 3 Sep 2015 16:03:44 -0700 Subject: target/user: Fix UFLAG_UNKNOWN_OP handling Calling transport_generic_request_failure() from here causes list corruption. We should be using target_complete_cmd() instead. Which we do in all other cases, so the UNKNOWN_OP case can become just another member of the big else/if chain in tcmu_handle_completion(). 
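The restructuring described above amounts to rewriting the status instead of taking a special early-exit path, so that one completion routine covers every case. A minimal standalone C sketch of that shape (invented flag and status values, not the TCMU definitions):

#include <stdio.h>

/* Illustrative constants and types; not the kernel definitions. */
#define FLAG_UNKNOWN_OP        0x1
#define STAT_GOOD              0x00
#define STAT_CHECK_CONDITION   0x02

struct completion_entry {
	unsigned int flags;
	unsigned char scsi_status;
};

/*
 * Instead of a special early-return failure path for UNKNOWN_OP, fold
 * it into the normal status chain by rewriting the status, so a single
 * completion routine runs for every case.
 */
static void handle_completion(struct completion_entry *e)
{
	if (e->flags & FLAG_UNKNOWN_OP) {
		fprintf(stderr, "userspace set UNKNOWN_OP\n");
		e->scsi_status = STAT_CHECK_CONDITION;
	} else if (e->scsi_status == STAT_CHECK_CONDITION) {
		/* copy sense data back to the command here */
	}
	/* ... common completion path runs for all cases ... */
	printf("complete with status 0x%02x\n", e->scsi_status);
}

int main(void)
{
	struct completion_entry e = { FLAG_UNKNOWN_OP, STAT_GOOD };

	handle_completion(&e);
	return 0;
}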
Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9fd50be..d0bb652 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -538,14 +538,8 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n", cmd->se_cmd); - transport_generic_request_failure(cmd->se_cmd, - TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE); - cmd->se_cmd = NULL; - kmem_cache_free(tcmu_cmd_cache, cmd); - return; - } - - if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { + entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION; + } else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer, se_cmd->scsi_sense_length); -- cgit v0.10.2 From ac64a2ce509104a746321a4f9646b6750cf281eb Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 4 Sep 2015 01:39:56 +0200 Subject: target: use stringify.h instead of own definition Signed-off-by: David Disseldorp Acked-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index d0bb652..937cebf 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -898,7 +899,7 @@ static int tcmu_configure_device(struct se_device *dev) WARN_ON(!PAGE_ALIGNED(udev->data_off)); WARN_ON(udev->data_size % PAGE_SIZE); - info->version = xstr(TCMU_MAILBOX_VERSION); + info->version = __stringify(TCMU_MAILBOX_VERSION); info->mem[0].name = "tcm-user command & data buffer"; info->mem[0].addr = (phys_addr_t) udev->mb_addr; diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index b67f99d..95c6521 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -42,10 +42,6 @@ #define TCMU_MAILBOX_VERSION 2 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ -/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */ -#define xstr(s) str(s) -#define str(s) #s - struct tcmu_mailbox { __u16 version; __u16 flags; -- cgit v0.10.2 From d249872939bfa86c9cce44a56a8cbdbc7086519b Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 17 Jul 2015 16:34:10 +0300 Subject: perf/x86/intel/bts: Set event->hw.itrace_started in pmu::start to match the new logic Since event->hw.itrace_started is now set in pmu::start() to signal the beginning of the trace, do so also in the intel_bts driver. 
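The convention being applied can be sketched in a few lines of standalone C: the start callback flags that trace data production has begun before clearing the stopped state. Field names are borrowed for readability only; this is not the perf core API:

#include <stdio.h>

/* Illustrative event state; field names mimic the ones discussed. */
struct hw_event {
	int itrace_started;
	int state;
};

/*
 * A PMU's start callback marks the beginning of the trace before
 * clearing the stopped state, so consumers can later tell whether any
 * trace data was ever produced.
 */
static void pmu_start(struct hw_event *hw)
{
	hw->itrace_started = 1; /* signal: tracing has begun */
	hw->state = 0;          /* clear the stopped state */
}

int main(void)
{
	struct hw_event hw = { 0, 1 };

	pmu_start(&hw);
	printf("itrace_started=%d state=%d\n", hw.itrace_started, hw.state);
	return 0;
}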
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1437140050-23363-4-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index 54690e8..d1c0f25 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -222,6 +222,7 @@ static void __bts_event_start(struct perf_event *event) if (!buf || bts_buffer_is_full(buf, bts)) return; + event->hw.itrace_started = 1; event->hw.state = 0; if (!buf->snapshot) -- cgit v0.10.2 From 75881df3fd7708f234c1e2573ade812eb5701708 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 10 Sep 2015 18:01:44 +0530 Subject: ASoC: dapm: fix memory leak In case of an unknown event we were directly returning but we missed freeing params. Signed-off-by: Sudip Mukherjee Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index f4bf21a..ff8bda4 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3501,7 +3501,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, default: WARN(1, "Unknown event %d\n", event); - return -EINVAL; + ret = -EINVAL; } out: -- cgit v0.10.2 From f17b329b73a0393dc9d5fc5b4457189f92e5bbef Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Thu, 10 Sep 2015 13:40:16 +0800 Subject: ASoC: rt5645: Remove incorrect settings The patch removes the incorrect settings to avoid the pop sound in the first playback with headphone after boot. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 4972bf3..595ace9 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2829,10 +2829,6 @@ static int rt5645_jack_detect(struct snd_soc_codec *codec, int jack_insert) snd_soc_dapm_sync(dapm); rt5645->jack_type = SND_JACK_HEADPHONE; } - - snd_soc_update_bits(codec, RT5645_CHARGE_PUMP, 0x0300, 0x0200); - snd_soc_write(codec, RT5645_DEPOP_M1, 0x001d); - snd_soc_write(codec, RT5645_DEPOP_M1, 0x0001); } else { /* jack out */ rt5645->jack_type = 0; @@ -2880,8 +2876,6 @@ int rt5645_set_jack_detect(struct snd_soc_codec *codec, rt5645->en_button_func = true; regmap_update_bits(rt5645->regmap, RT5645_GPIO_CTRL1, RT5645_GP1_PIN_IRQ, RT5645_GP1_PIN_IRQ); - regmap_update_bits(rt5645->regmap, RT5645_DEPOP_M1, - RT5645_HP_CB_MASK, RT5645_HP_CB_PU); regmap_update_bits(rt5645->regmap, RT5645_GEN_CTRL1, RT5645_DIG_GATE_CTRL, RT5645_DIG_GATE_CTRL); } -- cgit v0.10.2 From dd85ebf681ef0ee1fc985c353dd45e8b53b5dc1e Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 10 Sep 2015 16:48:13 +0530 Subject: spi: spidev: fix possible NULL dereference During the last close we are freeing spidev if spidev->spi is NULL, but just before checking if spidev->spi is NULL we are dereferencing it. Let's add a check there to avoid the NULL dereference. 
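The bug class is easy to demonstrate in a standalone C sketch: the pointer has to be tested before every dereference, because the underlying device may already have been unbound while the file was still open. All names are made up; this is not the spidev code itself:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the driver structures. */
struct spi { unsigned int max_speed_hz; };
struct spidev { struct spi *spi; unsigned int speed_hz; };

/*
 * On last close: guard the dereference first, then handle the
 * already-unbound case. Dereferencing before the NULL test is exactly
 * the bug being fixed.
 */
static void last_close(struct spidev *dev)
{
	if (dev->spi)                 /* guard the dereference */
		dev->speed_hz = dev->spi->max_speed_hz;

	if (dev->spi == NULL)         /* device already unbound */
		free(dev);
}

int main(void)
{
	struct spidev *dev = calloc(1, sizeof(*dev));

	if (dev)
		last_close(dev); /* dev->spi is NULL: must not crash */
	return 0;
}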
Fixes: 9169051617df ("spi: spidev: Don't mangle max_speed_hz in underlying spi device") Signed-off-by: Sudip Mukherjee Reviewed-by: Jarkko Nikula Tested-by: Jarkko Nikula Signed-off-by: Mark Brown diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index dd616ff..e58ca17 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -651,7 +651,8 @@ static int spidev_release(struct inode *inode, struct file *filp) kfree(spidev->rx_buffer); spidev->rx_buffer = NULL; - spidev->speed_hz = spidev->spi->max_speed_hz; + if (spidev->spi) + spidev->speed_hz = spidev->spi->max_speed_hz; /* ... after we unbound from the underlying device? */ spin_lock_irq(&spidev->spi_lock); -- cgit v0.10.2 From 3758ff5f3dab57cd768d54279962a2f6bbc17188 Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Wed, 9 Sep 2015 19:29:10 +0800 Subject: ASoC: wm8960: correct the min gain value of some PGA The min gain is the corresponding gain value when the register value is 0 instead of 1, just correct it. Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 94c5c46..9c73049 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -205,11 +205,11 @@ static int wm8960_put_deemph(struct snd_kcontrol *kcontrol, return wm8960_set_deemph(codec); } -static const DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 50, 0); -static const DECLARE_TLV_DB_SCALE(dac_tlv, -12700, 50, 1); +static const DECLARE_TLV_DB_SCALE(adc_tlv, -9750, 50, 1); +static const DECLARE_TLV_DB_SCALE(dac_tlv, -12750, 50, 1); static const DECLARE_TLV_DB_SCALE(bypass_tlv, -2100, 300, 0); static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1); -static const DECLARE_TLV_DB_SCALE(boost_tlv, -1200, 300, 1); +static const DECLARE_TLV_DB_SCALE(boost_tlv, -1500, 300, 1); static const struct snd_kcontrol_new wm8960_snd_controls[] = { SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL, -- cgit v0.10.2 From 7e90f9b2b56669260e5f6f97974735d187f79b7d Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Wed, 9 Sep 2015 19:29:11 +0800 Subject: ASoC: wm8960: correct gain value for input PGA and add microphone PGA The input PGAs have a gain range from -17.25dB to +30dB in 0.75dB steps. The boost stage can provide additional gain. For line inputs, -12dB to +6dB gain is available on the boost mixer. For microphone inputs, it can provide up to +29dB additional gain from the microphone PGA. 
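The dB figures above follow directly from the TLV scale parameters in the hunk below: a scale declared with a minimum (in 0.01 dB units) and a per-step increment maps register values linearly. A worked standalone C example for the input PGA scale (minimum -1725, step 75, 64 register steps); this is an illustration, not the ALSA TLV macros themselves:

#include <stdio.h>

/*
 * Map a register value to a gain in 0.01 dB units for a linear TLV
 * scale: gain = min_db + step_db * regval.
 */
static int regval_to_centidb(int min_db, int step_db, unsigned int regval)
{
	return min_db + step_db * (int)regval;
}

int main(void)
{
	printf("reg 0  -> %d (0.01 dB)\n", regval_to_centidb(-1725, 75, 0));
	printf("reg 63 -> %d (0.01 dB)\n", regval_to_centidb(-1725, 75, 63));
	return 0;
}

Register 0 maps to -17.25 dB and register 63 to -1725 + 75 * 63 = 3000, i.e. +30 dB, matching the stated range.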
Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 9c73049..70816f0 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -206,27 +206,37 @@ static int wm8960_put_deemph(struct snd_kcontrol *kcontrol, } static const DECLARE_TLV_DB_SCALE(adc_tlv, -9750, 50, 1); +static const DECLARE_TLV_DB_SCALE(inpga_tlv, -1725, 75, 0); static const DECLARE_TLV_DB_SCALE(dac_tlv, -12750, 50, 1); static const DECLARE_TLV_DB_SCALE(bypass_tlv, -2100, 300, 0); static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1); -static const DECLARE_TLV_DB_SCALE(boost_tlv, -1500, 300, 1); +static const DECLARE_TLV_DB_SCALE(lineinboost_tlv, -1500, 300, 1); +static const unsigned int micboost_tlv[] = { + TLV_DB_RANGE_HEAD(2), + 0, 1, TLV_DB_SCALE_ITEM(0, 1300, 0), + 2, 3, TLV_DB_SCALE_ITEM(2000, 900, 0), +}; static const struct snd_kcontrol_new wm8960_snd_controls[] = { SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL, - 0, 63, 0, adc_tlv), + 0, 63, 0, inpga_tlv), SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL, 6, 1, 0), SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL, 7, 1, 0), SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume", - WM8960_INBMIX1, 4, 7, 0, boost_tlv), + WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv), SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT2 Volume", - WM8960_INBMIX1, 1, 7, 0, boost_tlv), + WM8960_INBMIX1, 1, 7, 0, lineinboost_tlv), SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT3 Volume", - WM8960_INBMIX2, 4, 7, 0, boost_tlv), + WM8960_INBMIX2, 4, 7, 0, lineinboost_tlv), SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT2 Volume", - WM8960_INBMIX2, 1, 7, 0, boost_tlv), + WM8960_INBMIX2, 1, 7, 0, lineinboost_tlv), +SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT1 Volume", + WM8960_RINPATH, 4, 4, 0, micboost_tlv), +SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT1 Volume", + WM8960_LINPATH, 4, 4, 0, micboost_tlv), SOC_DOUBLE_R_TLV("Playback Volume", WM8960_LDAC, WM8960_RDAC, 0, 255, 0, dac_tlv), -- cgit v0.10.2 From 84cba178a3b88efe2668a9039f70abda072faa21 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 10 Sep 2015 13:11:55 +0300 Subject: crypto: testmgr - don't copy from source IV too much While the destination buffer 'iv' is MAX_IVLEN size, the source 'template[i].iv' could be smaller, thus memcpy may read invalid memory. Use crypto_skcipher_ivsize() to get real ivsize and pass it to memcpy. 
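The fix pattern generalizes: bound the copy by the size the source actually provides, never by the destination's capacity. A standalone C sketch with invented names (not the testmgr code):

#include <stdio.h>
#include <string.h>

#define MAX_IVLEN 32 /* illustrative destination capacity */

/*
 * Copy only as many bytes as the source really has (the actual
 * ivsize), so short IV templates cannot cause an out-of-bounds read
 * of the source buffer.
 */
static void load_iv(char *iv, const char *template_iv, size_t ivsize)
{
	if (template_iv)
		memcpy(iv, template_iv, ivsize); /* bounded by real size */
	else
		memset(iv, 0, MAX_IVLEN);
}

int main(void)
{
	char iv[MAX_IVLEN];
	const char tmpl[8] = "12345678"; /* only 8 valid bytes */

	load_iv(iv, tmpl, sizeof(tmpl)); /* copies 8, not MAX_IVLEN */
	printf("copied %zu bytes\n", sizeof(tmpl));
	return 0;
}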
Signed-off-by: Andrey Ryabinin Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 35c2de1..fa18753 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -940,6 +940,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, char *xbuf[XBUFSIZE]; char *xoutbuf[XBUFSIZE]; int ret = -ENOMEM; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); if (testmgr_alloc_buf(xbuf)) goto out_nobuf; @@ -975,7 +976,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); @@ -1051,7 +1052,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); -- cgit v0.10.2 From 4c17a6d56bb0cad3066a714e94f7185a24b40f49 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 11 Sep 2015 16:27:27 +0200 Subject: CIFS: fix type confusion in copy offload ioctl This might lead to local privilege escalation (code execution as kernel) for systems where the following conditions are met: - CONFIG_CIFS_SMB2 and CONFIG_CIFS_POSIX are enabled - a cifs filesystem is mounted where: - the mount option "vers" was used and set to a value >=2.0 - the attacker has write access to at least one file on the filesystem To attack this, an attacker would have to guess the target_tcon pointer (but guessing wrong doesn't cause a crash, it just returns an error code) and win a narrow race. CC: Stable Signed-off-by: Jann Horn Signed-off-by: Steve French diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index c63f522..28a77bf 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); -- cgit v0.10.2 From 7f39add3b08cbbdb99abe50e6d7c342e6800d684 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 11 Sep 2015 09:03:04 -0600 Subject: block: Refuse request/bio merges with gaps in the integrity payload If a driver sets the block queue virtual boundary mask, it means that it cannot handle gaps so we must not allow those in the integrity payload as well. Signed-off-by: Sagi Grimberg Fixed up by me to have duplicate integrity merge functions, depending on whether block integrity is enabled or not. Fixes a compilation issue with CONFIG_BLK_DEV_INTEGRITY unset. 
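The "duplicate functions" fixup mentioned above follows a standard kernel header pattern: real helpers are defined under the config option and same-signature no-op stubs otherwise, so call sites need no #ifdefs. A generic standalone C sketch of the pattern, with an invented feature name:

#include <stdio.h>

/* Flip this to simulate the config option being on or off. */
#define CONFIG_FEATURE_X 1

#if CONFIG_FEATURE_X
/* Real helper, built only when the feature is enabled. */
static inline int feature_x_merge_ok(int req, int next)
{
	return req == next; /* stand-in for the real check */
}
#else
/* Stub with the same signature: callers compile unchanged and the
 * check trivially passes when the feature is compiled out. */
static inline int feature_x_merge_ok(int req, int next)
{
	return 1;
}
#endif

int main(void)
{
	printf("merge ok: %d\n", feature_x_merge_ok(1, 2));
	return 0;
}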
Signed-off-by: Jens Axboe diff --git a/block/blk-integrity.c b/block/blk-integrity.c index f548b64..75f29cf 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -204,6 +204,9 @@ bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, q->limits.max_integrity_segments) return false; + if (integrity_req_gap_back_merge(req, next->bio)) + return false; + return true; } EXPORT_SYMBOL(blk_integrity_merge_rq); diff --git a/block/blk-merge.c b/block/blk-merge.c index d9eddbc..574ea7c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -440,6 +440,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, { if (req_gap_back_merge(req, bio)) return 0; + if (blk_integrity_rq(req) && + integrity_req_gap_back_merge(req, bio)) + return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; @@ -461,6 +464,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, if (req_gap_front_merge(req, bio)) return 0; + if (blk_integrity_rq(req) && + integrity_req_gap_front_merge(req, bio)) + return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2ff94de..1aac731 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1514,6 +1514,26 @@ queue_max_integrity_segments(struct request_queue *q) return q->limits.max_integrity_segments; } +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + struct bio_integrity_payload *bip = bio_integrity(req->bio); + struct bio_integrity_payload *bip_next = bio_integrity(next); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_integrity_payload *bip_next = bio_integrity(req->bio); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1580,6 +1600,16 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g) { return 0; } +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + return false; +} +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + return false; +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v0.10.2 From 87a816df537e096d404add543ef47b796906c130 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 8 Sep 2015 09:33:35 -0600 Subject: block: Refuse appending a gapped integrity page to a bio This is only theoretical at the moment given that the only subsystems that generate integrity payloads are the block layer itself and the scsi target (which generate well aligned integrity payloads). But when we will expose integrity meta-data to user-space, we'll need to refuse appending a page with a gap (if the queue virtual boundary is set). 
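To make "gap" concrete: with a virtual boundary mask, every segment except the first must start on the boundary and every segment except the last must end on it. The following standalone C sketch uses the same arithmetic family as the kernel's bvec_gap_to_prev(), but with invented names and plain integers:

#include <stdio.h>
#include <stdbool.h>

/*
 * Two adjacent vectors violate the device's virtual boundary ("have a
 * gap") if the previous end or the next offset has low bits set under
 * the boundary mask. Illustrative sketch only.
 */
static bool gap_to_prev(unsigned long prev_offset, unsigned long prev_len,
			unsigned long next_offset, unsigned long boundary_mask)
{
	unsigned long prev_end = prev_offset + prev_len;

	if (!boundary_mask)
		return false; /* device handles gaps, nothing to check */

	return ((prev_end | next_offset) & boundary_mask) != 0;
}

int main(void)
{
	/* 4 KiB boundary (mask 0xfff): a 512-byte vector leaves a gap. */
	printf("gap: %d\n", gap_to_prev(0, 512, 0, 0xfff));
	/* A full page followed by a page-aligned vector does not. */
	printf("gap: %d\n", gap_to_prev(0, 4096, 0, 0xfff));
	return 0;
}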
Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 4aecca7..14b8faf 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -140,6 +140,11 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, iv = bip->bip_vec + bip->bip_vcnt; + if (bip->bip_vcnt && + bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev), + &bip->bip_vec[bip->bip_vcnt - 1], offset)) + return 0; + iv->bv_page = page; iv->bv_len = len; iv->bv_offset = offset; -- cgit v0.10.2 From 46348456c1791053dcbe5a9e21825b10a3c8a8fb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 3 Sep 2015 19:28:23 +0300 Subject: block: Copy a user iovec if it includes gaps For drivers that don't support gaps in the SG lists handed to them we must bounce (copy the user buffers) and pass a bio that does not include gaps. This doesn't matter for any current user, but will help to allow iser which can't handle gaps to use the block virtual boundary instead of using driver-local bounce buffering when handling SG_IO commands. Signed-off-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe diff --git a/block/blk-map.c b/block/blk-map.c index 2338416..f565e11 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,6 +9,24 @@ #include "blk.h" +static bool iovec_gap_to_prv(struct request_queue *q, + struct iovec *prv, struct iovec *cur) +{ + unsigned long prev_end; + + if (!queue_virt_boundary(q)) + return false; + + if (prv->iov_base == NULL && prv->iov_len == 0) + /* prv is not set - don't check */ + return false; + + prev_end = (unsigned long)(prv->iov_base + prv->iov_len); + + return (((unsigned long)cur->iov_base & queue_virt_boundary(q)) || + prev_end & queue_virt_boundary(q)); +} + int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio) { @@ -67,7 +85,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct bio *bio; int unaligned = 0; struct iov_iter i; - struct iovec iov; + struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0}; if (!iter || !iter->count) return -EINVAL; @@ -81,8 +99,12 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, /* * Keep going so we check length of all segments */ - if (uaddr & queue_dma_alignment(q)) + if ((uaddr & queue_dma_alignment(q)) || + iovec_gap_to_prv(q, &prv, &iov)) unaligned = 1; + + prv.iov_base = iov.iov_base; + prv.iov_len = iov.iov_len; } if (unaligned || (q->dma_pad_mask & iter->count) || map_data) -- cgit v0.10.2 From 6fe810bda0bd9a5d7674fc671fac27b8aa8ec243 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 5 Sep 2015 15:47:36 -0400 Subject: block: blkg_destroy_all() should clear q->root_blkg and ->root_rl.blkg While making the root blkg unconditional, ec13b1d6f0a0 ("blkcg: always create the blkcg_gq for the root blkcg") removed the part which clears q->root_blkg and ->root_rl.blkg during q exit. This leaves the two pointers dangling after blkg_destroy_all(). blk-throttle exit path performs blkg traversals and dereferences ->root_blkg and can lead to the following oops. BUG: unable to handle kernel NULL pointer dereference at 0000000000000558 IP: [] __blkg_lookup+0x26/0x70 ... task: ffff88001b4e2580 ti: ffff88001ac0c000 task.ti: ffff88001ac0c000 RIP: 0010:[] [] __blkg_lookup+0x26/0x70 ... 
Call Trace: [] blk_throtl_drain+0x5a/0x110 [] blkcg_drain_queue+0x18/0x20 [] __blk_drain_queue+0xc0/0x170 [] blk_queue_bypass_start+0x61/0x80 [] blkcg_deactivate_policy+0x39/0x100 [] blk_throtl_exit+0x38/0x50 [] blkcg_exit_queue+0x3e/0x50 [] blk_release_queue+0x1e/0xc0 ... While the bug is a straightforward use-after-free bug, it is tricky to reproduce because blkg release is RCU protected and the rest of exit path usually finishes before RCU grace period. This patch fixes the bug by updating blkg_destroy_all() to clear q->root_blkg and ->root_rl.blkg. Signed-off-by: Tejun Heo Reported-by: "Richard W.M. Jones" Reported-by: Josh Boyer Link: http://lkml.kernel.org/g/CA+5PVA5rzQ0s4723n5rHBcxQa9t0cW8BPPBekr_9aMRoWt2aYg@mail.gmail.com Fixes: ec13b1d6f0a0 ("blkcg: always create the blkcg_gq for the root blkcg") Cc: stable@vger.kernel.org # v4.2+ Tested-by: Richard W.M. Jones Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index d6283b3..9cc48d1d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -387,6 +387,9 @@ static void blkg_destroy_all(struct request_queue *q) blkg_destroy(blkg); spin_unlock(&blkcg->lock); } + + q->root_blkg = NULL; + q->root_rl.blkg = NULL; } /* -- cgit v0.10.2 From 716ff1921a86c637b8875c7bb312fc6755fa9300 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 08:17:39 +0100 Subject: ARM: domains: thread_info.h no longer needs asm/domains.h As of 1eef5d2f1b46 ("ARM: domains: switch to keeping domain value in register") we no longer need to include asm/domains.h into asm/thread_info.h. Remove it. Tested-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 0a0aec4..ae02e68 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -25,7 +25,6 @@ struct task_struct; #include -#include typedef unsigned long mm_segment_t; -- cgit v0.10.2 From 6e8f580d1fcc18e290713984c379cb97131c015a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 08:34:52 +0100 Subject: ARM: domains: add memory dependencies to get_domain/set_domain We need to have memory dependencies on get_domain/set_domain to avoid the compiler over-optimising these inline assembly instructions. Loads/stores must not be reordered across a set_domain(), so introduce a compiler barrier for that assembly. The value of get_domain() must not be cached across a set_domain(), but we still want to allow the compiler to optimise it away. Introduce a dependency on current_thread_info()->cpu_domain to avoid this; the new memory clobber in set_domain() should therefore cause the compiler to re-load this. The other advantage of using this is we should have its address in the register set already, or very soon after at most call sites. 
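The two constraints described above can be shown with a generic GCC-style inline-asm sketch: a "memory" clobber forbids the compiler from caching or reordering memory accesses across the asm, while an "m" input ties the asm's result to one specific location so it is reloaded after writes to it. This is an illustrative portable no-op, not the ARM domain access code:

#include <stdio.h>

static unsigned int shadow_state; /* stand-in for cpu_domain */

/* Pretend "set": the memory clobber stops the compiler from caching
 * memory values across this point or reordering accesses over it. */
static inline void set_state(unsigned int val)
{
	shadow_state = val;
	asm volatile("" : : "r" (val) : "memory");
}

/* Pretend "get": the "m" input creates a dependency on shadow_state,
 * so the compiler must reload after any write to it, yet may still
 * optimise the whole thing away if the result is unused. */
static inline unsigned int get_state(void)
{
	unsigned int v;

	asm("" : "=r" (v) : "0" (shadow_state), "m" (shadow_state));
	return v;
}

int main(void)
{
	set_state(3);
	printf("state: %u\n", get_state());
	return 0;
}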
Tested-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index e878129..fc8ba16 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include +#include #endif /* @@ -89,7 +90,8 @@ static inline unsigned int get_domain(void) asm( "mrc p15, 0, %0, c3, c0 @ get domain" - : "=r" (domain)); + : "=r" (domain) + : "m" (current_thread_info()->cpu_domain)); return domain; } @@ -98,7 +100,7 @@ static inline void set_domain(unsigned val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" - : : "r" (val)); + : : "r" (val) : "memory"); isb(); } -- cgit v0.10.2 From 0b61f2c0f37983c98ed4207f3f5e265938371b68 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 11 Sep 2015 17:25:59 +0100 Subject: arm/xen: Enable user access to the kernel before issuing a privcmd call When Xen is copying data to/from the guest it will check if the kernel has the right to do the access. If not, the hypercall will return an error. After the commit a5e090acbf545c0a3b04080f8a488b17ec41fe02 "ARM: software-based privileged-no-access support", the kernel can no longer access the user space by default. This will cause every hypercall made by the userspace (i.e. via privcmd) to fail. We have to enable the userspace access and then restore the correct permission every time privcmd is used to make a hypercall. I didn't find generic helpers to do these operations, so the change is arm32 specific. Reported-by: Riku Voipio Signed-off-by: Julien Grall Signed-off-by: Russell King diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index f00e080..10fd99c 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -98,8 +98,23 @@ ENTRY(privcmd_call) mov r1, r2 mov r2, r3 ldr r3, [sp, #8] + /* + * Privcmd calls are issued by the userspace. We need to allow the + * kernel to access the userspace memory before issuing the hypercall. + */ + uaccess_enable r4 + + /* r4 is loaded now as we use it as scratch register before */ ldr r4, [sp, #4] __HVC(XEN_IMM) + + /* + * Disable userspace access from kernel. This is fine to do it + * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is + * called before. + */ + uaccess_disable r4 + ldm sp!, {r4} ret lr ENDPROC(privcmd_call); -- cgit v0.10.2 From a4a5a7379e4ca03c192b732d61e446994eb67bbc Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Fri, 11 Sep 2015 17:12:27 +0100 Subject: ARM: 8431/1: fix alignment of __bug_table section entries On old ARM chips, unaligned accesses to memory are not trapped and fixed. On module load, symbols are relocated, and the relocation of __bug_table symbols is done on a u32 basis. Yet the section is not aligned to a multiple of 4 address, but to a multiple of 2. 
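The address arithmetic behind this is simple enough to check in a standalone C sketch; 0xbf0021ea is the faulting address from the oops quoted next, everything else is invented:

#include <stdio.h>

/*
 * A u32 store needs a 4-byte-aligned address. A section aligned only
 * to a multiple of 2 can place entries at addresses where the low two
 * bits are not both clear.
 */
static int u32_aligned(unsigned long addr)
{
	return (addr & 3UL) == 0;
}

int main(void)
{
	unsigned long addr = 0xbf0021eaUL;

	printf("%#lx %s 4-byte aligned (addr & 3 = %lu)\n",
	       addr, u32_aligned(addr) ? "is" : "is NOT", addr & 3UL);
	return 0;
}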
This triggers an Oops on pxa architecture, where address 0xbf0021ea is the first relocation in the __bug_table section : apply_relocate(): pxa3xx_nand: section 13 reloc 0 sym '' Unable to handle kernel paging request at virtual address bf0021ea pgd = e1cd0000 [bf0021ea] *pgd=c1cce851, *pte=c1cde04f, *ppte=c1cde01f Internal error: Oops: 23 [#1] ARM Modules linked in: CPU: 0 PID: 606 Comm: insmod Not tainted 4.2.0-rc8-next-20150828-cm-x300+ #887 Hardware name: CM-X300 module task: e1c68700 ti: e1c3e000 task.ti: e1c3e000 PC is at apply_relocate+0x2f4/0x3d4 LR is at 0xbf0021ea pc : [] lr : [] psr: 80000013 sp : e1c3fe30 ip : 60000013 fp : e49e8c60 r10: e49e8fa8 r9 : 00000000 r8 : e49e7c58 r7 : e49e8c38 r6 : e49e8a58 r5 : e49e8920 r4 : e49e8918 r3 : bf0021ea r2 : bf007034 r1 : 00000000 r0 : bf000000 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 0000397f Table: c1cd0018 DAC: 00000051 Process insmod (pid: 606, stack limit = 0xe1c3e198) [] (apply_relocate) from [] (load_module+0x1248/0x1f5c) [] (load_module) from [] (SyS_init_module+0xe4/0x170) [] (SyS_init_module) from [] (ret_fast_syscall+0x0/0x38) Fix this by ensuring entries in __bug_table are all aligned to at least a multiple of 4. This transforms a module section __bug_table as : - [12] __bug_table PROGBITS 00000000 002232 000018 00 A 0 0 1 + [12] __bug_table PROGBITS 00000000 002232 000018 00 A 0 0 4 Signed-off-by: Robert Jarzmik Reviewed-by: Dave Martin Signed-off-by: Russell King diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index b274bde..e7335a9 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -40,6 +40,7 @@ do { \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ ".pushsection __bug_table,\"a\"\n" \ + ".align 2\n" \ "3:\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ ".popsection"); \ -- cgit v0.10.2 From 294ab783ad98066b87296db1311c7ba2a60206a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Sep 2015 18:04:18 +0200 Subject: scsi_dh: fix randconfig build error It looks like the Kconfig check that was meant to fix this (commit fe9233fb6914a0eb20166c967e3020f7f0fba2c9 [SCSI] scsi_dh: fix kconfig related build errors) was actually reversed, but no-one noticed until the new set of patches which separated DM and SCSI_DH. Fixes: fe9233fb6914a0eb20166c967e3020f7f0fba2c9 Signed-off-by: Christoph Hellwig Tested-by: Mike Snitzer Signed-off-by: James Bottomley diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b597273..e9ea681 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -393,7 +393,7 @@ config DM_MULTIPATH # of SCSI_DH if the latter isn't defined but if # it is, DM_MULTIPATH must depend on it. We get a build # error if SCSI_DH=m and DM_MULTIPATH=y - depends on SCSI_DH || !SCSI_DH + depends on !SCSI_DH || SCSI ---help--- Allow volume managers to support multipath hardware. -- cgit v0.10.2 From 0ba13fd19d39b7cb672bcec052bc813389c079a4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 Sep 2015 13:26:39 -0700 Subject: Revert "writeback: plug writeback at a high level" This reverts commit d353d7587d02116b9732d5c06615aed75a4d3a47. Doing the block layer plug/unplug inside writeback_sb_inodes() is broken, because that function is actually called with a spinlock held: wb->list_lock, as pointed out by Chris Mason. Chris suggested just dropping and re-taking the spinlock around the blk_finish_plug() call (the plugging itself can happen under the spinlock), and that would technically work, but is just disgusting. 
We do something fairly similar - but not quite as disgusting because we at least have a better reason for it - in writeback_single_inode(), so it's not like the caller can depend on the lock being held over the call, but in this case there just isn't any good reason for that "release and re-take the lock" pattern. [ In general, we should really strive to avoid the "release and retake" pattern for locks, because in the general case it can easily cause subtle bugs when the caller caches any state around the call that might be invalidated by dropping the lock even just temporarily. ] But in this case, the plugging should be easy to just move up to the callers before the spinlock is taken, which should even improve the effectiveness of the plug. So there is really no good reason to play games with locking here. I'll send off a test-patch so that Dave Chinner can verify that that plug movement works. In the meantime this just reverts the problematic commit and adds a comment to the function so that we hopefully don't make this mistake again. Reported-by: Chris Mason Cc: Josef Bacik Cc: Dave Chinner Cc: Neil Brown Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Linus Torvalds diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2448912..d8ea7ed 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1380,6 +1380,10 @@ static long writeback_chunk_size(struct bdi_writeback *wb, * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. + * + * NOTE! This is called with wb->list_lock held, and will + * unlock and relock that for each inode it ends up doing + * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, @@ -1398,9 +1402,7 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long wrote = 0; /* count both pages and inodes */ - struct blk_plug plug; - blk_start_plug(&plug); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); @@ -1498,7 +1500,6 @@ static long writeback_sb_inodes(struct super_block *sb, break; } } - blk_finish_plug(&plug); return wrote; } -- cgit v0.10.2 From 1853c949646005b5959c483becde86608f548f24 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 10 Sep 2015 20:05:46 +0200 Subject: netlink, mmap: transform mmap skb into full skb on taps Ken-ichirou reported that running netlink in mmap mode for receive in combination with nlmon will throw a NULL pointer dereference in __kfree_skb() on nlmon_xmit(), in my case I can also trigger an "unable to handle kernel paging request". The problem is the skb_clone() in __netlink_deliver_tap_skb() for skbs that are mmaped. I.e. the cloned skb doesn't have a destructor, whereas the mmap netlink skb has it pointed to netlink_skb_destructor(), set in the handler netlink_ring_setup_skb(). There, skb->head is being set to NULL, so that in such cases, __kfree_skb() doesn't perform a skb_release_data() via skb_release_all(), where skb->head is possibly being freed through kfree(head) into slab allocator, although netlink mmap skb->head points to the mmap buffer. Similarly, the same has to be done also for large netlink skbs where the data area is vmalloced. Therefore, as discussed, make a copy for these rather rare cases for now. This fixes the issue on my and Ken-ichirou's test-cases. 
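The rule the patch applies can be sketched generically: a buffer whose storage is not ordinary heap memory (an mmap ring or a vmalloc area) must be deep-copied for the tap rather than cloned, because a clone would share a head it cannot safely free. Illustrative standalone C with invented names, not the skb API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

struct buf {
	char *head;
	size_t len;
	bool mmaped;    /* head points into an mmap ring */
	bool vmalloced; /* head came from a vmalloc-like area */
};

/* Deep copy: fresh heap-backed head, metadata carried over. */
static struct buf *to_full_buf(const struct buf *b)
{
	struct buf *n = malloc(sizeof(*n));

	if (!n)
		return NULL;
	n->head = malloc(b->len);
	if (!n->head) {
		free(n);
		return NULL;
	}
	memcpy(n->head, b->head, b->len);
	n->len = b->len;
	n->mmaped = n->vmalloced = false; /* the copy owns plain heap */
	return n;
}

int main(void)
{
	char ring[17] = "ring-backed data";
	struct buf b = { ring, sizeof(ring), true, false };
	struct buf *n;

	/* The decision made in the tap path: */
	if (b.mmaped || b.vmalloced)
		n = to_full_buf(&b); /* deep copy, safe to free later */
	else
		n = NULL;            /* real code would clone here */

	if (n) {
		printf("tap sees %zu bytes from a heap copy\n", n->len);
		free(n->head);
		free(n);
	}
	return 0;
}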
Reference: http://thread.gmane.org/gmane.linux.network/371129 Fixes: bcbde0d449ed ("net: netlink: virtual tap device management") Reported-by: Ken-ichirou MATSUZAWA Signed-off-by: Daniel Borkmann Tested-by: Ken-ichirou MATSUZAWA Signed-off-by: David S. Miller diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7f86d3b..4cad99d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -125,6 +125,24 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } +static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, + gfp_t gfp_mask) +{ + unsigned int len = skb_end_offset(skb); + struct sk_buff *new; + + new = alloc_skb(len, gfp_mask); + if (new == NULL) + return NULL; + + NETLINK_CB(new).portid = NETLINK_CB(skb).portid; + NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; + NETLINK_CB(new).creds = NETLINK_CB(skb).creds; + + memcpy(skb_put(new, len), skb->data, len); + return new; +} + int netlink_add_tap(struct netlink_tap *nt) { if (unlikely(nt->dev->type != ARPHRD_NETLINK)) @@ -206,7 +224,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, int ret = -ENOMEM; dev_hold(dev); - nskb = skb_clone(skb, GFP_ATOMIC); + + if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + nskb = netlink_to_full_skb(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); @@ -279,11 +301,6 @@ static void netlink_rcv_wake(struct sock *sk) } #ifdef CONFIG_NETLINK_MMAP -static bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -} - static bool netlink_rx_is_mmaped(struct sock *sk) { return nlk_sk(sk)->rx_ring.pg_vec != NULL; @@ -846,7 +863,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) } #else /* CONFIG_NETLINK_MMAP */ -#define netlink_skb_is_mmaped(skb) false #define netlink_rx_is_mmaped(sk) false #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 8900840..df9a060 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -59,6 +59,15 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } +static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +#else + return false; +#endif /* CONFIG_NETLINK_MMAP */ +} + struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; -- cgit v0.10.2 From 19539ce783dd27768d4f7f3b753152bf983db65b Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Thu, 10 Sep 2015 18:25:07 -0600 Subject: ebpf: emit correct src_reg for conditional jumps Instead of always emitting BPF_REG_X, let's emit BPF_REG_X only when the source actually is BPF_X. This causes programs generated by the classic converter to not be importable via bpf(), as the eBPF verifier checks that the src_reg is correct or 0. While not a problem yet, this will be a problem when BPF_PROG_DUMP lands, and we can potentially dump and re-import programs generated by the converter. Signed-off-by: Tycho Andersen CC: Alexei Starovoitov CC: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller diff --git a/net/core/filter.c b/net/core/filter.c index 13079f0..05a04ea 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -478,9 +478,9 @@ do_pass: bpf_src = BPF_X; } else { insn->dst_reg = BPF_REG_A; - insn->src_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); + insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0; } /* Common case where 'jump_false' is next insn. */ -- cgit v0.10.2 From 8e2d61e0aed2b7c4ecb35844fe07e0b2b762dee4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 10 Sep 2015 17:31:15 -0300 Subject: sctp: fix race on protocol/netns initialization Consider that the sctp module is unloaded and is being requested because a user is creating an SCTP socket. During initialization, sctp will add the new protocol type and then initialize pernet subsys: status = sctp_v4_protosw_init(); if (status) goto err_protosw_init; status = sctp_v6_protosw_init(); if (status) goto err_v6_protosw_init; status = register_pernet_subsys(&sctp_net_ops); The problem is that after those calls to sctp_v{4,6}_protosw_init(), it is possible for userspace to create SCTP sockets as if the module were already fully loaded. If that happens, one of the possible effects is that we will have readers for net->sctp.local_addr_list earlier than expected and sctp_net_init() does not take precautions while dealing with that list, leading to a potential panic, but not limited to that, as sctp_sock_init() will copy a bunch of blank/partially initialized values from net->sctp. The race happens like this:

   CPU 0                                 CPU 1
   socket()
   __sock_create                         socket()
   inet_create                           __sock_create
   list_for_each_entry_rcu(
       answer, &inetsw[sock->type],
       list) {                           inet_create
   /* no hits */
   if (unlikely(err)) {
       ...
       request_module()
   /* socket creation is blocked
    * the module is fully loaded
    */
   sctp_init
   sctp_v4_protosw_init
   inet_register_protosw
   list_add_rcu(&p->list,
                last_perm);
                                         list_for_each_entry_rcu(
                                             answer, &inetsw[sock->type],
   sctp_v6_protosw_init                      list) {
                                         /* hit, so assumes protocol
                                          * is already loaded
                                          */
                                         /* socket creation continues
                                          * before netns is initialized
                                          */
   register_pernet_subsys

Simply inverting the initialization order between register_pernet_subsys() and sctp_v4_protosw_init() is not possible because register_pernet_subsys() will create a control sctp socket, so the protocol must be already visible by then. Deferring the socket creation to a work-queue is not good, especially because we lose the ability to handle its errors. So, as suggested by Vlad, the fix is to split netns initialization in two moments: defaults and control socket, so that the defaults are already loaded by the time we register the protocol, while control socket initialization is kept at the same moment it is today. Fixes: 4db67e808640 ("sctp: Make the address lists per network namespace") Signed-off-by: Vlad Yasevich Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index b714333..3d9ea9a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1186,7 +1186,7 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } -static int __net_init sctp_net_init(struct net *net) +static int __net_init sctp_defaults_init(struct net *net) { int status; @@ -1279,12 +1279,6 @@ static int __net_init sctp_net_init(struct net *net) sctp_dbg_objcnt_init(net); - /* Initialize the control inode/socket for handling OOTB packets.
*/ - if ((status = sctp_ctl_sock_init(net))) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } - /* Initialize the local address list. */ INIT_LIST_HEAD(&net->sctp.local_addr_list); spin_lock_init(&net->sctp.local_addr_lock); @@ -1300,9 +1294,6 @@ static int __net_init sctp_net_init(struct net *net) return 0; -err_ctl_sock_init: - sctp_dbg_objcnt_exit(net); - sctp_proc_exit(net); err_init_proc: cleanup_sctp_mibs(net); err_init_mibs: @@ -1311,15 +1302,12 @@ err_sysctl_register: return status; } -static void __net_exit sctp_net_exit(struct net *net) +static void __net_exit sctp_defaults_exit(struct net *net) { /* Free the local address list */ sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(net->sctp.ctl_sock); - sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); @@ -1327,9 +1315,32 @@ static void __net_exit sctp_net_exit(struct net *net) sctp_sysctl_net_unregister(net); } -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, +static struct pernet_operations sctp_defaults_ops = { + .init = sctp_defaults_init, + .exit = sctp_defaults_exit, +}; + +static int __net_init sctp_ctrlsock_init(struct net *net) +{ + int status; + + /* Initialize the control inode/socket for handling OOTB packets. */ + status = sctp_ctl_sock_init(net); + if (status) + pr_err("Failed to initialize the SCTP control sock\n"); + + return status; +} + +static void __net_init sctp_ctrlsock_exit(struct net *net) +{ + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); +} + +static struct pernet_operations sctp_ctrlsock_ops = { + .init = sctp_ctrlsock_init, + .exit = sctp_ctrlsock_exit, }; /* Initialize the universe into something sensible. */ @@ -1462,8 +1473,11 @@ static __init int sctp_init(void) sctp_v4_pf_init(); sctp_v6_pf_init(); - status = sctp_v4_protosw_init(); + status = register_pernet_subsys(&sctp_defaults_ops); + if (status) + goto err_register_defaults; + status = sctp_v4_protosw_init(); if (status) goto err_protosw_init; @@ -1471,9 +1485,9 @@ static __init int sctp_init(void) if (status) goto err_v6_protosw_init; - status = register_pernet_subsys(&sctp_net_ops); + status = register_pernet_subsys(&sctp_ctrlsock_ops); if (status) - goto err_register_pernet_subsys; + goto err_register_ctrlsock; status = sctp_v4_add_protocol(); if (status) @@ -1489,12 +1503,14 @@ out: err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: - unregister_pernet_subsys(&sctp_net_ops); -err_register_pernet_subsys: + unregister_pernet_subsys(&sctp_ctrlsock_ops); +err_register_ctrlsock: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: + unregister_pernet_subsys(&sctp_defaults_ops); +err_register_defaults: sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1527,12 +1543,14 @@ static __exit void sctp_exit(void) sctp_v6_del_protocol(); sctp_v4_del_protocol(); - unregister_pernet_subsys(&sctp_net_ops); + unregister_pernet_subsys(&sctp_ctrlsock_ops); /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); + unregister_pernet_subsys(&sctp_defaults_ops); + /* Unregister with socket layer. 
*/ sctp_v6_pf_exit(); sctp_v4_pf_exit(); -- cgit v0.10.2 From a19a19de8310fb8ca2ca0621a9db1aab082943c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Sep 2015 11:33:01 +0200 Subject: bnx2x: use ktime_get_seconds() for timestamp commit c48f350ff5e7 "bnx2x: Add MFW dump support" added the bnx2x_update_mfw_dump() function that reads the current time and stores it in a 32-bit field that gets passed into a buffer in a fixed format. This is potentially broken when the epoch overflows in 2038, and otherwise overflows in 2106. As we're trying to avoid uses of struct timeval for this reason, I noticed the addition of this function, and tried to rewrite it in a way that is more explicit about the overflow and that will keep working once we deprecate struct timeval. I assume that it is not possible to change the ABI any more, otherwise we should try to use a 64-bit field for the seconds right away. Signed-off-by: Arnd Bergmann Cc: Yuval Mintz Cc: Ariel Elior Acked-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e3da2bd..89a174f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3705,16 +3705,14 @@ out: void bnx2x_update_mfw_dump(struct bnx2x *bp) { - struct timeval epoc; u32 drv_ver; u32 valid_dump; if (!SHMEM2_HAS(bp, drv_info)) return; - /* Update Driver load time */ - do_gettimeofday(&epoc); - SHMEM2_WR(bp, drv_info.epoc, epoc.tv_sec); + /* Update Driver load time, possibly broken in y2038 */ + SHMEM2_WR(bp, drv_info.epoc, (u32)ktime_get_real_seconds()); drv_ver = bnx2x_update_mng_version_utility(DRV_MODULE_VERSION, true); SHMEM2_WR(bp, drv_info.drv_ver, drv_ver); -- cgit v0.10.2 From c2d4fbd2163e607915cc05798ce7fb7f31117cc1 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Fri, 11 Sep 2015 18:39:48 +0200 Subject: bridge: fix igmpv3 / mldv2 report parsing With the newly introduced helper functions the skb pulling is hidden in the checksumming function - and undone before returning to the caller. The IGMPv3 and MLDv2 report parsing functions in the bridge still assumed that the skb is pointing to the beginning of the IGMP/MLD message while it is now kept at the beginning of the IPv4/6 header, breaking the message parsing and creating packet loss. Fixing this by taking the offset between IP and IGMP/MLD header into account, too. Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Tobias Powalowski Tested-by: Tobias Powalowski Signed-off-by: Linus Lüssing Signed-off-by: David S.
Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 66efdc2..480b3de 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1006,7 +1006,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); - len = sizeof(*ih); + len = skb_transport_offset(skb) + sizeof(*ih); for (i = 0; i < num; i++) { len += sizeof(*grec); @@ -1067,7 +1067,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); - len = sizeof(*icmp6h); + len = skb_transport_offset(skb) + sizeof(*icmp6h); for (i = 0; i < num; i++) { __be16 *nsrcs, _nsrcs; -- cgit v0.10.2 From 447e9a4d27484175a84daaa8e03d35c650f443b7 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 11 Sep 2015 12:52:26 -0400 Subject: IB/ehca: Deprecate driver, move to staging, schedule deletion The ehca driver is only supported on IBM machines with a custom EBus. As they have opted to build their newer machines using more industry standard technology and haven't really been pushing EBus capable machines for a while, this driver can now safely be moved to the staging area and scheduled for eventual removal. This plan was brought to IBM's attention and received their sign-off. Cc: alexs@linux.vnet.ibm.com Cc: hnguyen@de.ibm.com Cc: raisch@de.ibm.com Cc: stefan.roscher@de.ibm.com Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index da4c697..aa26f3c 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -56,7 +56,6 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 1bdb999..aded2a5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,6 +1,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig deleted file mode 100644 index 59f807d..0000000 --- a/drivers/infiniband/hw/ehca/Kconfig +++ /dev/null @@ -1,9 +0,0 @@ -config INFINIBAND_EHCA - tristate "eHCA support" - depends on IBMEBUS - ---help--- - This driver supports the IBM pSeries eHCA InfiniBand adapter. - - To compile the driver as a module, choose M here. The module - will be called ib_ehca. - diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile deleted file mode 100644 index 74d284e..0000000 --- a/drivers/infiniband/hw/ehca/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# Authors: Heiko J Schick -# Christoph Raisch -# Joachim Fenkes -# -# Copyright (c) 2005 IBM Corporation -# -# All rights reserved. -# -# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. 
- -obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o - -ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ - ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ - ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o - diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c deleted file mode 100644 index 4659263..0000000 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * address vector functions - * - * Authors: Hoang-Nam Nguyen - * Khadija Souissi - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static struct kmem_cache *av_cache; - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd) -{ - int path = ib_rate_to_mult(path_rate); - int link, ret; - struct ib_port_attr pa; - - if (path_rate == IB_RATE_PORT_CURRENT) { - *ipd = 0; - return 0; - } - - if (unlikely(path < 0)) { - ehca_err(&shca->ib_device, "Invalid static rate! 
path_rate=%x", - path_rate); - return -EINVAL; - } - - ret = ehca_query_port(&shca->ib_device, port, &pa); - if (unlikely(ret < 0)) { - ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); - return ret; - } - - link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - - if (path >= link) - /* no need to throttle if path faster than link */ - *ipd = 0; - else - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; - - return 0; -} - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) -{ - int ret; - struct ehca_av *av; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - - av = kmem_cache_alloc(av_cache, GFP_KERNEL); - if (!av) { - ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", - pd, ah_attr); - return ERR_PTR(-ENOMEM); - } - - av->av.sl = ah_attr->sl; - av->av.dlid = ah_attr->dlid; - av->av.slid_path_bits = ah_attr->src_path_bits; - - if (ehca_static_rate < 0) { - u32 ipd; - if (ehca_calc_ipd(shca, ah_attr->port_num, - ah_attr->static_rate, &ipd)) { - ret = -EINVAL; - goto create_ah_exit1; - } - av->av.ipd = ipd; - } else - av->av.ipd = ehca_static_rate; - - av->av.lnh = ah_attr->ah_flags; - av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(pd->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ret = -EINVAL; - ehca_err(pd->device, "Invalid port number " - "ehca_query_port() returned %x " - "pd=%p ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memset(&gid, 0, sizeof(gid)); - rc = ehca_query_gid(pd->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ret = -EINVAL; - ehca_err(pd->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "pd=%p ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); - } - av->av.pmtu = shca->max_mtu; - - /* dgid comes in grh.word_3 */ - memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - return &av->ib_ah; - -create_ah_exit1: - kmem_cache_free(av_cache, av); - - return ERR_PTR(ret); -} - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av; - struct ehca_ud_av new_ehca_av; - struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, - ib_device); - - memset(&new_ehca_av, 0, sizeof(new_ehca_av)); - new_ehca_av.sl = ah_attr->sl; - new_ehca_av.dlid = ah_attr->dlid; - new_ehca_av.slid_path_bits = ah_attr->src_path_bits; - new_ehca_av.ipd = ah_attr->static_rate; - new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, - (ah_attr->ah_flags & IB_AH_GRH) > 0); - new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); - - /* set sgid in grh.word_1 */ - if 
(ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(ah->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ehca_err(ah->device, "Invalid port number " - "ehca_query_port() returned %x " - "ah=%p ah_attr=%p port_num=%x", - rc, ah, ah_attr, ah_attr->port_num); - return -EINVAL; - } - memset(&gid, 0, sizeof(gid)); - rc = ehca_query_gid(ah->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ehca_err(ah->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "ah=%p ah_attr=%p port_num=%x " - "sgid_index=%x", - rc, ah, ah_attr, ah_attr->port_num, - ah_attr->grh.sgid_index); - return -EINVAL; - } - memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); - } - - new_ehca_av.pmtu = shca->max_mtu; - - memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - av = container_of(ah, struct ehca_av, ib_ah); - av->av = new_ehca_av; - - return 0; -} - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); - - memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, - sizeof(ah_attr->grh.dgid)); - ah_attr->sl = av->av.sl; - - ah_attr->dlid = av->av.dlid; - - ah_attr->src_path_bits = av->av.slid_path_bits; - ah_attr->static_rate = av->av.ipd; - ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); - ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, - av->av.grh.word_0); - ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, - av->av.grh.word_0); - ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, - av->av.grh.word_0); - - return 0; -} - -int ehca_destroy_ah(struct ib_ah *ah) -{ - kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); - - return 0; -} - -int ehca_init_av_cache(void) -{ - av_cache = kmem_cache_create("ehca_cache_av", - sizeof(struct ehca_av), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!av_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_av_cache(void) -{ - if (av_cache) - kmem_cache_destroy(av_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h deleted file mode 100644 index bd45e0f..0000000 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ /dev/null @@ -1,482 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Struct definition for eHCA internal structures - * - * Authors: Heiko J Schick - * Christoph Raisch - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_CLASSES_H__ -#define __EHCA_CLASSES_H__ - -struct ehca_module; -struct ehca_qp; -struct ehca_cq; -struct ehca_eq; -struct ehca_mr; -struct ehca_mw; -struct ehca_pd; -struct ehca_av; - -#include -#include - -#include -#include - -#ifdef CONFIG_PPC64 -#include "ehca_classes_pSeries.h" -#endif -#include "ipz_pt_fn.h" -#include "ehca_qes.h" -#include "ehca_irq.h" - -#define EHCA_EQE_CACHE_SIZE 20 -#define EHCA_MAX_NUM_QUEUES 0xffff - -struct ehca_eqe_cache_entry { - struct ehca_eqe *eqe; - struct ehca_cq *cq; -}; - -struct ehca_eq { - u32 length; - struct ipz_queue ipz_queue; - struct ipz_eq_handle ipz_eq_handle; - struct work_struct work; - struct h_galpas galpas; - int is_initialized; - struct ehca_pfeq pf; - spinlock_t spinlock; - struct tasklet_struct interrupt_task; - u32 ist; - spinlock_t irq_spinlock; - struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; -}; - -struct ehca_sma_attr { - u16 lid, lmc, sm_sl, sm_lid; - u16 pkey_tbl_len, pkeys[16]; -}; - -struct ehca_sport { - struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_sqp[2]; - /* lock to serialze modify_qp() calls for sqp in normal - * and irq path (when event PORT_ACTIVE is received first time) - */ - spinlock_t mod_sqp_lock; - enum ib_port_state port_state; - struct ehca_sma_attr saved_attr; - u32 pma_qp_nr; -}; - -#define HCA_CAP_MR_PGSIZE_4K 0x80000000 -#define HCA_CAP_MR_PGSIZE_64K 0x40000000 -#define HCA_CAP_MR_PGSIZE_1M 0x20000000 -#define HCA_CAP_MR_PGSIZE_16M 0x10000000 - -struct ehca_shca { - struct ib_device ib_device; - struct platform_device *ofdev; - u8 num_ports; - int hw_level; - struct list_head shca_list; - struct ipz_adapter_handle ipz_hca_handle; - struct ehca_sport sport[2]; - struct ehca_eq eq; - struct ehca_eq neq; - struct ehca_mr *maxmr; - struct ehca_pd *pd; - struct h_galpas galpas; - struct mutex modify_mutex; - u64 hca_cap; - /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ - u32 hca_cap_mr_pgsize; - int max_mtu; - int max_num_qps; - int max_num_cqs; - atomic_t num_cqs; - atomic_t num_qps; -}; - -struct ehca_pd { - struct ib_pd ib_pd; - struct ipz_pd fw_pd; - /* small queue mgmt */ - struct mutex lock; - struct list_head free[2]; - struct list_head full[2]; -}; - -enum ehca_ext_qp_type { - EQPT_NORMAL = 0, - EQPT_LLQP = 1, - EQPT_SRQBASE = 2, - EQPT_SRQ = 3, -}; - -/* struct to cache modify_qp()'s parms for GSI/SMI qp */ -struct ehca_mod_qp_parm { - int mask; - struct ib_qp_attr attr; -}; - -#define EHCA_MOD_QP_PARM_MAX 4 - -#define QMAP_IDX_MASK 0xFFFFULL - -/* struct for tracking if cqes have been reported to the application */ -struct ehca_qmap_entry { - u16 app_wr_id; - u8 reported; - u8 cqe_req; -}; - -struct ehca_queue_map { - struct ehca_qmap_entry *map; - unsigned int 
entries; - unsigned int tail; - unsigned int left_to_poll; - unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ -}; - -/* function to calculate the next index for the qmap */ -static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) -{ - unsigned int temp = cur_index + 1; - return (temp == limit) ? 0 : temp; -} - -struct ehca_qp { - union { - struct ib_qp ib_qp; - struct ib_srq ib_srq; - }; - u32 qp_type; - enum ehca_ext_qp_type ext_type; - enum ib_qp_state state; - struct ipz_queue ipz_squeue; - struct ehca_queue_map sq_map; - struct ipz_queue ipz_rqueue; - struct ehca_queue_map rq_map; - struct h_galpas galpas; - u32 qkey; - u32 real_qp_num; - u32 token; - spinlock_t spinlock_s; - spinlock_t spinlock_r; - u32 sq_max_inline_data_size; - struct ipz_qp_handle ipz_qp_handle; - struct ehca_pfqp pf; - struct ib_qp_init_attr init_attr; - struct ehca_cq *send_cq; - struct ehca_cq *recv_cq; - unsigned int sqerr_purgeflag; - struct hlist_node list_entries; - /* array to cache modify_qp()'s parms for GSI/SMI qp */ - struct ehca_mod_qp_parm *mod_qp_parm; - int mod_qp_parm_idx; - /* mmap counter for resources mapped into user space */ - u32 mm_count_squeue; - u32 mm_count_rqueue; - u32 mm_count_galpa; - /* unsolicited ack circumvention */ - int unsol_ack_circ; - int mtu_shift; - u32 message_count; - u32 packet_count; - atomic_t nr_events; /* events seen */ - wait_queue_head_t wait_completion; - int mig_armed; - struct list_head sq_err_node; - struct list_head rq_err_node; -}; - -#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) -#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) -#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) - -/* must be power of 2 */ -#define QP_HASHTAB_LEN 8 - -struct ehca_cq { - struct ib_cq ib_cq; - struct ipz_queue ipz_queue; - struct h_galpas galpas; - spinlock_t spinlock; - u32 cq_number; - u32 token; - u32 nr_of_entries; - struct ipz_cq_handle ipz_cq_handle; - struct ehca_pfcq pf; - spinlock_t cb_lock; - struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; - struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - atomic_t nr_events; /* #events seen */ - wait_queue_head_t wait_completion; - spinlock_t task_lock; - /* mmap counter for resources mapped into user space */ - u32 mm_count_queue; - u32 mm_count_galpa; - struct list_head sqp_err_list; - struct list_head rqp_err_list; -}; - -enum ehca_mr_flag { - EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ - EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ -}; - -struct ehca_mr { - union { - struct ib_mr ib_mr; /* must always be first in ehca_mr */ - struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ - } ib; - struct ib_umem *umem; - spinlock_t mrlock; - - enum ehca_mr_flag flags; - u32 num_kpages; /* number of kernel pages */ - u32 num_hwpages; /* number of hw pages to form MR */ - u64 hwpage_size; /* hw page size used for this MR */ - int acl; /* ACL (stored here for usage in reregister) */ - u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ - u64 size; /* size (stored here for usage in reregister) */ - u32 fmr_page_size; /* page size for FMR */ - u32 fmr_max_pages; /* max pages for FMR */ - u32 fmr_max_maps; /* max outstanding maps for FMR */ - u32 fmr_map_cnt; /* map counter for FMR */ - /* fw specific data */ - struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ - struct h_galpas galpas; -}; - -struct ehca_mw { - struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ - 
spinlock_t mwlock; - - u8 never_bound; /* indication MW was never bound */ - struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ - struct h_galpas galpas; -}; - -enum ehca_mr_pgi_type { - EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, - * ehca_rereg_phys_mr, - * ehca_reg_internal_maxmr */ - EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ - EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ -}; - -struct ehca_mr_pginfo { - enum ehca_mr_pgi_type type; - u64 num_kpages; - u64 kpage_cnt; - u64 hwpage_size; /* hw page size used for this MR */ - u64 num_hwpages; /* number of hw pages */ - u64 hwpage_cnt; /* counter for hw pages */ - u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ - - union { - struct { /* type EHCA_MR_PGI_PHYS section */ - int num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - } phy; - struct { /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct scatterlist *next_sg; - u64 next_nmap; - } usr; - struct { /* type EHCA_MR_PGI_FMR section */ - u64 fmr_pgsize; - u64 *page_list; - u64 next_listelem; - } fmr; - } u; -}; - -/* output parameters for MR/FMR hipz calls */ -struct ehca_mr_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 lkey; - u32 rkey; - u64 len; - u64 vaddr; - u32 acl; -}; - -/* output parameters for MW hipz calls */ -struct ehca_mw_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 rkey; -}; - -struct ehca_av { - struct ib_ah ib_ah; - struct ehca_ud_av av; -}; - -struct ehca_ucontext { - struct ib_ucontext ib_ucontext; -}; - -int ehca_init_pd_cache(void); -void ehca_cleanup_pd_cache(void); -int ehca_init_cq_cache(void); -void ehca_cleanup_cq_cache(void); -int ehca_init_qp_cache(void); -void ehca_cleanup_qp_cache(void); -int ehca_init_av_cache(void); -void ehca_cleanup_av_cache(void); -int ehca_init_mrmw_cache(void); -void ehca_cleanup_mrmw_cache(void); -int ehca_init_small_qp_cache(void); -void ehca_cleanup_small_qp_cache(void); - -extern rwlock_t ehca_qp_idr_lock; -extern rwlock_t ehca_cq_idr_lock; -extern struct idr ehca_qp_idr; -extern struct idr ehca_cq_idr; -extern spinlock_t shca_list_lock; - -extern int ehca_static_rate; -extern int ehca_port_act_time; -extern bool ehca_use_hp_mr; -extern bool ehca_scaling_code; -extern int ehca_lock_hcalls; -extern int ehca_nr_ports; -extern int ehca_max_cq; -extern int ehca_max_qp; - -struct ipzu_queue_resp { - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; - u32 offset; /* save offset within a page for small_qp */ -}; - -struct ehca_create_cq_resp { - u32 cq_number; - u32 token; - struct ipzu_queue_resp ipz_queue; - u32 fw_handle_ofs; - u32 dummy; -}; - -struct ehca_create_qp_resp { - u32 qp_num; - u32 token; - u32 qp_type; - u32 ext_type; - u32 qkey; - /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ - u32 real_qp_num; - u32 fw_handle_ofs; - u32 dummy; - struct ipzu_queue_resp ipz_squeue; - struct ipzu_queue_resp ipz_rqueue; -}; - -struct ehca_alloc_cq_parms { - u32 nr_cqe; - u32 act_nr_of_entries; - u32 act_pages; - struct ipz_eq_handle eq_handle; -}; - -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3, -}; - -enum ehca_ll_comp_flags { - LLQP_SEND_COMP = 0x20, - LLQP_RECV_COMP = 0x40, - LLQP_COMP_MASK = 0x60, -}; - -struct ehca_alloc_queue_parms { - /* input parameters */ - int max_wr; - int max_sge; - int page_size; - int is_small; - - /* output parameters */ - u16 act_nr_wqes; - u8 
act_nr_sges; - u32 queue_size; /* bytes for small queues, pages otherwise */ -}; - -struct ehca_alloc_qp_parms { - struct ehca_alloc_queue_parms squeue; - struct ehca_alloc_queue_parms rqueue; - - /* input parameters */ - enum ehca_service_type servicetype; - int qp_storage; - int sigtype; - enum ehca_ext_qp_type ext_type; - enum ehca_ll_comp_flags ll_comp_flags; - int ud_av_l_key_ctl; - - u32 token; - struct ipz_eq_handle eq_handle; - struct ipz_pd pd; - struct ipz_cq_handle send_cq_handle, recv_cq_handle; - - u32 srq_qpn, srq_token, srq_limit; - - /* output parameters */ - u32 real_qp_num; - struct ipz_qp_handle qp_handle; - struct h_galpas galpas; -}; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h deleted file mode 100644 index 689c357..0000000 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * pSeries interface definitions - * - * Authors: Waleri Fomin - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_CLASSES_PSERIES_H__ -#define __EHCA_CLASSES_PSERIES_H__ - -#include "hcp_phyp.h" -#include "ipz_pt_fn.h" - - -struct ehca_pfqp { - struct ipz_qpt sqpt; - struct ipz_qpt rqpt; -}; - -struct ehca_pfcq { - struct ipz_qpt qpt; - u32 cqnr; -}; - -struct ehca_pfeq { - struct ipz_qpt qpt; - struct h_galpa galpa; - u32 eqnr; -}; - -struct ipz_adapter_handle { - u64 handle; -}; - -struct ipz_cq_handle { - u64 handle; -}; - -struct ipz_eq_handle { - u64 handle; -}; - -struct ipz_qp_handle { - u64 handle; -}; -struct ipz_mrmw_handle { - u64 handle; -}; - -struct ipz_pd { - u32 value; -}; - -struct hcp_modify_qp_control_block { - u32 qkey; /* 00 */ - u32 rdd; /* reliable datagram domain */ - u32 send_psn; /* 02 */ - u32 receive_psn; /* 03 */ - u32 prim_phys_port; /* 04 */ - u32 alt_phys_port; /* 05 */ - u32 prim_p_key_idx; /* 06 */ - u32 alt_p_key_idx; /* 07 */ - u32 rdma_atomic_ctrl; /* 08 */ - u32 qp_state; /* 09 */ - u32 reserved_10; /* 10 */ - u32 rdma_nr_atomic_resp_res; /* 11 */ - u32 path_migration_state; /* 12 */ - u32 rdma_atomic_outst_dest_qp; /* 13 */ - u32 dest_qp_nr; /* 14 */ - u32 min_rnr_nak_timer_field; /* 15 */ - u32 service_level; /* 16 */ - u32 send_grh_flag; /* 17 */ - u32 retry_count; /* 18 */ - u32 timeout; /* 19 */ - u32 path_mtu; /* 20 */ - u32 max_static_rate; /* 21 */ - u32 dlid; /* 22 */ - u32 rnr_retry_count; /* 23 */ - u32 source_path_bits; /* 24 */ - u32 traffic_class; /* 25 */ - u32 hop_limit; /* 26 */ - u32 source_gid_idx; /* 27 */ - u32 flow_label; /* 28 */ - u32 reserved_29; /* 29 */ - union { /* 30 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid; - u32 service_level_al; /* 34 */ - u32 send_grh_flag_al; /* 35 */ - u32 retry_count_al; /* 36 */ - u32 timeout_al; /* 37 */ - u32 max_static_rate_al; /* 38 */ - u32 dlid_al; /* 39 */ - u32 rnr_retry_count_al; /* 40 */ - u32 source_path_bits_al; /* 41 */ - u32 traffic_class_al; /* 42 */ - u32 hop_limit_al; /* 43 */ - u32 source_gid_idx_al; /* 44 */ - u32 flow_label_al; /* 45 */ - u32 reserved_46; /* 46 */ - u32 reserved_47; /* 47 */ - union { /* 48 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid_al; - u32 max_nr_outst_send_wr; /* 52 */ - u32 max_nr_outst_recv_wr; /* 53 */ - u32 disable_ete_credit_check; /* 54 */ - u32 qp_number; /* 55 */ - u64 send_queue_handle; /* 56 */ - u64 recv_queue_handle; /* 58 */ - u32 actual_nr_sges_in_sq_wqe; /* 60 */ - u32 actual_nr_sges_in_rq_wqe; /* 61 */ - u32 qp_enable; /* 62 */ - u32 curr_srq_limit; /* 63 */ - u64 qp_aff_asyn_ev_log_reg; /* 64 */ - u64 shared_rq_hndl; /* 66 */ - u64 trigg_doorbell_qp_hndl; /* 68 */ - u32 reserved_70_127[58]; /* 70 */ -}; - -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) -#define 
MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) - -#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c deleted file mode 100644 index 9b68b17..0000000 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Completion queue handling - * - * Authors: Waleri Fomin - * Khadija Souissi - * Reinhard Ernst - * Heiko J Schick - * Hoang-Nam Nguyen - * - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_iverbs.h" -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "hcp_if.h" - -static struct kmem_cache *cq_cache; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) -{ - unsigned int qp_num = qp->real_qp_num; - unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, flags); - - ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", - cq->cq_number, qp_num); - - return 0; -} - -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) -{ - int ret = -EINVAL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - hlist_del(iter); - ehca_dbg(cq->ib_cq.device, - "removed qp from cq .cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - ret = 0; - break; - } - } - spin_unlock_irqrestore(&cq->spinlock, flags); - if (ret) - ehca_err(cq->ib_cq.device, - "qp not found cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - - return ret; -} - -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) -{ - struct ehca_qp *ret = NULL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - ret = qp; - break; - } - } - return ret; -} - -struct ib_cq *ehca_create_cq(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - int cqe = attr->cqe; - static const u32 additional_cqe = 20; - struct ib_cq *cq; - struct ehca_cq *my_cq; - struct ehca_shca *shca = - container_of(device, struct ehca_shca, ib_device); - struct ipz_adapter_handle adapter_handle; - struct ehca_alloc_cq_parms param; /* h_call's out parameters */ - struct h_galpa gal; - void *vpage; - u32 counter; - u64 rpage, cqx_fec, h_ret; - int ipz_rc, i; - unsigned long flags; - - if (attr->flags) - return ERR_PTR(-EINVAL); - - if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) - return ERR_PTR(-EINVAL); - - if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { - ehca_err(device, "Unable to create CQ, max number of %i " - "CQs reached.", shca->max_num_cqs); - ehca_err(device, "To increase the maximum number of CQs " - "use the number_of_cqs module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); - if (!my_cq) { - ehca_err(device, "Out of memory for ehca_cq struct device=%p", - device); - atomic_dec(&shca->num_cqs); - return ERR_PTR(-ENOMEM); - } - - memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); - - 
spin_lock_init(&my_cq->spinlock); - spin_lock_init(&my_cq->cb_lock); - spin_lock_init(&my_cq->task_lock); - atomic_set(&my_cq->nr_events, 0); - init_waitqueue_head(&my_cq->wait_completion); - - cq = &my_cq->ib_cq; - - adapter_handle = shca->ipz_hca_handle; - param.eq_handle = shca->eq.ipz_eq_handle; - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_cq_idr_lock, flags); - my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - idr_preload_end(); - - if (my_cq->token < 0) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't allocate new idr entry. device=%p", - device); - goto create_cq_exit1; - } - - /* - * CQs maximum depth is 4GB-64, but we need additional 20 as buffer - * for receiving errors CQEs. - */ - param.nr_cqe = cqe + additional_cqe; - h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); - - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_alloc_resource_cq() failed " - "h_ret=%lli device=%p", h_ret, device); - cq = ERR_PTR(ehca2ib_return_code(h_ret)); - goto create_cq_exit2; - } - - ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, - EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); - if (!ipz_rc) { - ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", - ipz_rc, device); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit3; - } - - for (counter = 0; counter < param.act_pages; counter++) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if (!vpage) { - ehca_err(device, "ipz_qpageit_get_inc() " - "returns NULL device=%p", device); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_cq(adapter_handle, - my_cq->ipz_cq_handle, - &my_cq->pf, - 0, - 0, - rpage, - 1, - my_cq->galpas. 
- kernel); - - if (h_ret < H_SUCCESS) { - ehca_err(device, "hipz_h_register_rpage_cq() failed " - "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " - "act_pages=%i", my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit4; - } - - if (counter == (param.act_pages - 1)) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if ((h_ret != H_SUCCESS) || vpage) { - ehca_err(device, "Registration of pages not " - "complete ehca_cq=%p cq_num=%x " - "h_ret=%lli", my_cq, my_cq->cq_number, - h_ret); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%lli " - "counter=%i act_pages=%i", - my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-ENOMEM); - goto create_cq_exit4; - } - } - } - - ipz_qeit_reset(&my_cq->ipz_queue); - - gal = my_cq->galpas.kernel; - cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); - ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", - my_cq, my_cq->cq_number, cqx_fec); - - my_cq->ib_cq.cqe = my_cq->nr_of_entries = - param.act_nr_of_entries - additional_cqe; - my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; - - for (i = 0; i < QP_HASHTAB_LEN; i++) - INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); - - INIT_LIST_HEAD(&my_cq->sqp_err_list); - INIT_LIST_HEAD(&my_cq->rqp_err_list); - - if (context) { - struct ipz_queue *ipz_queue = &my_cq->ipz_queue; - struct ehca_create_cq_resp resp; - memset(&resp, 0, sizeof(resp)); - resp.cq_number = my_cq->cq_number; - resp.token = my_cq->token; - resp.ipz_queue.qe_size = ipz_queue->qe_size; - resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; - resp.ipz_queue.queue_length = ipz_queue->queue_length; - resp.ipz_queue.pagesize = ipz_queue->pagesize; - resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - resp.fw_handle_ofs = (u32) - (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - ehca_err(device, "Copy to udata failed."); - cq = ERR_PTR(-EFAULT); - goto create_cq_exit4; - } - } - - return cq; - -create_cq_exit4: - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - -create_cq_exit3: - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret != H_SUCCESS) - ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " - "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); - -create_cq_exit2: - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - -create_cq_exit1: - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return cq; -} - -int ehca_destroy_cq(struct ib_cq *cq) -{ - u64 h_ret; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int cq_num = my_cq->cq_number; - struct ib_device *device = cq->device; - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - unsigned long flags; - - if (cq->uobject) { - if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { - ehca_err(device, "Resources still referenced in " - "user space cq_num=%x", my_cq->cq_number); - return -EINVAL; - } - } - - /* - * remove the CQ from the idr first to make sure - * no more interrupt tasklets will touch this CQ - */ - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - - /* now wait until all pending 
events have completed */ - wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); - - /* nobody's using our CQ any longer -- we can destroy it */ - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); - if (h_ret == H_R_STATE) { - /* cq in err: read err data and destroy it forcibly */ - ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " - "state. Try to delete it forcibly.", - my_cq, cq_num, my_cq->ipz_cq_handle.handle); - ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret == H_SUCCESS) - ehca_dbg(device, "cq_num=%x deleted successfully.", - cq_num); - } - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " - "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); - return ehca2ib_return_code(h_ret); - } - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return 0; -} - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - /* TODO: proper resize needs to be done */ - ehca_err(cq->device, "not implemented yet"); - - return -EFAULT; -} - -int ehca_init_cq_cache(void) -{ - cq_cache = kmem_cache_create("ehca_cache_cq", - sizeof(struct ehca_cq), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!cq_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_cq_cache(void) -{ - if (cq_cache) - kmem_cache_destroy(cq_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c deleted file mode 100644 index 90da674..0000000 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Event queue handling - * - * Authors: Waleri Fomin - * Khadija Souissi - * Reinhard Ernst - * Heiko J Schick - * Hoang-Nam Nguyen - * - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_qes.h" -#include "hcp_if.h" -#include "ipz_pt_fn.h" - -int ehca_create_eq(struct ehca_shca *shca, - struct ehca_eq *eq, - const enum ehca_eq_type type, const u32 length) -{ - int ret; - u64 h_ret; - u32 nr_pages; - u32 i; - void *vpage; - struct ib_device *ib_dev = &shca->ib_device; - - spin_lock_init(&eq->spinlock); - spin_lock_init(&eq->irq_spinlock); - eq->is_initialized = 0; - - if (type != EHCA_EQ && type != EHCA_NEQ) { - ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); - return -EINVAL; - } - if (!length) { - ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); - return -EINVAL; - } - - h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); - - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); - return -EINVAL; - } - - ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, - EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); - if (!ret) { - ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); - goto create_eq_exit1; - } - - for (i = 0; i < nr_pages; i++) { - u64 rpage; - - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) - goto create_eq_exit2; - - rpage = __pa(vpage); - h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); - - if (i == (nr_pages - 1)) { - /* last page */ - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (h_ret != H_SUCCESS || vpage) - goto create_eq_exit2; - } else { - if (h_ret != H_PAGE_REGISTERED) - goto create_eq_exit2; - } - } - - ipz_qeit_reset(&eq->ipz_queue); - - /* register interrupt handlers and initialize work queues */ - if (type == EHCA_EQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - 0, "ehca_eq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't map interrupt handler."); - } else if (type == EHCA_NEQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - 0, "ehca_neq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't map interrupt handler."); - } - - eq->is_initialized = 1; - - return 0; - -create_eq_exit2: - ipz_queue_dtor(NULL, &eq->ipz_queue); - -create_eq_exit1: - hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - return -EINVAL; -} - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - void *eqe; - - spin_lock_irqsave(&eq->spinlock, flags); - eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); - spin_unlock_irqrestore(&eq->spinlock, flags); - - return eqe; -} - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - u64 h_ret; - - ibmebus_free_irq(eq->ist, (void *)shca); - - spin_lock_irqsave(&shca_list_lock, flags); - eq->is_initialized = 0; - spin_unlock_irqrestore(&shca_list_lock, flags); - - tasklet_kill(&eq->interrupt_task); - - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't free EQ resources."); - return -EINVAL; - } - ipz_queue_dtor(NULL, &eq->ipz_queue); - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c deleted file mode 100644 index e8b1bb6..0000000 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for 
Linux on POWER - * - * HCA query functions - * - * Authors: Heiko J Schick - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static unsigned int limit_uint(unsigned int value) -{ - return min_t(unsigned int, value, INT_MAX); -} - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - int i, ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_hca *rblock; - - static const u32 cap_mapping[] = { - IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, - IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, - IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, - IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, - IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, - IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, - IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, - IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, - IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, - IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, - IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, - }; - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto query_device1; - } - - memset(props, 0, sizeof(struct ib_device_attr)); - props->page_size_cap = shca->hca_cap_mr_pgsize; - props->fw_ver = rblock->hw_ver; - props->max_mr_size = rblock->max_mr_size; - props->vendor_id = rblock->vendor_id >> 8; - props->vendor_part_id = rblock->vendor_part_id >> 16; - props->hw_ver = rblock->hw_ver; - props->max_qp = limit_uint(rblock->max_qp); - props->max_qp_wr = limit_uint(rblock->max_wqes_wq); - props->max_sge = limit_uint(rblock->max_sge); - props->max_sge_rd = limit_uint(rblock->max_sge_rd); 
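Two details in ehca_query_device() above deserve a note: limit_uint() clamps firmware-reported values to INT_MAX so they fit the signed int fields of struct ib_device_attr, and cap_mapping is a flat array of {generic flag, vendor bit} pairs walked in steps of two. A standalone sketch of that translation loop (the flag values below are invented for the demo):

#include <stdio.h>

#define GEN_FLAG_A (1u << 0)	/* invented generic capability flags */
#define GEN_FLAG_B (1u << 1)
#define VND_BIT_A  (1u << 7)	/* invented vendor capability bits */
#define VND_BIT_B  (1u << 12)

static const unsigned int cap_map[] = {
	GEN_FLAG_A, VND_BIT_A,
	GEN_FLAG_B, VND_BIT_B,
};

static unsigned int translate_caps(unsigned int vendor_caps)
{
	unsigned int flags = 0;
	unsigned int i;

	/* pairs are laid out flat: even index = generic, odd = vendor */
	for (i = 0; i < sizeof(cap_map) / sizeof(cap_map[0]); i += 2)
		if (vendor_caps & cap_map[i + 1])
			flags |= cap_map[i];
	return flags;
}

int main(void)
{
	printf("flags=%#x\n", translate_caps(VND_BIT_B));	/* -> 0x2 */
	return 0;
}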
- props->max_cq = limit_uint(rblock->max_cq); - props->max_cqe = limit_uint(rblock->max_cqe); - props->max_mr = limit_uint(rblock->max_mr); - props->max_mw = limit_uint(rblock->max_mw); - props->max_pd = limit_uint(rblock->max_pd); - props->max_ah = limit_uint(rblock->max_ah); - props->max_ee = limit_uint(rblock->max_rd_ee_context); - props->max_rdd = limit_uint(rblock->max_rd_domain); - props->max_fmr = limit_uint(rblock->max_mr); - props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); - props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); - props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); - props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); - props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - props->max_srq = limit_uint(props->max_qp); - props->max_srq_wr = limit_uint(props->max_qp_wr); - props->max_srq_sge = 3; - } - - props->max_pkeys = 16; - /* Some FW versions say 0 here; insert sensible value in that case */ - props->local_ca_ack_delay = rblock->local_ca_ack_delay ? - min_t(u8, rblock->local_ca_ack_delay, 255) : 12; - props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); - props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); - props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); - props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); - props->max_total_mcast_qp_attach - = limit_uint(rblock->max_total_mcast_qp_attach); - - /* translate device capabilities */ - props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; - for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) - if (rblock->hca_cap_indicators & cap_mapping[i + 1]) - props->device_cap_flags |= cap_mapping[i]; - -query_device1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) -{ - switch (fw_mtu) { - case 0x1: - return IB_MTU_256; - case 0x2: - return IB_MTU_512; - case 0x3: - return IB_MTU_1024; - case 0x4: - return IB_MTU_2048; - case 0x5: - return IB_MTU_4096; - default: - ehca_err(&shca->ib_device, "Unknown MTU size: %x.", - fw_mtu); - return 0; - } -} - -static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) -{ - switch (vl_cap) { - case 0x1: - return 1; - case 0x2: - return 2; - case 0x3: - return 4; - case 0x4: - return 8; - case 0x5: - return 15; - default: - ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", - vl_cap); - return 0; - } -} - -int ehca_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_port1; - } - - memset(props, 0, sizeof(struct ib_port_attr)); - - props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); - props->port_cap_flags = rblock->capability_mask; - props->gid_tbl_len = rblock->gid_tbl_len; - if (rblock->max_msg_sz) - props->max_msg_sz = rblock->max_msg_sz; - else - props->max_msg_sz = 0x1 << 31; - props->bad_pkey_cntr = rblock->bad_pkey_cntr; - props->qkey_viol_cntr = rblock->qkey_viol_cntr; - props->pkey_tbl_len 
= rblock->pkey_tbl_len; - props->lid = rblock->lid; - props->sm_lid = rblock->sm_lid; - props->lmc = rblock->lmc; - props->sm_sl = rblock->sm_sl; - props->subnet_timeout = rblock->subnet_timeout; - props->init_type_reply = rblock->init_type_reply; - props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); - - if (rblock->state && rblock->phys_width) { - props->phys_state = rblock->phys_pstate; - props->state = rblock->phys_state; - props->active_width = rblock->phys_width; - props->active_speed = rblock->phys_speed; - } else { - /* old firmware releases don't report physical - * port info, so use default values - */ - props->phys_state = 5; - props->state = rblock->state; - props->active_width = IB_WIDTH_12X; - props->active_speed = IB_SPEED_SDR; - } - -query_port1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_sma_attr(struct ehca_shca *shca, - u8 port, struct ehca_sma_attr *attr) -{ - int ret = 0; - u64 h_ret; - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_sma_attr1; - } - - memset(attr, 0, sizeof(struct ehca_sma_attr)); - - attr->lid = rblock->lid; - attr->lmc = rblock->lmc; - attr->sm_sl = rblock->sm_sl; - attr->sm_lid = rblock->sm_lid; - - attr->pkey_tbl_len = rblock->pkey_tbl_len; - memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); - -query_sma_attr1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if (index > 16) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_pkey1; - } - - memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); - -query_pkey1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - if (index < 0 || index > 255) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_gid1; - } - - memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); - memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); - -query_gid1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static const u32 allowed_port_caps = ( - IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | 
- IB_PORT_VENDOR_CLASS_SUP); - -int ehca_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - int ret = 0; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - u32 cap; - u64 hret; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if ((props->set_port_cap_mask | props->clr_port_cap_mask) - & ~allowed_port_caps) { - ehca_err(&shca->ib_device, "Non-changeable bits set in masks " - "set=%x clr=%x allowed=%x", props->set_port_cap_mask, - props->clr_port_cap_mask, allowed_port_caps); - return -EINVAL; - } - - if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - ret = -ENOMEM; - goto modify_port1; - } - - hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto modify_port2; - } - - cap = (rblock->capability_mask | props->set_port_cap_mask) - & ~props->clr_port_cap_mask; - - hret = hipz_h_modify_port(shca->ipz_hca_handle, port, - cap, props->init_type, port_modify_mask); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", - hret); - ret = -EINVAL; - } - -modify_port2: - ehca_free_fw_ctrlblock(rblock); - -modify_port1: - mutex_unlock(&shca->modify_mutex); - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c deleted file mode 100644 index 8615d7c..0000000 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Functions for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick - * Khadija Souissi - * Hoang-Nam Nguyen - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
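allowed_port_caps above is the whitelist for ehca_modify_port(): any set or clear bit outside it is rejected up front, after which the new mask is computed in one read-modify-write, cap = (current | set) & ~clear, under shca->modify_mutex so concurrent modifications cannot interleave. The mask arithmetic in isolation (ALLOWED is an invented placeholder):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define ALLOWED 0x00ff0000u	/* invented stand-in for allowed_port_caps */

static int modify_caps(uint32_t *cur, uint32_t set, uint32_t clr)
{
	if ((set | clr) & ~ALLOWED)
		return -EINVAL;		/* non-changeable bit requested */
	*cur = (*cur | set) & ~clr;	/* single read-modify-write */
	return 0;
}

int main(void)
{
	uint32_t cap = 0x00010000;

	if (!modify_caps(&cap, 0x00020000, 0x00010000))
		printf("cap=%#x\n", (unsigned)cap);	/* -> 0x20000 */
	printf("rc=%d\n", modify_caps(&cap, 0x1, 0));	/* -> -22 */
	return 0;
}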
- */ - -#include -#include - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) -#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) - -static void queue_comp_task(struct ehca_cq *__cq); - -static struct ehca_comp_pool *pool; - -static inline void comp_event_callback(struct ehca_cq *cq) -{ - if (!cq->ib_cq.comp_handler) - return; - - spin_lock(&cq->cb_lock); - cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); - spin_unlock(&cq->cb_lock); - - return; -} - -static void print_error_data(struct ehca_shca *shca, void *data, - u64 *rblock, int length) -{ - u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); - u64 resource = rblock[1]; - - switch (type) { - case 0x1: /* Queue Pair */ - { - struct ehca_qp *qp = (struct ehca_qp *)data; - - /* only print error data if AER is set */ - if (rblock[6] == 0) - return; - - ehca_err(&shca->ib_device, - "QP 0x%x (resource=%llx) has errors.", - qp->ib_qp.qp_num, resource); - break; - } - case 0x4: /* Completion Queue */ - { - struct ehca_cq *cq = (struct ehca_cq *)data; - - ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%llx) has errors.", - cq->cq_number, resource); - break; - } - default: - ehca_err(&shca->ib_device, - "Unknown error type: %llx on %s.", - type, shca->ib_device.name); - break; - } - - ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data begin " - "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%llx", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data end " - "----------------------------------------------------"); - - return; -} - -int ehca_error_data(struct ehca_shca *shca, void *data, - u64 resource) -{ - - unsigned long ret; - u64 *rblock; - unsigned long block_count; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); - ret = -ENOMEM; - goto error_data1; - } - - /* rblock must be 4K aligned and should be 4K large */ - ret = hipz_h_error_data(shca->ipz_hca_handle, - resource, - rblock, - &block_count); - - if (ret == H_R_STATE) - ehca_err(&shca->ib_device, - "No error data is available: %llx.", resource); - else if (ret == H_SUCCESS) { - int length; - - length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); - - if (length > EHCA_PAGESIZE) - length = EHCA_PAGESIZE; - - print_error_data(shca, data, rblock, length); - } else - ehca_err(&shca->ib_device, - "Error data could not be fetched: %llx", resource); - - ehca_free_fw_ctrlblock(rblock); - -error_data1: - return ret; - -} - -static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, - enum ib_event_type event_type) -{ - struct ib_event event; - - /* PATH_MIG without 
the QP ever having been armed is false alarm */ - if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) - return; - - event.device = &shca->ib_device; - event.event = event_type; - - if (qp->ext_type == EQPT_SRQ) { - if (!qp->ib_srq.event_handler) - return; - - event.element.srq = &qp->ib_srq; - qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); - } else { - if (!qp->ib_qp.event_handler) - return; - - event.element.qp = &qp->ib_qp; - qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); - } -} - -static void qp_event_callback(struct ehca_shca *shca, u64 eqe, - enum ib_event_type event_type, int fatal) -{ - struct ehca_qp *qp; - u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, token); - if (qp) - atomic_inc(&qp->nr_events); - read_unlock(&ehca_qp_idr_lock); - - if (!qp) - return; - - if (fatal) - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - - dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? - IB_EVENT_SRQ_ERR : event_type); - - /* - * eHCA only processes one WQE at a time for SRQ base QPs, - * so the last WQE has been processed as soon as the QP enters - * error state. - */ - if (fatal && qp->ext_type == EQPT_SRQBASE) - dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); - - if (atomic_dec_and_test(&qp->nr_events)) - wake_up(&qp->wait_completion); - return; -} - -static void cq_event_callback(struct ehca_shca *shca, - u64 eqe) -{ - struct ehca_cq *cq; - u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - - if (!cq) - return; - - ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); - - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - return; -} - -static void parse_identifier(struct ehca_shca *shca, u64 eqe) -{ - u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); - - switch (identifier) { - case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); - break; - case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); - break; - case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); - break; - case 0x05: /* QP error */ - case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); - break; - case 0x07: /* CQ error */ - case 0x08: /* CQ error */ - cq_event_callback(shca, eqe); - break; - case 0x09: /* MRMWPTE error */ - ehca_err(&shca->ib_device, "MRMWPTE error."); - break; - case 0x0A: /* port event */ - ehca_err(&shca->ib_device, "Port event."); - break; - case 0x0B: /* MR access error */ - ehca_err(&shca->ib_device, "MR access error."); - break; - case 0x0C: /* EQ error */ - ehca_err(&shca->ib_device, "EQ error."); - break; - case 0x0D: /* P/Q_Key mismatch */ - ehca_err(&shca->ib_device, "P/Q_Key mismatch."); - break; - case 0x10: /* sampling complete */ - ehca_err(&shca->ib_device, "Sampling complete."); - break; - case 0x11: /* unaffiliated access error */ - ehca_err(&shca->ib_device, "Unaffiliated access error."); - break; - case 0x12: /* path migrating */ - ehca_err(&shca->ib_device, "Path migrating."); - break; - case 0x13: /* interface trace stopped */ - ehca_err(&shca->ib_device, "Interface trace stopped."); - break; - case 0x14: /* first error capture info available */ - ehca_info(&shca->ib_device, "First error capture available"); - break; - case 0x15: /* SRQ limit reached */ - 
qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); - break; - default: - ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", - identifier, shca->ib_device.name); - break; - } - - return; -} - -static void dispatch_port_event(struct ehca_shca *shca, int port_num, - enum ib_event_type type, const char *msg) -{ - struct ib_event event; - - ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); - event.device = &shca->ib_device; - event.event = type; - event.element.port_num = port_num; - ib_dispatch_event(&event); -} - -static void notify_port_conf_change(struct ehca_shca *shca, int port_num) -{ - struct ehca_sma_attr new_attr; - struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; - - ehca_query_sma_attr(shca, port_num, &new_attr); - - if (new_attr.sm_sl != old_attr->sm_sl || - new_attr.sm_lid != old_attr->sm_lid) - dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, - "SM changed"); - - if (new_attr.lid != old_attr->lid || - new_attr.lmc != old_attr->lmc) - dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, - "LID changed"); - - if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || - memcmp(new_attr.pkeys, old_attr->pkeys, - sizeof(u16) * new_attr.pkey_tbl_len)) - dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, - "P_Key changed"); - - *old_attr = new_attr; -} - -/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ -static int replay_modify_qp(struct ehca_sport *sport) -{ - int aqp1_destroyed; - unsigned long flags; - - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - - aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; - - if (sport->ibqp_sqp[IB_QPT_SMI]) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); - if (!aqp1_destroyed) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); - - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - - return aqp1_destroyed; -} - -static void parse_ec(struct ehca_shca *shca, u64 eqe) -{ - u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); - u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); - u8 spec_event; - struct ehca_sport *sport = &shca->sport[port - 1]; - - switch (ec) { - case 0x30: /* port availability change */ - if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - /* only replay modify_qp calls in autodetect mode; - * if AQP1 was destroyed, the port is already down - * again and we can drop the event. 
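The EQE and NEQE decoding in this file is built entirely on EHCA_BMASK_IBM()/EHCA_BMASK_GET(), which name bit fields in IBM MSB-0 convention, i.e. bit 0 is the most significant bit of the 64-bit entry, so EQE_CQ_TOKEN at bits 32..63 is simply the low half. A self-contained model of that extraction under the same numbering (the macro names below are mine, not the driver's):

#include <stdint.h>
#include <stdio.h>

/* Shift and mask of an IBM-numbered field (bit 0 = MSB of the u64). */
#define FIELD_SHIFT(from, to)	(63 - (to))
#define FIELD_MASK(from, to)	(((((uint64_t)1) << ((to) - (from) + 1)) - 1) \
				 << FIELD_SHIFT(from, to))

static uint64_t field_get(uint64_t v, int from, int to)
{
	return (v & FIELD_MASK(from, to)) >> FIELD_SHIFT(from, to);
}

int main(void)
{
	uint64_t eqe = 0x0000000100002a3fULL;

	/* model of EQE_CQ_TOKEN, bits 32..63 -> the low 32 bits */
	printf("token=%#llx\n",
	       (unsigned long long)field_get(eqe, 32, 63));	/* 0x2a3f */
	return 0;
}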
- */ - if (ehca_nr_ports < 0) - if (replay_modify_qp(sport)) - break; - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, &sport->saved_attr); - } else { - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - } - break; - case 0x31: - /* port configuration change - * disruptive change is caused by - * LID, PKEY or SM change - */ - if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { - ehca_warn(&shca->ib_device, "disruptive port " - "%d configuration change", port); - - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, - &sport->saved_attr); - } else - notify_port_conf_change(shca, port); - break; - case 0x32: /* adapter malfunction */ - ehca_err(&shca->ib_device, "Adapter malfunction."); - break; - case 0x33: /* trace stopped */ - ehca_err(&shca->ib_device, "Traced stopped."); - break; - case 0x34: /* util async event */ - spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); - if (spec_event == 0x80) /* client reregister required */ - dispatch_port_event(shca, port, - IB_EVENT_CLIENT_REREGISTER, - "client reregister req."); - else - ehca_warn(&shca->ib_device, "Unknown util async " - "event %x on port %x", spec_event, port); - break; - default: - ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", - ec, shca->ib_device.name); - break; - } - - return; -} - -static inline void reset_eq_pending(struct ehca_cq *cq) -{ - u64 CQx_EP; - struct h_galpa gal = cq->galpas.kernel; - - hipz_galpa_store_cq(gal, cqx_ep, 0x0); - CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); - - return; -} - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - tasklet_hi_schedule(&shca->neq.interrupt_task); - - return IRQ_HANDLED; -} - -void ehca_tasklet_neq(unsigned long data) -{ - struct ehca_shca *shca = (struct ehca_shca*)data; - struct ehca_eqe *eqe; - u64 ret; - - eqe = ehca_poll_eq(shca, &shca->neq); - - while (eqe) { - if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) - parse_ec(shca, eqe->entry); - - eqe = ehca_poll_eq(shca, &shca->neq); - } - - ret = hipz_h_reset_event(shca->ipz_hca_handle, - shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); - - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, "Can't clear notification events."); - - return; -} - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - tasklet_hi_schedule(&shca->eq.interrupt_task); - - return IRQ_HANDLED; -} - - -static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) -{ - u64 eqe_value; - u32 token; - struct ehca_cq *cq; - - eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - ehca_dbg(&shca->ib_device, "Got completion event"); - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (cq == NULL) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq token=%x", - token); - return; - } - reset_eq_pending(cq); - if (ehca_scaling_code) - queue_comp_task(cq); - else { - comp_event_callback(cq); - if 
(atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eqe_value); - } -} - -void ehca_process_eq(struct ehca_shca *shca, int is_irq) -{ - struct ehca_eq *eq = &shca->eq; - struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value, ret; - int eqe_cnt, i; - int eq_empty = 0; - - spin_lock(&eq->irq_spinlock); - if (is_irq) { - const int max_query_cnt = 100; - int query_cnt = 0; - int int_state = 1; - do { - int_state = hipz_h_query_int_state( - shca->ipz_hca_handle, eq->ist); - query_cnt++; - iosync(); - } while (int_state && query_cnt < max_query_cnt); - if (unlikely((query_cnt == max_query_cnt))) - ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", - int_state, query_cnt); - } - - /* read out all eqes */ - eqe_cnt = 0; - do { - u32 token; - eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); - if (!eqe_cache[eqe_cnt].eqe) - break; - eqe_value = eqe_cache[eqe_cnt].eqe->entry; - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); - if (eqe_cache[eqe_cnt].cq) - atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (!eqe_cache[eqe_cnt].cq) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq " - "token=%x", token); - continue; - } - } else - eqe_cache[eqe_cnt].cq = NULL; - eqe_cnt++; - } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); - if (!eqe_cnt) { - if (is_irq) - ehca_dbg(&shca->ib_device, - "No eqe found for irq event"); - goto unlock_irq_spinlock; - } else if (!is_irq) { - ret = hipz_h_eoi(eq->ist); - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, - "bad return code EOI -rc = %lld\n", ret); - ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); - } - if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) - ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); - /* enable irq for new packets */ - for (i = 0; i < eqe_cnt; i++) { - if (eq->eqe_cache[i].cq) - reset_eq_pending(eq->eqe_cache[i].cq); - } - /* check eq */ - spin_lock(&eq->spinlock); - eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); - spin_unlock(&eq->spinlock); - /* call completion handler for cached eqes */ - for (i = 0; i < eqe_cnt; i++) - if (eq->eqe_cache[i].cq) { - if (ehca_scaling_code) - queue_comp_task(eq->eqe_cache[i].cq); - else { - struct ehca_cq *cq = eq->eqe_cache[i].cq; - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eq->eqe_cache[i].eqe->entry); - } - /* poll eq if not empty */ - if (eq_empty) - goto unlock_irq_spinlock; - do { - struct ehca_eqe *eqe; - eqe = ehca_poll_eq(shca, &shca->eq); - if (!eqe) - break; - process_eqe(shca, eqe); - } while (1); - -unlock_irq_spinlock: - spin_unlock(&eq->irq_spinlock); -} - -void ehca_tasklet_eq(unsigned long data) -{ - ehca_process_eq((struct ehca_shca*)data, 1); -} - -static int find_next_online_cpu(struct ehca_comp_pool *pool) -{ - int cpu; - unsigned long flags; - - WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level >= 3) - ehca_dmp(cpu_online_mask, cpumask_size(), ""); - - spin_lock_irqsave(&pool->last_cpu_lock, flags); - do { - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; - } while 
(!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); - spin_unlock_irqrestore(&pool->last_cpu_lock, flags); - - return cpu; -} - -static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct, - struct task_struct *thread) -{ - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); - spin_lock(&__cq->task_lock); - - if (__cq->nr_callbacks == 0) { - __cq->nr_callbacks++; - list_add_tail(&__cq->entry, &cct->cq_list); - cct->cq_jobs++; - wake_up_process(thread); - } else - __cq->nr_callbacks++; - - spin_unlock(&__cq->task_lock); - spin_unlock_irqrestore(&cct->task_lock, flags); -} - -static void queue_comp_task(struct ehca_cq *__cq) -{ - int cpu_id; - struct ehca_cpu_comp_task *cct; - struct task_struct *thread; - int cq_jobs; - unsigned long flags; - - cpu_id = find_next_online_cpu(pool); - BUG_ON(!cpu_online(cpu_id)); - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - - spin_lock_irqsave(&cct->task_lock, flags); - cq_jobs = cct->cq_jobs; - spin_unlock_irqrestore(&cct->task_lock, flags); - if (cq_jobs > 0) { - cpu_id = find_next_online_cpu(pool); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - } - __queue_comp_task(__cq, cct, thread); -} - -static void run_comp_task(struct ehca_cpu_comp_task *cct) -{ - struct ehca_cq *cq; - - while (!list_empty(&cct->cq_list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irq(&cct->task_lock); - - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - spin_lock_irq(&cct->task_lock); - spin_lock(&cq->task_lock); - cq->nr_callbacks--; - if (!cq->nr_callbacks) { - list_del_init(cct->cq_list.next); - cct->cq_jobs--; - } - spin_unlock(&cq->task_lock); - } -} - -static void comp_task_park(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - struct ehca_cpu_comp_task *target; - struct task_struct *thread; - struct ehca_cq *cq, *tmp; - LIST_HEAD(list); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 0; - cct->active = 0; - list_splice_init(&cct->cq_list, &list); - spin_unlock_irq(&cct->task_lock); - - cpu = find_next_online_cpu(pool); - target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); - spin_lock_irq(&target->task_lock); - list_for_each_entry_safe(cq, tmp, &list, entry) { - list_del(&cq->entry); - __queue_comp_task(cq, target, thread); - } - spin_unlock_irq(&target->task_lock); -} - -static void comp_task_stop(unsigned int cpu, bool online) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 0; - cct->active = 0; - WARN_ON(!list_empty(&cct->cq_list)); - spin_unlock_irq(&cct->task_lock); -} - -static int comp_task_should_run(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - return cct->cq_jobs; -} - -static void comp_task(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); - int cql_empty; - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - if (!cql_empty) { - __set_current_state(TASK_RUNNING); - run_comp_task(cct); - } - spin_unlock_irq(&cct->task_lock); -} - -static struct smp_hotplug_thread comp_pool_threads = { - .thread_should_run = comp_task_should_run, - .thread_fn = comp_task, - 
.thread_comm = "ehca_comp/%u", - .cleanup = comp_task_stop, - .park = comp_task_park, -}; - -int ehca_create_comp_pool(void) -{ - int cpu, ret = -ENOMEM; - - if (!ehca_scaling_code) - return 0; - - pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); - if (pool == NULL) - return -ENOMEM; - - spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = cpumask_any(cpu_online_mask); - - pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (!pool->cpu_comp_tasks) - goto out_pool; - - pool->cpu_comp_threads = alloc_percpu(struct task_struct *); - if (!pool->cpu_comp_threads) - goto out_tasks; - - for_each_present_cpu(cpu) { - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - } - - comp_pool_threads.store = pool->cpu_comp_threads; - ret = smpboot_register_percpu_thread(&comp_pool_threads); - if (ret) - goto out_threads; - - pr_info("eHCA scaling code enabled\n"); - return ret; - -out_threads: - free_percpu(pool->cpu_comp_threads); -out_tasks: - free_percpu(pool->cpu_comp_tasks); -out_pool: - kfree(pool); - return ret; -} - -void ehca_destroy_comp_pool(void) -{ - if (!ehca_scaling_code) - return; - - smpboot_unregister_percpu_thread(&comp_pool_threads); - - free_percpu(pool->cpu_comp_threads); - free_percpu(pool->cpu_comp_tasks); - kfree(pool); -} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h deleted file mode 100644 index 5370199..0000000 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions and structs for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick - * Khadija Souissi - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
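ehca_create_comp_pool() above also shows the driver's standard error-unwind ladder: every allocation that can fail jumps to a label that releases exactly what was set up before it, in reverse order, leaving a single cleanup sequence to audit. The shape of the idiom in plain C (resources reduced to three mallocs):

#include <stdlib.h>

struct pool {
	void *a, *b, *c;
};

static struct pool *pool_create(void)
{
	struct pool *p = calloc(1, sizeof(*p));

	if (!p)
		return NULL;
	p->a = malloc(64);
	if (!p->a)
		goto out_pool;
	p->b = malloc(64);
	if (!p->b)
		goto out_a;
	p->c = malloc(64);
	if (!p->c)
		goto out_b;
	return p;		/* fully constructed */

out_b:	/* each label undoes exactly one earlier step, then falls through */
	free(p->b);
out_a:
	free(p->a);
out_pool:
	free(p);
	return NULL;
}

int main(void)
{
	struct pool *p = pool_create();

	if (p) {
		free(p->c);
		free(p->b);
		free(p->a);
		free(p);
	}
	return 0;
}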
- */ - -#ifndef __EHCA_IRQ_H -#define __EHCA_IRQ_H - - -struct ehca_shca; - -#include -#include - -int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); -void ehca_tasklet_neq(unsigned long data); - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); -void ehca_tasklet_eq(unsigned long data); -void ehca_process_eq(struct ehca_shca *shca, int is_irq); - -struct ehca_cpu_comp_task { - struct list_head cq_list; - spinlock_t task_lock; - int cq_jobs; - int active; -}; - -struct ehca_comp_pool { - struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; - struct task_struct * __percpu *cpu_comp_threads; - int last_cpu; - spinlock_t last_cpu_lock; -}; - -int ehca_create_comp_pool(void); -void ehca_destroy_comp_pool(void); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h deleted file mode 100644 index 80e6a3d..0000000 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions for internal functions - * - * Authors: Heiko J Schick - * Dietmar Decker - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
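struct ehca_cpu_comp_task and struct ehca_comp_pool above are the entire data model of the scaling code: one locked CQ list per CPU plus a last_cpu cursor, which find_next_online_cpu() in ehca_irq.c advances past offline CPUs in a round-robin. A toy model of that selection (it assumes at least one worker stays active, just as at least one CPU is always online):

#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static int active[NWORKERS];	/* models cpu_online_mask + cct->active */
static int last_worker;		/* models pool->last_cpu */
static pthread_mutex_t cursor_lock = PTHREAD_MUTEX_INITIALIZER;

/* Pick the next active worker after the previous pick; loops until it
 * finds one, so at least one worker must remain active. */
static int next_worker(void)
{
	int w;

	pthread_mutex_lock(&cursor_lock);
	do {
		last_worker = (last_worker + 1) % NWORKERS;
		w = last_worker;
	} while (!active[w]);
	pthread_mutex_unlock(&cursor_lock);
	return w;
}

int main(void)
{
	int i;

	for (i = 0; i < NWORKERS; i++)
		active[i] = (i != 2);		/* worker 2 is "offline" */
	for (i = 0; i < 5; i++)
		printf("-> worker %d\n", next_worker());
	return 0;
}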
- */ - -#ifndef __EHCA_IVERBS_H__ -#define __EHCA_IVERBS_H__ - -#include "ehca_classes.h" - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw); - -int ehca_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props); - -enum rdma_protocol_type -ehca_query_protocol(struct ib_device *device, u8 port_num); - -int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, - struct ehca_sma_attr *attr); - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); - -int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid); - -int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, - struct ib_port_modify *props); - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_dealloc_pd(struct ib_pd *pd); - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_destroy_ah(struct ib_ah *ah); - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, u64 *iova_start); - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata); - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, int mr_access_flags, u64 *iova_start); - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - -int ehca_dereg_mr(struct ib_mr *mr); - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); - -int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); - -int ehca_dealloc_mw(struct ib_mw *mw); - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, u64 iova); - -int ehca_unmap_fmr(struct list_head *fmr_list); - -int ehca_dealloc_fmr(struct ib_fmr *fmr); - -enum ehca_eq_type { - EHCA_EQ = 0, /* Event Queue */ - EHCA_NEQ /* Notification Event Queue */ -}; - -int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, - enum ehca_eq_type type, const u32 length); - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); - - -struct ib_cq *ehca_create_cq(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_destroy_cq(struct ib_cq *cq); - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); - -int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_destroy_qp(struct ib_qp *qp); - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata); - -int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - -int ehca_post_send(struct ib_qp 
*qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); - -int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - -int ehca_destroy_srq(struct ib_srq *srq); - -u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, - struct ib_qp_init_attr *qp_init_attr); - -int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata); - -int ehca_dealloc_ucontext(struct ib_ucontext *context); - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); - -void ehca_poll_eqs(unsigned long data); - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd); - -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); - -#ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(gfp_t flags); -void ehca_free_fw_ctrlblock(void *ptr); -#else -#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) -#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) -#endif - -void ehca_recover_sqp(struct ib_qp *sqp); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c deleted file mode 100644 index 8246418..0000000 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * module start stop, hca detection - * - * Authors: Heiko J Schick - * Hoang-Nam Nguyen - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef CONFIG_PPC_64K_PAGES -#include -#endif - -#include -#include -#include -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -#define HCAD_VERSION "0029" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Christoph Raisch "); -MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION(HCAD_VERSION); - -static bool ehca_open_aqp1 = 0; -static int ehca_hw_level = 0; -static bool ehca_poll_all_eqs = 1; - -int ehca_debug_level = 0; -int ehca_nr_ports = -1; -bool ehca_use_hp_mr = 0; -int ehca_port_act_time = 30; -int ehca_static_rate = -1; -bool ehca_scaling_code = 0; -int ehca_lock_hcalls = -1; -int ehca_max_cq = -1; -int ehca_max_qp = -1; - -module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); -module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); -module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); -module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); -module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); -module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); -module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); -module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); -module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); -module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); -module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); - -MODULE_PARM_DESC(open_aqp1, - "Open AQP1 on startup (default: no)"); -MODULE_PARM_DESC(debug_level, - "Amount of debug output (0: none (default), 1: traces, " - "2: some dumps, 3: lots)"); -MODULE_PARM_DESC(hw_level, - "Hardware level (0: autosensing (default), " - "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); -MODULE_PARM_DESC(nr_ports, - "number of connected ports (-1: autodetect (default), " - "1: port one only, 2: two ports)"); -MODULE_PARM_DESC(use_hp_mr, - "Use high performance MRs (default: no)"); -MODULE_PARM_DESC(port_act_time, - "Time to wait for port activation (default: 30 sec)"); -MODULE_PARM_DESC(poll_all_eqs, - "Poll all event queues periodically (default: yes)"); -MODULE_PARM_DESC(static_rate, - "Set permanent static rate (default: no static rate)"); -MODULE_PARM_DESC(scaling_code, - "Enable scaling code (default: no)"); -MODULE_PARM_DESC(lock_hcalls, - "Serialize all hCalls made by the driver " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_cqs, - "Max number of CQs which can be allocated " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_qps, - "Max number of QPs which can be allocated " - "(default: autodetect)"); - -DEFINE_RWLOCK(ehca_qp_idr_lock); -DEFINE_RWLOCK(ehca_cq_idr_lock); -DEFINE_IDR(ehca_qp_idr); -DEFINE_IDR(ehca_cq_idr); - -static LIST_HEAD(shca_list); /* list of all registered ehcas */ -DEFINE_SPINLOCK(shca_list_lock); - -static struct timer_list poll_eqs_timer; - -#ifdef CONFIG_PPC_64K_PAGES -static struct 
kmem_cache *ctblk_cache; - -void *ehca_alloc_fw_ctrlblock(gfp_t flags) -{ - void *ret = kmem_cache_zalloc(ctblk_cache, flags); - if (!ret) - ehca_gen_err("Out of memory for ctblk"); - return ret; -} - -void ehca_free_fw_ctrlblock(void *ptr) -{ - if (ptr) - kmem_cache_free(ctblk_cache, ptr); - -} -#endif - -int ehca2ib_return_code(u64 ehca_rc) -{ - switch (ehca_rc) { - case H_SUCCESS: - return 0; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: - return -EBUSY; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - default: - return -EINVAL; - } -} - -static int ehca_create_slab_caches(void) -{ - int ret; - - ret = ehca_init_pd_cache(); - if (ret) { - ehca_gen_err("Cannot create PD SLAB cache."); - return ret; - } - - ret = ehca_init_cq_cache(); - if (ret) { - ehca_gen_err("Cannot create CQ SLAB cache."); - goto create_slab_caches2; - } - - ret = ehca_init_qp_cache(); - if (ret) { - ehca_gen_err("Cannot create QP SLAB cache."); - goto create_slab_caches3; - } - - ret = ehca_init_av_cache(); - if (ret) { - ehca_gen_err("Cannot create AV SLAB cache."); - goto create_slab_caches4; - } - - ret = ehca_init_mrmw_cache(); - if (ret) { - ehca_gen_err("Cannot create MR&MW SLAB cache."); - goto create_slab_caches5; - } - - ret = ehca_init_small_qp_cache(); - if (ret) { - ehca_gen_err("Cannot create small queue SLAB cache."); - goto create_slab_caches6; - } - -#ifdef CONFIG_PPC_64K_PAGES - ctblk_cache = kmem_cache_create("ehca_cache_ctblk", - EHCA_PAGESIZE, H_CB_ALIGNMENT, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ctblk_cache) { - ehca_gen_err("Cannot create ctblk SLAB cache."); - ehca_cleanup_small_qp_cache(); - ret = -ENOMEM; - goto create_slab_caches6; - } -#endif - return 0; - -create_slab_caches6: - ehca_cleanup_mrmw_cache(); - -create_slab_caches5: - ehca_cleanup_av_cache(); - -create_slab_caches4: - ehca_cleanup_qp_cache(); - -create_slab_caches3: - ehca_cleanup_cq_cache(); - -create_slab_caches2: - ehca_cleanup_pd_cache(); - - return ret; -} - -static void ehca_destroy_slab_caches(void) -{ - ehca_cleanup_small_qp_cache(); - ehca_cleanup_mrmw_cache(); - ehca_cleanup_av_cache(); - ehca_cleanup_qp_cache(); - ehca_cleanup_cq_cache(); - ehca_cleanup_pd_cache(); -#ifdef CONFIG_PPC_64K_PAGES - if (ctblk_cache) - kmem_cache_destroy(ctblk_cache); -#endif -} - -#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) -#define EHCA_REVID EHCA_BMASK_IBM(40, 63) - -static struct cap_descr { - u64 mask; - char *descr; -} hca_cap_descr[] = { - { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, - { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, - { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, - { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, - { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, - { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, - { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, - { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, - { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, - { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, - { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, - { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, - { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, - { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, - { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, - { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, - { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, - { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, -}; - -static int ehca_sense_attributes(struct ehca_shca *shca) -{ - int i, 
ret = 0; - u64 h_ret; - struct hipz_query_hca *rblock; - struct hipz_query_port *port; - const char *loc_code; - - static const u32 pgsize_map[] = { - HCA_CAP_MR_PGSIZE_4K, 0x1000, - HCA_CAP_MR_PGSIZE_64K, 0x10000, - HCA_CAP_MR_PGSIZE_1M, 0x100000, - HCA_CAP_MR_PGSIZE_16M, 0x1000000, - }; - - ehca_gen_dbg("Probing adapter %s...", - shca->ofdev->dev.of_node->full_name); - loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", - NULL); - if (loc_code) - ehca_gen_dbg(" ... location lode=%s", loc_code); - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_gen_err("Cannot allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query device properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - if (ehca_nr_ports == 1) - shca->num_ports = 1; - else - shca->num_ports = (u8)rblock->num_ports; - - ehca_gen_dbg(" ... found %x ports", rblock->num_ports); - - if (ehca_hw_level == 0) { - u32 hcaaver; - u32 revid; - - hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); - revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); - - ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); - - if (hcaaver == 1) { - if (revid <= 3) - shca->hw_level = 0x10 | (revid + 1); - else - shca->hw_level = 0x14; - } else if (hcaaver == 2) { - if (revid == 0) - shca->hw_level = 0x21; - else if (revid == 0x10) - shca->hw_level = 0x22; - else if (revid == 0x20 || revid == 0x21) - shca->hw_level = 0x23; - } - - if (!shca->hw_level) { - ehca_gen_warn("unknown hardware version" - " - assuming default level"); - shca->hw_level = 0x22; - } - } else - shca->hw_level = ehca_hw_level; - ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - - shca->hca_cap = rblock->hca_cap_indicators; - ehca_gen_dbg(" ... HCA capabilities:"); - for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) - if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) - ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - - /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is - * a firmware property, so it's valid across all adapters - */ - if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, - shca->hca_cap); - - /* translate supported MR page sizes; always support 4K */ - shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; - for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) - if (rblock->memory_page_size_supported & pgsize_map[i]) - shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; - - /* Set maximum number of CQs and QPs to calculate EQ size */ - if (shca->max_num_qps == -1) - shca->max_num_qps = min_t(int, rblock->max_qp, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { - ehca_gen_warn("The requested number of QPs is out of range " - "(1 - %i) specified by HW. Value is set to %i", - rblock->max_qp, rblock->max_qp); - shca->max_num_qps = rblock->max_qp; - } - - if (shca->max_num_cqs == -1) - shca->max_num_cqs = min_t(int, rblock->max_cq, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { - ehca_gen_warn("The requested number of CQs is out of range " - "(1 - %i) specified by HW. 
Value is set to %i", - rblock->max_cq, rblock->max_cq); - } - - /* query max MTU from first port -- it's the same for all ports */ - port = (struct hipz_query_port *)rblock; - h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query port properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - shca->max_mtu = port->max_mtu; - -sense_attributes1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int init_node_guid(struct ehca_shca *shca) -{ - int ret = 0; - struct hipz_query_hca *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto init_node_guid1; - } - - memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); - -init_node_guid1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - err = ehca_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - - return 0; -} - -static int ehca_init_device(struct ehca_shca *shca) -{ - int ret; - - ret = init_node_guid(shca); - if (ret) - return ret; - - strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); - shca->ib_device.owner = THIS_MODULE; - - shca->ib_device.uverbs_abi_ver = 8; - shca->ib_device.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - - shca->ib_device.node_type = RDMA_NODE_IB_CA; - shca->ib_device.phys_port_cnt = shca->num_ports; - shca->ib_device.num_comp_vectors = 1; - shca->ib_device.dma_device = &shca->ofdev->dev; - shca->ib_device.query_device = ehca_query_device; - shca->ib_device.query_port = ehca_query_port; - shca->ib_device.query_gid = ehca_query_gid; - shca->ib_device.query_pkey = ehca_query_pkey; - /* shca->in_device.modify_device = ehca_modify_device */ - shca->ib_device.modify_port = ehca_modify_port; - shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; - shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; - shca->ib_device.alloc_pd = ehca_alloc_pd; - shca->ib_device.dealloc_pd = ehca_dealloc_pd; - shca->ib_device.create_ah = ehca_create_ah; - /* shca->ib_device.modify_ah = ehca_modify_ah; */ - shca->ib_device.query_ah = ehca_query_ah; - shca->ib_device.destroy_ah = ehca_destroy_ah; - shca->ib_device.create_qp = ehca_create_qp; - shca->ib_device.modify_qp = ehca_modify_qp; - shca->ib_device.query_qp = ehca_query_qp; - shca->ib_device.destroy_qp 
= ehca_destroy_qp; - shca->ib_device.post_send = ehca_post_send; - shca->ib_device.post_recv = ehca_post_recv; - shca->ib_device.create_cq = ehca_create_cq; - shca->ib_device.destroy_cq = ehca_destroy_cq; - shca->ib_device.resize_cq = ehca_resize_cq; - shca->ib_device.poll_cq = ehca_poll_cq; - /* shca->ib_device.peek_cq = ehca_peek_cq; */ - shca->ib_device.req_notify_cq = ehca_req_notify_cq; - /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ - shca->ib_device.get_dma_mr = ehca_get_dma_mr; - shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; - shca->ib_device.reg_user_mr = ehca_reg_user_mr; - shca->ib_device.query_mr = ehca_query_mr; - shca->ib_device.dereg_mr = ehca_dereg_mr; - shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; - shca->ib_device.alloc_mw = ehca_alloc_mw; - shca->ib_device.bind_mw = ehca_bind_mw; - shca->ib_device.dealloc_mw = ehca_dealloc_mw; - shca->ib_device.alloc_fmr = ehca_alloc_fmr; - shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; - shca->ib_device.unmap_fmr = ehca_unmap_fmr; - shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; - shca->ib_device.attach_mcast = ehca_attach_mcast; - shca->ib_device.detach_mcast = ehca_detach_mcast; - shca->ib_device.process_mad = ehca_process_mad; - shca->ib_device.mmap = ehca_mmap; - shca->ib_device.dma_ops = &ehca_dma_mapping_ops; - shca->ib_device.get_port_immutable = ehca_port_immutable; - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - shca->ib_device.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - - shca->ib_device.create_srq = ehca_create_srq; - shca->ib_device.modify_srq = ehca_modify_srq; - shca->ib_device.query_srq = ehca_query_srq; - shca->ib_device.destroy_srq = ehca_destroy_srq; - shca->ib_device.post_srq_recv = ehca_post_srq_recv; - } - - return ret; -} - -static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) -{ - struct ehca_sport *sport = &shca->sport[port - 1]; - struct ib_cq *ibcq; - struct ib_qp *ibqp; - struct ib_qp_init_attr qp_init_attr; - struct ib_cq_init_attr cq_attr = {}; - int ret; - - if (sport->ibcq_aqp1) { - ehca_err(&shca->ib_device, "AQP1 CQ is already created."); - return -EPERM; - } - - cq_attr.cqe = 10; - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), - &cq_attr); - if (IS_ERR(ibcq)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); - return PTR_ERR(ibcq); - } - sport->ibcq_aqp1 = ibcq; - - if (sport->ibqp_sqp[IB_QPT_GSI]) { - ehca_err(&shca->ib_device, "AQP1 QP is already created."); - ret = -EPERM; - goto create_aqp1; - } - - memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); - qp_init_attr.send_cq = ibcq; - qp_init_attr.recv_cq = ibcq; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.cap.max_send_wr = 100; - qp_init_attr.cap.max_recv_wr = 100; - qp_init_attr.cap.max_send_sge = 2; - qp_init_attr.cap.max_recv_sge = 1; - qp_init_attr.qp_type = IB_QPT_GSI; - qp_init_attr.port_num = port; - qp_init_attr.qp_context = NULL; - qp_init_attr.event_handler = NULL; - qp_init_attr.srq = NULL; - - ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); - if (IS_ERR(ibqp)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); - ret = PTR_ERR(ibqp); - goto create_aqp1; - } - sport->ibqp_sqp[IB_QPT_GSI] = ibqp; - - return 0; - -create_aqp1: - ib_destroy_cq(sport->ibcq_aqp1); - return ret; -} - -static int ehca_destroy_aqp1(struct ehca_sport *sport) -{ - int ret; - - ret = 
ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); - if (ret) { - ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); - return ret; - } - - ret = ib_destroy_cq(sport->ibcq_aqp1); - if (ret) - ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret); - - return ret; -} - -static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); -} - -static ssize_t ehca_store_debug_level(struct device_driver *ddp, - const char *buf, size_t count) -{ - int value = (*buf) - '0'; - if (value >= 0 && value <= 9) - ehca_debug_level = value; - return 1; -} - -static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, - ehca_show_debug_level, ehca_store_debug_level); - -static struct attribute *ehca_drv_attrs[] = { - &driver_attr_debug_level.attr, - NULL -}; - -static struct attribute_group ehca_drv_attr_grp = { - .attrs = ehca_drv_attrs -}; - -static const struct attribute_group *ehca_drv_attr_groups[] = { - &ehca_drv_attr_grp, - NULL, -}; - -#define EHCA_RESOURCE_ATTR(name) \ -static ssize_t ehca_show_##name(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - struct ehca_shca *shca; \ - struct hipz_query_hca *rblock; \ - int data; \ - \ - shca = dev_get_drvdata(dev); \ - \ - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ - if (!rblock) { \ - dev_err(dev, "Can't allocate rblock memory.\n"); \ - return 0; \ - } \ - \ - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ - dev_err(dev, "Can't query device properties\n"); \ - ehca_free_fw_ctrlblock(rblock); \ - return 0; \ - } \ - \ - data = rblock->name; \ - ehca_free_fw_ctrlblock(rblock); \ - \ - if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ - return snprintf(buf, 256, "1\n"); \ - else \ - return snprintf(buf, 256, "%d\n", data); \ - \ -} \ -static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); - -EHCA_RESOURCE_ATTR(num_ports); -EHCA_RESOURCE_ATTR(hw_ver); -EHCA_RESOURCE_ATTR(max_eq); -EHCA_RESOURCE_ATTR(cur_eq); -EHCA_RESOURCE_ATTR(max_cq); -EHCA_RESOURCE_ATTR(cur_cq); -EHCA_RESOURCE_ATTR(max_qp); -EHCA_RESOURCE_ATTR(cur_qp); -EHCA_RESOURCE_ATTR(max_mr); -EHCA_RESOURCE_ATTR(cur_mr); -EHCA_RESOURCE_ATTR(max_mw); -EHCA_RESOURCE_ATTR(cur_mw); -EHCA_RESOURCE_ATTR(max_pd); -EHCA_RESOURCE_ATTR(max_ah); - -static ssize_t ehca_show_adapter_handle(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ehca_shca *shca = dev_get_drvdata(dev); - - return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); - -} -static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); - -static struct attribute *ehca_dev_attrs[] = { - &dev_attr_adapter_handle.attr, - &dev_attr_num_ports.attr, - &dev_attr_hw_ver.attr, - &dev_attr_max_eq.attr, - &dev_attr_cur_eq.attr, - &dev_attr_max_cq.attr, - &dev_attr_cur_cq.attr, - &dev_attr_max_qp.attr, - &dev_attr_cur_qp.attr, - &dev_attr_max_mr.attr, - &dev_attr_cur_mr.attr, - &dev_attr_max_mw.attr, - &dev_attr_cur_mw.attr, - &dev_attr_max_pd.attr, - &dev_attr_max_ah.attr, - NULL -}; - -static struct attribute_group ehca_dev_attr_grp = { - .attrs = ehca_dev_attrs -}; - -static int ehca_probe(struct platform_device *dev) -{ - struct ehca_shca *shca; - const u64 *handle; - struct ib_pd *ibpd; - int ret, i, eq_size; - unsigned long flags; - - handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); - if (!handle) { - ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - if (!(*handle)) { - ehca_gen_err("Wrong eHCA 
handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); - if (!shca) { - ehca_gen_err("Cannot allocate shca memory."); - return -ENOMEM; - } - - mutex_init(&shca->modify_mutex); - atomic_set(&shca->num_cqs, 0); - atomic_set(&shca->num_qps, 0); - shca->max_num_qps = ehca_max_qp; - shca->max_num_cqs = ehca_max_cq; - - for (i = 0; i < ARRAY_SIZE(shca->sport); i++) - spin_lock_init(&shca->sport[i].mod_sqp_lock); - - shca->ofdev = dev; - shca->ipz_hca_handle.handle = *handle; - dev_set_drvdata(&dev->dev, shca); - - ret = ehca_sense_attributes(shca); - if (ret < 0) { - ehca_gen_err("Cannot sense eHCA attributes."); - goto probe1; - } - - ret = ehca_init_device(shca); - if (ret) { - ehca_gen_err("Cannot init ehca device struct"); - goto probe1; - } - - eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; - /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create EQ."); - goto probe1; - } - - ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create NEQ."); - goto probe3; - } - - /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); - if (IS_ERR(ibpd)) { - ehca_err(&shca->ib_device, "Cannot create internal PD."); - ret = PTR_ERR(ibpd); - goto probe4; - } - - shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); - shca->pd->ib_pd.device = &shca->ib_device; - - /* create internal max MR */ - ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); - - if (ret) { - ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", - ret); - goto probe5; - } - - ret = ib_register_device(&shca->ib_device, NULL); - if (ret) { - ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%i", ret); - goto probe6; - } - - /* create AQP1 for port 1 */ - if (ehca_open_aqp1 == 1) { - shca->sport[0].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 1); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 1."); - goto probe7; - } - } - - /* create AQP1 for port 2 */ - if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { - shca->sport[1].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 2); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 2."); - goto probe8; - } - } - - ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_err(&shca->ib_device, - "Cannot create device attributes ret=%d", ret); - - spin_lock_irqsave(&shca_list_lock, flags); - list_add(&shca->shca_list, &shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return 0; - -probe8: - ret = ehca_destroy_aqp1(&shca->sport[0]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port 1. ret=%i", ret); - -probe7: - ib_unregister_device(&shca->ib_device); - -probe6: - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%x", ret); - -probe5: - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%x", ret); - -probe4: - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy NEQ. ret=%x", ret); - -probe3: - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy EQ. 
ret=%x", ret); - -probe1: - ib_dealloc_device(&shca->ib_device); - - return -EINVAL; -} - -static int ehca_remove(struct platform_device *dev) -{ - struct ehca_shca *shca = dev_get_drvdata(&dev->dev); - unsigned long flags; - int ret; - - sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); - - if (ehca_open_aqp1 == 1) { - int i; - for (i = 0; i < shca->num_ports; i++) { - ret = ehca_destroy_aqp1(&shca->sport[i]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port %x " - "ret=%i", ret, i); - } - } - - ib_unregister_device(&shca->ib_device); - - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%i", ret); - - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret); - - ib_dealloc_device(&shca->ib_device); - - spin_lock_irqsave(&shca_list_lock, flags); - list_del(&shca->shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return ret; -} - -static struct of_device_id ehca_device_table[] = -{ - { - .name = "lhca", - .compatible = "IBM,lhca", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, ehca_device_table); - -static struct platform_driver ehca_driver = { - .probe = ehca_probe, - .remove = ehca_remove, - .driver = { - .name = "ehca", - .owner = THIS_MODULE, - .groups = ehca_drv_attr_groups, - .of_match_table = ehca_device_table, - }, -}; - -void ehca_poll_eqs(unsigned long data) -{ - struct ehca_shca *shca; - - spin_lock(&shca_list_lock); - list_for_each_entry(shca, &shca_list, shca_list) { - if (shca->eq.is_initialized) { - /* call deadman proc only if eq ptr does not change */ - struct ehca_eq *eq = &shca->eq; - int max = 3; - volatile u64 q_ofs, q_ofs2; - unsigned long flags; - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - do { - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs2 = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - max--; - } while (q_ofs == q_ofs2 && max > 0); - if (q_ofs == q_ofs2) - ehca_process_eq(shca, 0); - } - } - mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); - spin_unlock(&shca_list_lock); -} - -static int ehca_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - static unsigned long ehca_dmem_warn_time; - unsigned long flags; - - switch (action) { - case MEM_CANCEL_OFFLINE: - case MEM_CANCEL_ONLINE: - case MEM_ONLINE: - case MEM_OFFLINE: - return NOTIFY_OK; - case MEM_GOING_ONLINE: - case MEM_GOING_OFFLINE: - /* only ok if no hca is attached to the lpar */ - spin_lock_irqsave(&shca_list_lock, flags); - if (list_empty(&shca_list)) { - spin_unlock_irqrestore(&shca_list_lock, flags); - return NOTIFY_OK; - } else { - spin_unlock_irqrestore(&shca_list_lock, flags); - if (printk_timed_ratelimit(&ehca_dmem_warn_time, - 30 * 1000)) - ehca_gen_err("DMEM operations are not allowed" - "in conjunction with eHCA"); - return NOTIFY_BAD; - } - } - return NOTIFY_OK; -} - -static struct notifier_block ehca_mem_nb = { - .notifier_call = ehca_mem_notifier, -}; - -static int __init ehca_module_init(void) -{ - int ret; - - printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Version " HCAD_VERSION ")\n"); - - ret = 
ehca_create_comp_pool(); - if (ret) { - ehca_gen_err("Cannot create comp pool."); - return ret; - } - - ret = ehca_create_slab_caches(); - if (ret) { - ehca_gen_err("Cannot create SLAB caches"); - ret = -ENOMEM; - goto module_init1; - } - - ret = ehca_create_busmap(); - if (ret) { - ehca_gen_err("Cannot create busmap."); - goto module_init2; - } - - ret = ibmebus_register_driver(&ehca_driver); - if (ret) { - ehca_gen_err("Cannot register eHCA device driver"); - ret = -EINVAL; - goto module_init3; - } - - ret = register_memory_notifier(&ehca_mem_nb); - if (ret) { - ehca_gen_err("Failed registering memory add/remove notifier"); - goto module_init4; - } - - if (ehca_poll_all_eqs != 1) { - ehca_gen_err("WARNING!!!"); - ehca_gen_err("It is possible to lose interrupts."); - } else { - init_timer(&poll_eqs_timer); - poll_eqs_timer.function = ehca_poll_eqs; - poll_eqs_timer.expires = jiffies + HZ; - add_timer(&poll_eqs_timer); - } - - return 0; - -module_init4: - ibmebus_unregister_driver(&ehca_driver); - -module_init3: - ehca_destroy_busmap(); - -module_init2: - ehca_destroy_slab_caches(); - -module_init1: - ehca_destroy_comp_pool(); - return ret; -}; - -static void __exit ehca_module_exit(void) -{ - if (ehca_poll_all_eqs == 1) - del_timer_sync(&poll_eqs_timer); - - ibmebus_unregister_driver(&ehca_driver); - - unregister_memory_notifier(&ehca_mem_nb); - - ehca_destroy_busmap(); - - ehca_destroy_slab_caches(); - - ehca_destroy_comp_pool(); - - idr_destroy(&ehca_cq_idr); - idr_destroy(&ehca_qp_idr); -}; - -module_init(ehca_module_init); -module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c deleted file mode 100644 index cec1815..0000000 --- a/drivers/infiniband/hw/ehca/ehca_mcast.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * mcast functions - * - * Authors: Khadija Souissi - * Waleri Fomin - * Reinhard Ernst - * Hoang-Nam Nguyen - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define MAX_MC_LID 0xFFFE -#define MIN_MC_LID 0xC000 /* Multicast limits */ -#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) -#define EHCA_VALID_MULTICAST_LID(lid) \ - (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) - -int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} - -int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->pd->device, - struct ehca_shca, ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c deleted file mode 100644 index f914b30..0000000 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ /dev/null @@ -1,2593 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW functions - * - * Authors: Dietmar Decker - * Christoph Raisch - * Hoang-Nam Nguyen - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "hcp_if.h" -#include "hipz_hw.h" - -#define NUM_CHUNKS(length, chunk_size) \ - (((length) + (chunk_size - 1)) / (chunk_size)) - -/* max number of rpages (per hcall register_rpages) */ -#define MAX_RPAGES 512 - -/* DMEM toleration management */ -#define EHCA_SECTSHIFT SECTION_SIZE_BITS -#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) -#define EHCA_HUGEPAGESHIFT 34 -#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) -#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) -#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL -#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ -#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) -#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) -#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ -#define EHCA_DIR_MAP_SIZE (0x10000) -#define EHCA_ENT_MAP_SIZE (0x10000) -#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) - -static unsigned long ehca_mr_len; - -/* - * Memory map data structures - */ -struct ehca_dir_bmap { - u64 ent[EHCA_MAP_ENTRIES]; -}; -struct ehca_top_bmap { - struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; -}; -struct ehca_bmap { - struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; -}; - -static struct ehca_bmap *ehca_bmap; - -static struct kmem_cache *mr_cache; -static struct kmem_cache *mw_cache; - -enum ehca_mr_pgsize { - EHCA_MR_PGSIZE4K = 0x1000L, - EHCA_MR_PGSIZE64K = 0x10000L, - EHCA_MR_PGSIZE1M = 0x100000L, - EHCA_MR_PGSIZE16M = 0x1000000L -}; - -#define EHCA_MR_PGSHIFT4K 12 -#define EHCA_MR_PGSHIFT64K 16 -#define EHCA_MR_PGSHIFT1M 20 -#define EHCA_MR_PGSHIFT16M 24 - -static u64 ehca_map_vaddr(void *caddr); - -static u32 ehca_encode_hwpage_size(u32 pgsize) -{ - int log = ilog2(pgsize); - WARN_ON(log < 12 || log > 24 || log & 3); - return (log - 12) / 4; -} - -static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) -{ - return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); -} - -static struct ehca_mr *ehca_mr_new(void) -{ - struct ehca_mr *me; - - me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mrlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - 
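For reference, ehca_mr_new() just above (and ehca_mr_delete() just below) follows the standard kernel slab-cache object pattern: a dedicated kmem_cache, a zeroed allocation, per-object lock init, and a matching free. A minimal self-contained sketch of that pattern — the names demo_cache, struct demo_obj, and demo_obj_*() are hypothetical illustrations, not identifiers from this driver:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_obj {
	spinlock_t lock;
	/* payload fields would follow */
};

static struct kmem_cache *demo_cache;	/* created once, e.g. at module init */

static int demo_cache_init(void)
{
	demo_cache = kmem_cache_create("demo_cache", sizeof(struct demo_obj),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	return demo_cache ? 0 : -ENOMEM;
}

static struct demo_obj *demo_obj_new(void)
{
	/* zeroed allocation plus lock init, as in ehca_mr_new() */
	struct demo_obj *obj = kmem_cache_zalloc(demo_cache, GFP_KERNEL);

	if (obj)
		spin_lock_init(&obj->lock);
	return obj;
}

static void demo_obj_delete(struct demo_obj *obj)
{
	kmem_cache_free(demo_cache, obj);
}

The same create/zalloc/free lifecycle backs mr_cache and mw_cache here, and the PD/CQ/QP/AV caches set up by ehca_create_slab_caches() earlier in this patch.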
-static void ehca_mr_delete(struct ehca_mr *me) -{ - kmem_cache_free(mr_cache, me); -} - -static struct ehca_mw *ehca_mw_new(void) -{ - struct ehca_mw *me; - - me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mwlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mw_delete(struct ehca_mw *me) -{ - kmem_cache_free(mw_cache, me); -} - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_maxmr; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - - if (shca->maxmr) { - e_maxmr = ehca_mr_new(); - if (!e_maxmr) { - ehca_err(&shca->ib_device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto get_dma_mr_exit0; - } - - ret = ehca_reg_maxmr(shca, e_maxmr, - (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), - mr_access_flags, e_pd, - &e_maxmr->ib.ib_mr.lkey, - &e_maxmr->ib.ib_mr.rkey); - if (ret) { - ehca_mr_delete(e_maxmr); - ib_mr = ERR_PTR(ret); - goto get_dma_mr_exit0; - } - ib_mr = &e_maxmr->ib.ib_mr; - } else { - ehca_err(&shca->ib_device, "no internal max-MR exist!"); - ib_mr = ERR_PTR(-EINVAL); - goto get_dma_mr_exit0; - } - -get_dma_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", - PTR_ERR(ib_mr), pd, mr_access_flags); - return ib_mr; -} /* end ehca_get_dma_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - - u64 size; - - if ((num_phys_buf <= 0) || !phys_buf_array) { - ehca_err(pd->device, "bad input values: num_phys_buf=%x " - "phys_buf_array=%p", num_phys_buf, phys_buf_array); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, - iova_start, &size); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit0; - } - if ((size == 0) || - (((u64)iova_start + size) < (u64)iova_start)) { - ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", - size, iova_start); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_phys_mr_exit0; - } - - /* register MR on HCA */ - if (ehca_mr_is_maxmr(size, iova_start)) { - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, - e_pd, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); - if (ret) { - ib_mr = ERR_PTR(ret); - goto 
reg_phys_mr_exit1; - } - } else { - struct ehca_mr_pginfo pginfo; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, - PAGE_SIZE); - /* for kernel space we try most possible pgsize */ - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, - hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_phys_mr_exit1: - ehca_mr_delete(e_mr); -reg_phys_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " - "num_phys_buf=%x mr_access_flags=%x iova_start=%p", - PTR_ERR(ib_mr), pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ib_mr; -} /* end ehca_reg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata) -{ - struct ib_mr *ib_mr; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - int ret, page_shift; - u32 num_kpages; - u32 num_hwpages; - u64 hwpage_size; - - if (!pd) { - ehca_gen_err("bad pd=%p", pd); - return ERR_PTR(-EFAULT); - } - - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - if (length == 0 || virt + length < virt) { - ehca_err(pd->device, "bad input values: length=%llx " - "virt_base=%llx", length, virt); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_user_mr_exit0; - } - - e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(e_mr->umem)) { - ib_mr = (void *)e_mr->umem; - goto reg_user_mr_exit1; - } - - if (e_mr->umem->page_size != PAGE_SIZE) { - ehca_err(pd->device, "page size not supported, " - "e_mr->umem->page_size=%x", e_mr->umem->page_size); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit2; - } - - /* determine number of MR pages */ - num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); - /* select proper hw_pgsize */ - page_shift = PAGE_SHIFT; - if (e_mr->umem->hugetlb) { - /* determine page_shift, clamp between 4K and 16M */ - page_shift = (fls64(length - 1) + 3) & ~3; - page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), - EHCA_MR_PGSHIFT16M); - } - hwpage_size = 1UL << page_shift; - - /* now that we 
have the desired page size, shift until it's - * supported, too. 4K is always supported, so this terminates. - */ - while (!(hwpage_size & shca->hca_cap_mr_pgsize)) - hwpage_size >>= 4; - -reg_user_mr_fallback: - num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_USER; - pginfo.hwpage_size = hwpage_size; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.u.usr.region = e_mr->umem; - pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; - pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; - ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { - ehca_warn(pd->device, "failed to register mr " - "with hwpage_size=%llx", hwpage_size); - ehca_info(pd->device, "try to register mr with " - "kpage_size=%lx", PAGE_SIZE); - /* - * this means kpages are not contiguous for a hw page - * try kernel page size as fallback solution - */ - hwpage_size = PAGE_SIZE; - goto reg_user_mr_fallback; - } - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_user_mr_exit2; - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_user_mr_exit2: - ib_umem_release(e_mr->umem); -reg_user_mr_exit1: - ehca_mr_delete(e_mr); -reg_user_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", - PTR_ERR(ib_mr), pd, mr_access_flags, udata); - return ib_mr; -} /* end ehca_reg_user_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - int ret; - - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - u64 new_size; - u64 *new_start; - u32 new_acl; - struct ehca_pd *new_pd; - u32 tmp_lkey, tmp_rkey; - unsigned long sl_flags; - u32 num_kpages = 0; - u32 num_hwpages = 0; - struct ehca_mr_pginfo pginfo; - - if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { - /* TODO not supported, because PHYP rereg hCall needs pages */ - ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " - "supported yet, mr_rereg_mask=%x", mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - if (mr_rereg_mask & IB_MR_REREG_PD) { - if (!pd) { - ehca_err(mr->device, "rereg with bad pd, pd=%p " - "mr_rereg_mask=%x", pd, mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - - if ((mr_rereg_mask & - ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || - (mr_rereg_mask == 0)) { - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* check other parameters */ - if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. 
addr/size */ - if (e_mr->flags & EHCA_MR_FLAG_FMR) { - ehca_err(mr->device, "not supported for FMR, mr=%p " - "flags=%x", mr, e_mr->flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values mr_rereg_mask=%x" - " phys_buf_array=%p num_phys_buf=%x", - mr_rereg_mask, phys_buf_array, num_phys_buf); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ - (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " - "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* set requested values dependent on rereg request */ - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; - new_size = e_mr->size; - new_acl = e_mr->acl; - new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); - - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - u64 hw_pgsize = ehca_get_max_hwpage_size(shca); - - new_start = iova_start; /* change address */ - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, - num_phys_buf, iova_start, - &new_size); - if (ret) - goto rereg_phys_mr_exit1; - if ((new_size == 0) || - (((u64)iova_start + new_size) < (u64)iova_start)) { - ehca_err(mr->device, "bad input values: new_size=%llx " - "iova_start=%p", new_size, iova_start); - ret = -EINVAL; - goto rereg_phys_mr_exit1; - } - num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + - new_size, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + - new_size, hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - } - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - new_acl = mr_access_flags; - if (mr_rereg_mask & IB_MR_REREG_PD) - new_pd = container_of(pd, struct ehca_pd, ib_pd); - - ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, - new_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto rereg_phys_mr_exit1; - - /* successful reregistration */ - if (mr_rereg_mask & IB_MR_REREG_PD) - mr->pd = pd; - mr->lkey = tmp_lkey; - mr->rkey = tmp_rkey; - -rereg_phys_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -rereg_phys_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " - "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " - "iova_start=%p", - ret, mr, mr_rereg_mask, pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ret; -} /* end ehca_rereg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout; - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - 
ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto query_mr_exit0; - } - - memset(mr_attr, 0, sizeof(struct ib_mr_attr)); - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - - h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " - "hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto query_mr_exit1; - } - mr_attr->pd = mr->pd; - mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; - ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); - -query_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -query_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", - ret, mr, mr_attr); - return ret; -} /* end ehca_query_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_mr(struct ib_mr *mr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto dereg_mr_exit0; - } else if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto dereg_mr_exit0; - } - - /* TODO: BUSY: MR still has bound window(s) */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " - "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", - h_ret, shca, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto dereg_mr_exit0; - } - - if (e_mr->umem) - ib_umem_release(e_mr->umem); - - /* successful deregistration */ - ehca_mr_delete(e_mr); - -dereg_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p", ret, mr); - return ret; -} /* end ehca_dereg_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) -{ - struct ib_mw *ib_mw; - u64 h_ret; - struct ehca_mw *e_mw; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - e_mw = ehca_mw_new(); - if (!e_mw) { - ib_mw = ERR_PTR(-ENOMEM); - goto alloc_mw_exit0; - } - - h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " - "shca=%p hca_hndl=%llx mw=%p", - h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); - goto alloc_mw_exit1; - } - /* successful MW allocation */ - e_mw->ipz_mw_handle = hipzout.handle; - e_mw->ib_mw.rkey = hipzout.rkey; - return &e_mw->ib_mw; - -alloc_mw_exit1: - ehca_mw_delete(e_mw); -alloc_mw_exit0: - if (IS_ERR(ib_mw)) - 
ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); - return ib_mw; -} /* end ehca_alloc_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* TODO: not supported up to now */ - ehca_gen_err("bind MW currently not supported by HCAD"); - - return -EPERM; -} /* end ehca_bind_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_mw(struct ib_mw *mw) -{ - u64 h_ret; - struct ehca_shca *shca = - container_of(mw->device, struct ehca_shca, ib_device); - struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); - - h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); - if (h_ret != H_SUCCESS) { - ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " - "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx", - h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, - e_mw->ipz_mw_handle.handle); - return ehca2ib_return_code(h_ret); - } - /* successful deallocation */ - ehca_mw_delete(e_mw); - return 0; -} /* end ehca_dealloc_mw() */ - -/*----------------------------------------------------------------------*/ - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr *e_fmr; - int ret; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - u64 hw_pgsize; - - /* check other parameters */ - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if (mr_access_flags & IB_ACCESS_MW_BIND) { - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { - ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " - "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", - fmr_attr->max_pages, fmr_attr->max_maps, - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - hw_pgsize = 1 << fmr_attr->page_shift; - if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { - ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - e_fmr = ehca_mr_new(); - if (!e_fmr) { - ib_fmr = ERR_PTR(-ENOMEM); - goto alloc_fmr_exit0; - } - e_fmr->flags |= EHCA_MR_FLAG_FMR; - - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.hwpage_size = hw_pgsize; - /* - * pginfo.num_hwpages==0, ie register_rpages() will not be called - * but deferred to map_phys_fmr() - */ - ret = ehca_reg_mr(shca, e_fmr, NULL, - fmr_attr->max_pages * (1 << fmr_attr->page_shift), - mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey, EHCA_REG_MR); - if (ret) { - ib_fmr = ERR_PTR(ret); - goto alloc_fmr_exit1; - } - - /* successful */ - e_fmr->hwpage_size = hw_pgsize; - e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; - e_fmr->fmr_max_pages = 
fmr_attr->max_pages; - e_fmr->fmr_max_maps = fmr_attr->max_maps; - e_fmr->fmr_map_cnt = 0; - return &e_fmr->ib.ib_fmr; - -alloc_fmr_exit1: - ehca_mr_delete(e_fmr); -alloc_fmr_exit0: - return ib_fmr; -} /* end ehca_alloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, - int list_len, - u64 iova) -{ - int ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - u32 tmp_lkey, tmp_rkey; - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); - if (ret) - goto map_phys_fmr_exit0; - if (iova % e_fmr->fmr_page_size) { - /* only whole-numbered pages */ - ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", - iova, e_fmr->fmr_page_size); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { - /* HCAD does not limit the maps, however trace this anyway */ - ehca_info(fmr->device, "map limit exceeded, fmr=%p " - "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", - fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); - } - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_kpages = list_len; - pginfo.hwpage_size = e_fmr->hwpage_size; - pginfo.num_hwpages = - list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; - pginfo.u.fmr.page_list = page_list; - pginfo.next_hwpage = - (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; - pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - - ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, - list_len * e_fmr->fmr_page_size, - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto map_phys_fmr_exit0; - - /* successful reregistration */ - e_fmr->fmr_map_cnt++; - e_fmr->ib.ib_fmr.lkey = tmp_lkey; - e_fmr->ib.ib_fmr.rkey = tmp_rkey; - return 0; - -map_phys_fmr_exit0: - if (ret) - ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " - "iova=%llx", ret, fmr, page_list, list_len, iova); - return ret; -} /* end ehca_map_phys_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_fmr(struct list_head *fmr_list) -{ - int ret = 0; - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = NULL; - struct ehca_shca *prev_shca; - struct ehca_mr *e_fmr; - u32 num_fmr = 0; - u32 unmap_fmr_cnt = 0; - - /* check all FMR belong to same SHCA, and check internal flag */ - list_for_each_entry(ib_fmr, fmr_list, list) { - prev_shca = shca; - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - if ((shca != prev_shca) && prev_shca) { - ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " - "prev_shca=%p e_fmr=%p", - shca, prev_shca, e_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " - "e_fmr->flags=%x", e_fmr, e_fmr->flags); - ret = -EINVAL; - goto unmap_fmr_exit0; - } - num_fmr++; - } - - /* loop over all FMRs to unmap */ - list_for_each_entry(ib_fmr, fmr_list, list) { - unmap_fmr_cnt++; - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - shca = container_of(ib_fmr->device, 
struct ehca_shca, - ib_device); - ret = ehca_unmap_one_fmr(shca, e_fmr); - if (ret) { - /* unmap failed, stop unmapping of rest of FMRs */ - ehca_err(&shca->ib_device, "unmap of one FMR failed, " - "stop rest, e_fmr=%p num_fmr=%x " - "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, - unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); - goto unmap_fmr_exit0; - } - } - -unmap_fmr_exit0: - if (ret) - ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", - ret, fmr_list, num_fmr, unmap_fmr_cnt); - return ret; -} /* end ehca_unmap_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_fmr(struct ib_fmr *fmr) -{ - int ret; - u64 h_ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto free_fmr_exit0; - } - - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " - "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca2ib_return_code(h_ret); - goto free_fmr_exit0; - } - /* successful deregistration */ - ehca_mr_delete(e_fmr); - return 0; - -free_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); - return ret; -} /* end ehca_dealloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo); - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey, /*OUT*/ - enum ehca_reg_type reg_type) -{ - int ret; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; - - h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " - "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_mr_exit0; - } - - e_mr->ipz_mr_handle = hipzout.handle; - - if (reg_type == EHCA_REG_BUSMAP_MR) - ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); - else if (reg_type == EHCA_REG_MR) - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); - else - ret = -EINVAL; - - if (ret) - goto ehca_reg_mr_exit1; - - /* successful registration */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_mr_exit1: - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", - h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_kpages, - 
pginfo->num_hwpages, ret); - ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " - "not recoverable"); - } -ehca_reg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", - ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int ret = 0; - u64 h_ret; - u32 rnum; - u64 rpage; - u32 i; - u64 *kpage; - - if (!pginfo->num_hwpages) /* in case of fmr */ - return 0; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_reg_mr_rpages_exit0; - } - - /* max MAX_RPAGES ehca mr pages per register call */ - for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ - if (rnum == 0) - rnum = MAX_RPAGES; /* last shot is full */ - } else - rnum = MAX_RPAGES; - - ret = ehca_set_pagebuf(pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%i rnum=%x kpage=%p", - ret, rnum, kpage); - goto ehca_reg_mr_rpages_exit1; - } - - if (rnum > 1) { - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p i=%x", - kpage, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } else - rpage = *kpage; - - h_ret = hipz_h_register_rpage_mr( - shca->ipz_hca_handle, e_mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - /* - * check for 'registration complete'==H_SUCCESS - * and for 'page registered'==H_PAGE_REGISTERED - */ - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "last " - "hipz_reg_rpage_mr failed, h_ret=%lli " - "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" - " lkey=%x", h_ret, e_mr, i, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } else if (h_ret != H_PAGE_REGISTERED) { - ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " - "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " - "mr_hndl=%llx", h_ret, e_mr, i, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } /* end for(i) */ - - -ehca_reg_mr_rpages_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_reg_mr_rpages_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, - pginfo, pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr_rpages() */ - -/*----------------------------------------------------------------------*/ - -inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - u32 acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret; - u64 h_ret; - u32 hipz_acl; - u64 *kpage; - u64 rpage; - struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - - 
kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_rereg_mr_rereg1_exit0; - } - - pginfo_save = *pginfo; - ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); - if (ret) { - ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " - "kpage=%p", e_mr, pginfo, pginfo->type, - pginfo->num_kpages, pginfo->num_hwpages, kpage); - goto ehca_rereg_mr_rereg1_exit1; - } - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p", kpage); - ret = -EFAULT; - goto ehca_rereg_mr_rereg1_exit1; - } - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, rpage, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * reregistration unsuccessful, try it again with the 3 hCalls, - * e.g. this is required in case H_MR_CONDITION - * (MW bound or MR is shared) - */ - ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); - *pginfo = pginfo_save; - ret = -EAGAIN; - } else if ((u64 *)hipzout.vaddr != iova_start) { - ehca_err(&shca->ib_device, "PHYP changed iova_start in " - "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " - "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, - hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey, hipzout.lkey); - ret = -EFAULT; - } else { - /* - * successful reregistration - * note: start and start_out are identical for eServer HCAs - */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - } - -ehca_rereg_mr_rereg1_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_rereg_mr_rereg1_exit0: - if ( ret && (ret != -EAGAIN) ) - ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx", - ret, *lkey, *rkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages); - return ret; -} /* end ehca_rereg_mr_rereg1() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey) -{ - int ret = 0; - u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ - - /* first determine reregistration hCall(s) */ - if ((pginfo->num_hwpages > MAX_RPAGES) || - (e_mr->num_hwpages > MAX_RPAGES) || - (pginfo->num_hwpages > e_mr->num_hwpages)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, " - "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", - pginfo->num_hwpages, e_mr->num_hwpages); - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ - rereg_1_hcall = 0; - rereg_3_hcall = 1; - e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; - ehca_err(&shca->ib_device, "Rereg MR for max-MR! 
e_mr=%p", - e_mr); - } - - if (rereg_1_hcall) { - ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, - acl, e_pd, pginfo, lkey, rkey); - if (ret) { - if (ret == -EAGAIN) - rereg_3_hcall = 1; - else - goto ehca_rereg_mr_exit0; - } - } - - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first deregister old MR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " - "mr->lkey=%x", - h_ret, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_rereg_mr_exit0; - } - /* clean ehca_mr_t, without changing struct ib_mr and lock */ - save_mr = *e_mr; - ehca_mr_deletenew(e_mr); - - /* set some MR values */ - e_mr->flags = save_mr.flags; - e_mr->hwpage_size = save_mr.hwpage_size; - e_mr->fmr_page_size = save_mr.fmr_page_size; - e_mr->fmr_max_pages = save_mr.fmr_max_pages; - e_mr->fmr_max_maps = save_mr.fmr_max_maps; - e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; - memcpy(&e_mr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_rereg_mr_exit0; - } - } - -ehca_rereg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " - "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, - rereg_1_hcall, rereg_3_hcall); - return ret; -} /* end ehca_rereg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr) -{ - int ret = 0; - u64 h_ret; - struct ehca_pd *e_pd = - container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); - struct ehca_mr save_fmr; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - struct ehca_mr_hipzout_parms hipzout; - struct ehca_mr save_mr; - - if (e_fmr->fmr_max_pages <= MAX_RPAGES) { - /* - * note: after using rereg hcall with len=0, - * rereg hcall must be used again for registering pages - */ - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, - 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret == H_SUCCESS) { - /* successful reregistration */ - e_fmr->start = NULL; - e_fmr->size = 0; - tmp_lkey = hipzout.lkey; - tmp_rkey = hipzout.rkey; - return 0; - } - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " - "mr_hndl=%llx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - /* try free and rereg */ - } - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - 
e_fmr->flags = save_fmr.flags; - e_fmr->hwpage_size = save_fmr.hwpage_size; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - } - -ehca_unmap_one_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); - return ret; -} /* end ehca_unmap_one_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret = 0; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " - "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, - shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - e_origmr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_smr_exit0; - } - /* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_smr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " - "e_newmr=%p iova_start=%p acl=%x e_pd=%p", - ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); - return ret; -} /* end ehca_reg_smr() */ - -/*----------------------------------------------------------------------*/ -static inline void *ehca_calc_sectbase(int top, int dir, int idx) -{ - unsigned long ret = idx; - ret |= dir << EHCA_DIR_INDEX_SHIFT; - ret |= top << EHCA_TOP_INDEX_SHIFT; - return __va(ret << SECTION_SIZE_BITS); -} - -#define ehca_bmap_valid(entry) \ - ((u64)entry != (u64)EHCA_INVAL_ADDR) - -static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 h_ret = 0; - unsigned long page = 0; - u64 rpage = __pa(kpage); - int page_count; - - void *sectbase = ehca_calc_sectbase(top, dir, idx); - if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { - ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" - "hwpage_size does not fit to " - "section start address"); - } - page_count = EHCA_SECTSIZE / pginfo->hwpage_size; - - while (page < page_count) { - u64 rnum; - for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); - rnum++) { - void *pg = sectbase + 
((page++) * pginfo->hwpage_size); - kpage[rnum] = __pa(pg); - } - - h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { - ehca_err(&shca->ib_device, "register_rpage_mr failed"); - return h_ret; - } - } - return h_ret; -} - -static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int idx; - - for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) - continue; - - hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, - pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, - struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int dir; - - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -/* register internal max-MR to internal SHCA */ -int ehca_reg_internal_maxmr( - struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **e_maxmr) /*OUT*/ -{ - int ret; - struct ehca_mr *e_mr; - u64 *iova_start; - u64 size_maxmr; - struct ehca_mr_pginfo pginfo; - struct ib_phys_buf ib_pbuf; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - if (!ehca_bmap) { - ret = -EFAULT; - goto ehca_reg_internal_maxmr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(&shca->ib_device, "out of memory"); - ret = -ENOMEM; - goto ehca_reg_internal_maxmr_exit0; - } - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - - /* register internal max-MR on HCA */ - size_maxmr = ehca_mr_len; - iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); - ib_pbuf.addr = 0; - ib_pbuf.size = size_maxmr; - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, - PAGE_SIZE); - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, - hw_pgsize); - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.u.phy.num_phys_buf = 1; - pginfo.u.phy.phys_buf_array = &ib_pbuf; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); - if (ret) { - ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " - "num_hwpages=%x", e_mr, iova_start, size_maxmr, - num_kpages, num_hwpages); - goto ehca_reg_internal_maxmr_exit1; - } - - /* successful registration of all pages */ - e_mr->ib.ib_mr.device = e_pd->ib_pd.device; - e_mr->ib.ib_mr.pd = &e_pd->ib_pd; - e_mr->ib.ib_mr.uobject = NULL; - atomic_inc(&(e_pd->ib_pd.usecnt)); - atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); - *e_maxmr = e_mr; - return 0; - -ehca_reg_internal_maxmr_exit1: - ehca_mr_delete(e_mr); -ehca_reg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", - ret, shca, e_pd, e_maxmr); - return ret; -} /* end ehca_reg_internal_maxmr() */ - 
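As an aside on the busmap indexing used by the section walk above: the driver splits the physical address space into fixed-size sections and resolves each section number through a three-level top/dir/idx table (ehca_reg_mr_sections() indexes ehca_bmap->top[top]->dir[dir]->ent[idx]; ehca_calc_sectbase() above and ehca_calc_index()/ehca_map_vaddr() further down do the arithmetic). The standalone sketch below reproduces only that index arithmetic for illustration; the 8-bit index width and 2^24-byte section size are assumptions modelled on the driver's EHCA_INDEX_MASK, EHCA_DIR_INDEX_SHIFT, and SECTION_SIZE_BITS values (whose real definitions live in headers deleted by this patch), not values taken from this diff.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the ehca busmap index arithmetic. The widths below are
 * assumptions (see note above), not the driver's actual constants.
 */
#define SECTSHIFT       24UL                 /* assumed 16 MB sections */
#define INDEX_BITS      8UL                  /* assumed 256-entry tables */
#define INDEX_MASK      ((1UL << INDEX_BITS) - 1)
#define DIR_INDEX_SHIFT INDEX_BITS           /* cf. EHCA_DIR_INDEX_SHIFT */
#define TOP_INDEX_SHIFT (2 * INDEX_BITS)     /* cf. EHCA_TOP_INDEX_SHIFT */

/* same shape as the deleted ehca_calc_index() */
static unsigned long calc_index(unsigned long section, unsigned long shift)
{
	return (section >> shift) & INDEX_MASK;
}

int main(void)
{
	uint64_t phys = 0x123456789aULL;     /* arbitrary physical address */
	unsigned long section = phys >> SECTSHIFT;
	unsigned long top = calc_index(section, TOP_INDEX_SHIFT);
	unsigned long dir = calc_index(section, DIR_INDEX_SHIFT);
	unsigned long idx = calc_index(section, 0);
	/* inverse direction, as in ehca_calc_sectbase() */
	unsigned long back = (top << TOP_INDEX_SHIFT) |
			     (dir << DIR_INDEX_SHIFT) | idx;

	printf("section=%lu top=%lu dir=%lu idx=%lu roundtrip=%s\n",
	       section, top, dir, idx, back == section ? "ok" : "BROKEN");
	return 0;
}

Note that only populated entries ever get kmalloc()ed dir/ent tables (see ehca_init_bmap() further down), so the map's footprint stays proportional to the RAM ranges reported by walk_system_ram_range() rather than to the full address space.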
-/*----------------------------------------------------------------------*/ - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey) -{ - u64 h_ret; - struct ehca_mr *e_origmr = shca->maxmr; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, e_origmr, shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - e_origmr->ib.ib_mr.lkey); - return ehca2ib_return_code(h_ret); - } - /* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; -} /* end ehca_reg_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca) -{ - int ret; - struct ehca_mr *e_maxmr; - struct ib_pd *ib_pd; - - if (!shca->maxmr) { - ehca_err(&shca->ib_device, "bad call, shca=%p", shca); - ret = -EINVAL; - goto ehca_dereg_internal_maxmr_exit0; - } - - e_maxmr = shca->maxmr; - ib_pd = e_maxmr->ib.ib_mr.pd; - shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ - - ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); - if (ret) { - ehca_err(&shca->ib_device, "dereg internal max-MR failed, " - "ret=%i e_maxmr=%p shca=%p lkey=%x", - ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); - shca->maxmr = e_maxmr; - goto ehca_dereg_internal_maxmr_exit0; - } - - atomic_dec(&ib_pd->usecnt); - -ehca_dereg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", - ret, shca, shca->maxmr); - return ret; -} /* end ehca_dereg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* - * check physical buffer array of MR verbs for validness and - * calculates MR size - */ -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size) -{ - struct ib_phys_buf *pbuf = phys_buf_array; - u64 size_count = 0; - u32 i; - - if (num_phys_buf == 0) { - ehca_gen_err("bad phys buf array len, num_phys_buf=0"); - return -EINVAL; - } - /* check first buffer */ - if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { - ehca_gen_err("iova_start/addr mismatch, iova_start=%p " - "pbuf->addr=%llx pbuf->size=%llx", - iova_start, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && - (num_phys_buf > 1)) { - ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " - "pbuf->size=%llx", pbuf->addr, pbuf->size); - return -EINVAL; - } - - for (i = 0; i < num_phys_buf; i++) { - if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { - ehca_gen_err("bad address, i=%x pbuf->addr=%llx " - "pbuf->size=%llx", - i, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((i > 0) && /* not 1st */ - (i < (num_phys_buf - 1)) && /* not last */ - (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { - ehca_gen_err("bad size, i=%x 
pbuf->size=%llx", - i, pbuf->size); - return -EINVAL; - } - size_count += pbuf->size; - pbuf++; - } - - *size = size_count; - return 0; -} /* end ehca_mr_chk_buf_and_calc_size() */ - -/*----------------------------------------------------------------------*/ - -/* check page list of map FMR verb for validness */ -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len) -{ - u32 i; - u64 *page; - - if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { - ehca_gen_err("bad list_len, list_len=%x " - "e_fmr->fmr_max_pages=%x fmr=%p", - list_len, e_fmr->fmr_max_pages, e_fmr); - return -EINVAL; - } - - /* each page must be aligned */ - page = page_list; - for (i = 0; i < list_len; i++) { - if (*page % e_fmr->fmr_page_size) { - ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " - "fmr_page_size=%x", i, *page, page, e_fmr, - e_fmr->fmr_page_size); - return -EINVAL; - } - page++; - } - - return 0; -} /* end ehca_fmr_check_page_list() */ - -/*----------------------------------------------------------------------*/ - -/* PAGE_SIZE >= pginfo->hwpage_size */ -static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr; - u32 j = 0; - int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - pgaddr = page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx " - "sg_dma_address=%llx " - "entry=%llx next_hwpage=%llx", - pgaddr, (u64)sg_dma_address(*sg), - pginfo->u.usr.next_nmap, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - *sg = sg_next(*sg); - } - j++; - if (j >= number) - break; - } - - return ret; -} - -/* - * check given pages for contiguous layout - * last page addr is returned in prev_pgaddr for further check - */ -static int ehca_check_kpages_per_ate(struct scatterlist **sg, - int num_pages, - u64 *prev_pgaddr) -{ - for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { - u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; - if (ehca_debug_level >= 3) - ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, - *(u64 *)__va(pgaddr)); - if (pgaddr - PAGE_SIZE != *prev_pgaddr) { - ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx entries_left_in_hwpage=%x", - pgaddr, *prev_pgaddr, num_pages); - return -EINVAL; - } - *prev_pgaddr = pgaddr; - } - return 0; -} - -/* PAGE_SIZE < pginfo->hwpage_size */ -static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr, prev_pgaddr; - u32 j = 0; - int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; - int nr_kpages = kpages_per_hwpage; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - - if (nr_kpages == kpages_per_hwpage) { - pgaddr = (page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT); - *kpage = pgaddr; - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx entry=%llx", - pgaddr, pginfo->u.usr.next_nmap); - ret = -EFAULT; - return ret; - } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. 
- */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx entry=%llx " - "mr_pgsize=%llx", - pgaddr, pginfo->u.usr.next_nmap, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - *sg = sg_next(*sg); - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; - } - - ret = ehca_check_kpages_per_ate(sg, nr_kpages, - &prev_pgaddr); - if (ret) - return ret; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; - -next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) - break; - } - - return ret; -} - -static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - struct ib_phys_buf *pbuf; - u64 num_hw, offs_hw; - u32 i = 0; - - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; - num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + - pbuf->size, pginfo->hwpage_size); - offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / - pginfo->hwpage_size; - while (pginfo->next_hwpage < offs_hw + num_hw) { - /* sanity check */ - if ((pginfo->kpage_cnt >= pginfo->num_kpages) || - (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { - ehca_gen_err("kpage_cnt >= num_kpages, " - "kpage_cnt=%llx num_kpages=%llx " - "hwpage_cnt=%llx " - "num_hwpages=%llx i=%x", - pginfo->kpage_cnt, - pginfo->num_kpages, - pginfo->hwpage_cnt, - pginfo->num_hwpages, i); - return -EFAULT; - } - *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " - "next_hwpage=%llx", pbuf->addr, - pbuf->size, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - if (PAGE_SIZE >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (PAGE_SIZE / pginfo->hwpage_size) == 0) - (pginfo->kpage_cnt)++; - } else - pginfo->kpage_cnt += pginfo->hwpage_size / - PAGE_SIZE; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_hwpage >= offs_hw + num_hw) { - (pginfo->u.phy.next_buf)++; - pginfo->next_hwpage = 0; - } - } - return ret; -} - -static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - u64 *fmrlist; - u32 i; - - /* loop over desired page_list entries */ - fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; - for (i = 0; i < number; i++) { - *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size; - if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%llx fmrlist=%p " - "next_listelem=%llx next_hwpage=%llx", - *fmrlist, fmrlist, - pginfo->u.fmr.next_listelem, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (pginfo->u.fmr.fmr_pgsize / - pginfo->hwpage_size) == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.fmr.next_listelem)++; - fmrlist++; - pginfo->next_hwpage = 0; - } else - 
(pginfo->next_hwpage)++; - } else { - unsigned int cnt_per_hwpage = pginfo->hwpage_size / - pginfo->u.fmr.fmr_pgsize; - unsigned int j; - u64 prev = *kpage; - /* check if adrs are contiguous */ - for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); - if (prev + pginfo->u.fmr.fmr_pgsize != p) { - ehca_gen_err("uncontiguous fmr pages " - "found prev=%llx p=%llx " - "idx=%x", prev, p, i + j); - return -EINVAL; - } - prev = p; - } - pginfo->kpage_cnt += cnt_per_hwpage; - pginfo->u.fmr.next_listelem += cnt_per_hwpage; - fmrlist += cnt_per_hwpage; - } - kpage++; - } - return ret; -} - -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret; - - switch (pginfo->type) { - case EHCA_MR_PGI_PHYS: - ret = ehca_set_pagebuf_phys(pginfo, number, kpage); - break; - case EHCA_MR_PGI_USER: - ret = PAGE_SIZE >= pginfo->hwpage_size ? - ehca_set_pagebuf_user1(pginfo, number, kpage) : - ehca_set_pagebuf_user2(pginfo, number, kpage); - break; - case EHCA_MR_PGI_FMR: - ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); - break; - default: - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - break; - } - return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ - -/* - * check MR if it is a max-MR, i.e. uses whole memory - * in case it's a max-MR 1 is returned, else 0 - */ -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start) -{ - /* a MR is treated as max-MR only if it fits following: */ - if ((size == ehca_mr_len) && - (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { - ehca_gen_dbg("this is a max-MR"); - return 1; - } else - return 0; -} /* end ehca_mr_is_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* map access control for MR/MW. This routine is used for MR and MW. */ -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl) -{ - *hipz_acl = 0; - if (ib_acl & IB_ACCESS_REMOTE_READ) - *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; - if (ib_acl & IB_ACCESS_REMOTE_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; - if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) - *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; - if (ib_acl & IB_ACCESS_LOCAL_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; - if (ib_acl & IB_ACCESS_MW_BIND) - *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; -} /* end ehca_mrmw_map_acl() */ - -/*----------------------------------------------------------------------*/ - -/* sets page size in hipz access control for MR/MW. */ -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ -{ - *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); -} /* end ehca_mrmw_set_pgsize_hipz_acl() */ - -/*----------------------------------------------------------------------*/ - -/* - * reverse map access control for MR/MW. - * This routine is used for MR and MW. 
- */ -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl) /*OUT*/ -{ - *ib_acl = 0; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) - *ib_acl |= IB_ACCESS_REMOTE_READ; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) - *ib_acl |= IB_ACCESS_REMOTE_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) - *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; - if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) - *ib_acl |= IB_ACCESS_LOCAL_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) - *ib_acl |= IB_ACCESS_MW_BIND; -} /* end ehca_mrmw_reverse_map_acl() */ - - -/*----------------------------------------------------------------------*/ - -/* - * MR destructor and constructor - * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, - * except struct ib_mr and spinlock - */ -void ehca_mr_deletenew(struct ehca_mr *mr) -{ - mr->flags = 0; - mr->num_kpages = 0; - mr->num_hwpages = 0; - mr->acl = 0; - mr->start = NULL; - mr->fmr_page_size = 0; - mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; - memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); - memset(&mr->galpas, 0, sizeof(mr->galpas)); -} /* end ehca_mr_deletenew() */ - -int ehca_init_mrmw_cache(void) -{ - mr_cache = kmem_cache_create("ehca_cache_mr", - sizeof(struct ehca_mr), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mr_cache) - return -ENOMEM; - mw_cache = kmem_cache_create("ehca_cache_mw", - sizeof(struct ehca_mw), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mw_cache) { - kmem_cache_destroy(mr_cache); - mr_cache = NULL; - return -ENOMEM; - } - return 0; -} - -void ehca_cleanup_mrmw_cache(void) -{ - if (mr_cache) - kmem_cache_destroy(mr_cache); - if (mw_cache) - kmem_cache_destroy(mw_cache); -} - -static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, - int dir) -{ - if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { - ehca_top_bmap->dir[dir] = - kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); - if (!ehca_top_bmap->dir[dir]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); - } - return 0; -} - -static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) -{ - if (!ehca_bmap_valid(ehca_bmap->top[top])) { - ehca_bmap->top[top] = - kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); - if (!ehca_bmap->top[top]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); - } - return ehca_init_top_bmap(ehca_bmap->top[top], dir); -} - -static inline int ehca_calc_index(unsigned long i, unsigned long s) -{ - return (i >> s) & EHCA_INDEX_MASK; -} - -void ehca_destroy_busmap(void) -{ - int top, dir; - - if (!ehca_bmap) - return; - - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - kfree(ehca_bmap->top[top]->dir[dir]); - } - - kfree(ehca_bmap->top[top]); - } - - kfree(ehca_bmap); - ehca_bmap = NULL; -} - -static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) -{ - unsigned long i, start_section, end_section; - int top, dir, idx; - - if (!nr_pages) - return 0; - - if (!ehca_bmap) { - ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); - if (!ehca_bmap) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); - } - - start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; - end_section = ((pfn + 
nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; - for (i = start_section; i < end_section; i++) { - int ret; - top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); - dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); - idx = i & EHCA_INDEX_MASK; - - ret = ehca_init_bmap(ehca_bmap, top, dir); - if (ret) { - ehca_destroy_busmap(); - return ret; - } - ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; - ehca_mr_len += EHCA_SECTSIZE; - } - return 0; -} - -static int ehca_is_hugepage(unsigned long pfn) -{ - int page_order; - - if (pfn & EHCA_HUGEPAGE_PFN_MASK) - return 0; - - page_order = compound_order(pfn_to_page(pfn)); - if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) - return 0; - - return 1; -} - -static int ehca_create_busmap_callback(unsigned long initial_pfn, - unsigned long total_nr_pages, void *arg) -{ - int ret; - unsigned long pfn, start_pfn, end_pfn, nr_pages; - - if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) - return ehca_update_busmap(initial_pfn, total_nr_pages); - - /* Given chunk is >= 16GB -> check for hugepages */ - start_pfn = initial_pfn; - end_pfn = initial_pfn + total_nr_pages; - pfn = start_pfn; - - while (pfn < end_pfn) { - if (ehca_is_hugepage(pfn)) { - /* Add mem found in front of the hugepage */ - nr_pages = pfn - start_pfn; - ret = ehca_update_busmap(start_pfn, nr_pages); - if (ret) - return ret; - /* Skip the hugepage */ - pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); - start_pfn = pfn; - } else - pfn += (EHCA_SECTSIZE / PAGE_SIZE); - } - - /* Add mem found behind the hugepage(s) */ - nr_pages = pfn - start_pfn; - return ehca_update_busmap(start_pfn, nr_pages); -} - -int ehca_create_busmap(void) -{ - int ret; - - ehca_mr_len = 0; - ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, - ehca_create_busmap_callback); - return ret; -} - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int top; - u64 hret, *kpage; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - return -ENOMEM; - } - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); - if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) - break; - } - - ehca_free_fw_ctrlblock(kpage); - - if (hret == H_SUCCESS) - return 0; /* Everything is fine */ - else { - ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " - "h_ret=%lli e_mr=%p top=%x lkey=%x " - "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - return ehca2ib_return_code(hret); - } -} - -static u64 ehca_map_vaddr(void *caddr) -{ - int top, dir, idx; - unsigned long abs_addr, offset; - u64 entry; - - if (!ehca_bmap) - return EHCA_INVAL_ADDR; - - abs_addr = __pa(caddr); - top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top])) - return EHCA_INVAL_ADDR; - - dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - return EHCA_INVAL_ADDR; - - idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); - - entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; - if (ehca_bmap_valid(entry)) { - offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); - return entry | offset; - } else - return EHCA_INVAL_ADDR; -} - -static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return 
dma_addr == EHCA_INVAL_ADDR; -} - -static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, - size_t size, enum dma_data_direction direction) -{ - if (cpu_addr) - return ehca_map_vaddr(cpu_addr); - else - return EHCA_INVAL_ADDR; -} - -static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - u64 addr; - - if (offset + size > PAGE_SIZE) - return EHCA_INVAL_ADDR; - - addr = ehca_map_vaddr(page_address(page)); - if (!ehca_dma_mapping_error(dev, addr)) - addr += offset; - - return addr; -} - -static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) { - u64 addr; - addr = ehca_map_vaddr(sg_virt(sg)); - if (ehca_dma_mapping_error(dev, addr)) - return 0; - - sg->dma_address = addr; - sg->dma_length = sg->length; - } - return nents; -} - -static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); -} - -static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, - u64 *dma_handle, gfp_t flag) -{ - struct page *p; - void *addr = NULL; - u64 dma_addr; - - p = alloc_pages(flag, get_order(size)); - if (p) { - addr = page_address(p); - dma_addr = ehca_map_vaddr(addr); - if (ehca_dma_mapping_error(dev, dma_addr)) { - free_pages((unsigned long)addr, get_order(size)); - return NULL; - } - if (dma_handle) - *dma_handle = dma_addr; - return addr; - } - return NULL; -} - -static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, u64 dma_handle) -{ - if (cpu_addr && size) - free_pages((unsigned long)cpu_addr, get_order(size)); -} - - -struct ib_dma_mapping_ops ehca_dma_mapping_ops = { - .mapping_error = ehca_dma_mapping_error, - .map_single = ehca_dma_map_single, - .unmap_single = ehca_dma_unmap_single, - .map_page = ehca_dma_map_page, - .unmap_page = ehca_dma_unmap_page, - .map_sg = ehca_dma_map_sg, - .unmap_sg = ehca_dma_unmap_sg, - .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, - .sync_single_for_device = ehca_dma_sync_single_for_device, - .alloc_coherent = ehca_dma_alloc_coherent, - .free_coherent = ehca_dma_free_coherent, -}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h deleted file mode 100644 index 50d8b51..0000000 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW declarations and inline functions - * - * Authors: Dietmar Decker - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights 
reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _EHCA_MRMW_H_ -#define _EHCA_MRMW_H_ - -enum ehca_reg_type { - EHCA_REG_MR, - EHCA_REG_BUSMAP_MR -}; - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey, - enum ehca_reg_type reg_type); - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo); - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int mr_access_flags, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey); - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr); - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_reg_internal_maxmr(struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **maxmr); - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca); - -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size); - -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len); - -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage); - -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start); - -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl); - -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); - -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl); - -void ehca_mr_deletenew(struct ehca_mr *mr); - -int ehca_create_busmap(void); - -void ehca_destroy_busmap(void); - -extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; -#endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c deleted file mode 100644 index 
351577a..0000000 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * PD functions - * - * Authors: Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" - -static struct kmem_cache *pd_cache; - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, struct ib_udata *udata) -{ - struct ehca_pd *pd; - int i; - - pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); - if (!pd) { - ehca_err(device, "device=%p context=%p out of memory", - device, context); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < 2; i++) { - INIT_LIST_HEAD(&pd->free[i]); - INIT_LIST_HEAD(&pd->full[i]); - } - mutex_init(&pd->lock); - - /* - * Kernel PD: when device = -1, 0 - * User PD: when context != -1 - */ - if (!context) { - /* - * Kernel PDs after init reuses always - * the one created in ehca_shca_reopen() - */ - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - pd->fw_pd.value = shca->pd->fw_pd.value; - } else - pd->fw_pd.value = (u64)pd; - - return &pd->ib_pd; -} - -int ehca_dealloc_pd(struct ib_pd *pd) -{ - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - int i, leftovers = 0; - struct ipz_small_queue_page *page, *tmp; - - for (i = 0; i < 2; i++) { - list_splice(&my_pd->full[i], &my_pd->free[i]); - list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { - leftovers = 1; - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } - } - - if (leftovers) - ehca_warn(pd->device, - "Some small queue pages were not freed"); - - kmem_cache_free(pd_cache, my_pd); - - return 0; -} - -int ehca_init_pd_cache(void) -{ - pd_cache = kmem_cache_create("ehca_cache_pd", - sizeof(struct ehca_pd), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!pd_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_pd_cache(void) -{ - if (pd_cache) - kmem_cache_destroy(pd_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h 
b/drivers/infiniband/hw/ehca/ehca_qes.h deleted file mode 100644 index 90c4efa..0000000 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ /dev/null @@ -1,260 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Hardware request structures - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _EHCA_QES_H_ -#define _EHCA_QES_H_ - -#include "ehca_tools.h" - -/* virtual scatter gather entry to specify remote addresses with length */ -struct ehca_vsgentry { - u64 vaddr; - u32 lkey; - u32 length; -}; - -#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) - -/* - * Unreliable Datagram Address Vector Format - * see IBTA Vol1 chapter 8.3 Global Routing Header - */ -struct ehca_ud_av { - u8 sl; - u8 lnh; - u16 dlid; - u8 reserved1; - u8 reserved2; - u8 reserved3; - u8 slid_path_bits; - u8 reserved4; - u8 ipd; - u8 reserved5; - u8 pmtu; - u32 reserved6; - u64 reserved7; - union { - struct { - u64 word_0; /* always set to 6 */ - /*should be 0x1B for IB transport */ - u64 word_1; - u64 word_2; - u64 word_3; - u64 word_4; - } grh; - struct { - u32 wd_0; - u32 wd_1; - /* DWord_1 --> SGID */ - - u32 sgid_wd3; - u32 sgid_wd2; - - u32 sgid_wd1; - u32 sgid_wd0; - /* DWord_3 --> DGID */ - - u32 dgid_wd3; - u32 dgid_wd2; - - u32 dgid_wd1; - u32 dgid_wd0; - } grh_l; - }; -}; - -/* maximum number of sg entries allowed in a WQE */ -#define MAX_WQE_SG_ENTRIES 252 - -#define WQE_OPTYPE_SEND 0x80 -#define WQE_OPTYPE_RDMAREAD 0x40 -#define WQE_OPTYPE_RDMAWRITE 0x20 -#define WQE_OPTYPE_CMPSWAP 0x10 -#define WQE_OPTYPE_FETCHADD 0x08 -#define WQE_OPTYPE_BIND 0x04 - -#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 -#define WQE_WRFLAG_FENCE 0x40 -#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 -#define WQE_WRFLAG_SOLIC_EVENT 0x10 - -#define WQEF_CACHE_HINT 0x80 -#define WQEF_CACHE_HINT_RD_WR 0x40 -#define WQEF_TIMED_WQE 0x20 -#define WQEF_PURGE 0x08 -#define WQEF_HIGH_NIBBLE 0xF0 - -#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 -#define MW_BIND_ACCESSCTRL_R_READ 0x20 -#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 - -struct ehca_wqe { - u64 work_request_id; - u8 optype; - u8 wr_flag; - u16 pkeyi; - u8 wqef; - u8 nr_of_data_seg; - u16 wqe_provided_slid; - u32 destination_qp_number; - u32 resync_psn_sqp; - u32 local_ee_context_qkey; - u32 immediate_data; - union { - struct { - u64 remote_virtual_address; - u32 rkey; - u32 reserved; - u64 atomic_1st_op_dma_len; - u64 atomic_2nd_op; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - - } nud; - struct { - u64 ehca_ud_av_ptr; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } ud_avp; - struct { - struct ehca_ud_av ud_av; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - - 2]; - } ud_av; - struct { - u64 reserved0; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } all_rcv; - - struct { - u64 reserved; - u32 rkey; - u32 old_rkey; - u64 reserved1; - u64 reserved2; - u64 virtual_address; - u32 reserved3; - u32 length; - u32 reserved4; - u16 reserved5; - u8 reserved6; - u8 lr_ctl; - u32 lkey; - u32 reserved7; - u64 reserved8; - u64 reserved9; - u64 reserved10; - u64 reserved11; - } bind; - struct { - u64 reserved12; - u64 reserved13; - u32 size; - u32 start; - } inline_data; - } u; - -}; - -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) -#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) -#define WC_STATUS_ERROR_BIT 0x80000000 -#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 -#define WC_STATUS_PURGE_BIT 
0x10 -#define WC_SEND_RECEIVE_BIT 0x80 - -struct ehca_cqe { - u64 work_request_id; - u8 optype; - u8 w_completion_flags; - u16 reserved1; - u32 nr_bytes_transferred; - u32 immediate_data; - u32 local_qp_number; - u8 freed_resource_count; - u8 service_level; - u16 wqe_count; - u32 qp_token; - u32 qkey_ee_token; - u32 remote_qp_number; - u16 dlid; - u16 rlid; - u16 reserved2; - u16 pkey_index; - u32 cqe_timestamp; - u32 wqe_timestamp; - u8 wqe_timestamp_valid; - u8 reserved3; - u8 reserved4; - u8 cqe_flags; - u32 status; -}; - -struct ehca_eqe { - u64 entry; -}; - -struct ehca_mrte { - u64 starting_va; - u64 length; /* length of memory region in bytes*/ - u32 pd; - u8 key_instance; - u8 pagesize; - u8 mr_control; - u8 local_remote_access_ctrl; - u8 reserved[0x20 - 0x18]; - u64 at_pointer[4]; -}; -#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c deleted file mode 100644 index 2e89356..0000000 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ /dev/null @@ -1,2257 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * QP functions - * - * Authors: Joachim Fenkes - * Stefan Roscher - * Waleri Fomin - * Hoang-Nam Nguyen - * Reinhard Ernst - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -static struct kmem_cache *qp_cache; - -/* - * attributes not supported by query qp - */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ - IB_QP_EN_SQD_ASYNC_NOTIFY) - -/* - * ehca (internal) qp state values - */ -enum ehca_qp_state { - EHCA_QPS_RESET = 1, - EHCA_QPS_INIT = 2, - EHCA_QPS_RTR = 3, - EHCA_QPS_RTS = 5, - EHCA_QPS_SQD = 6, - EHCA_QPS_SQE = 8, - EHCA_QPS_ERR = 128 -}; - -/* - * qp state transitions as defined by IB Arch Rel 1.1 page 431 - */ -enum ib_qp_statetrans { - IB_QPST_ANY2RESET, - IB_QPST_ANY2ERR, - IB_QPST_RESET2INIT, - IB_QPST_INIT2RTR, - IB_QPST_INIT2INIT, - IB_QPST_RTR2RTS, - IB_QPST_RTS2SQD, - IB_QPST_RTS2RTS, - IB_QPST_SQD2RTS, - IB_QPST_SQE2RTS, - IB_QPST_SQD2SQD, - IB_QPST_MAX /* nr of transitions, this must be last!!! */ -}; - -/* - * ib2ehca_qp_state maps IB to ehca qp_state - * returns ehca qp state corresponding to given ib qp state - */ -static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) -{ - switch (ib_qp_state) { - case IB_QPS_RESET: - return EHCA_QPS_RESET; - case IB_QPS_INIT: - return EHCA_QPS_INIT; - case IB_QPS_RTR: - return EHCA_QPS_RTR; - case IB_QPS_RTS: - return EHCA_QPS_RTS; - case IB_QPS_SQD: - return EHCA_QPS_SQD; - case IB_QPS_SQE: - return EHCA_QPS_SQE; - case IB_QPS_ERR: - return EHCA_QPS_ERR; - default: - ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); - return -EINVAL; - } -} - -/* - * ehca2ib_qp_state maps ehca to IB qp_state - * returns ib qp state corresponding to given ehca qp state - */ -static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state - ehca_qp_state) -{ - switch (ehca_qp_state) { - case EHCA_QPS_RESET: - return IB_QPS_RESET; - case EHCA_QPS_INIT: - return IB_QPS_INIT; - case EHCA_QPS_RTR: - return IB_QPS_RTR; - case EHCA_QPS_RTS: - return IB_QPS_RTS; - case EHCA_QPS_SQD: - return IB_QPS_SQD; - case EHCA_QPS_SQE: - return IB_QPS_SQE; - case EHCA_QPS_ERR: - return IB_QPS_ERR; - default: - ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); - return -EINVAL; - } -} - -/* - * ehca_qp_type used as index for req_attr and opt_attr of - * struct ehca_modqp_statetrans - */ -enum ehca_qp_type { - QPT_RC = 0, - QPT_UC = 1, - QPT_UD = 2, - QPT_SQP = 3, - QPT_MAX -}; - -/* - * ib2ehcaqptype maps Ib to ehca qp_type - * returns ehca qp type corresponding to ib qp type - */ -static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return QPT_SQP; - case IB_QPT_RC: - return QPT_RC; - case IB_QPT_UC: - return QPT_UC; - case IB_QPT_UD: - return QPT_UD; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, - int ib_tostate) -{ - int index = -EINVAL; - switch (ib_tostate) { - case IB_QPS_RESET: - index = IB_QPST_ANY2RESET; - break; - case IB_QPS_INIT: - switch (ib_fromstate) { - case IB_QPS_RESET: - index = IB_QPST_RESET2INIT; - break; - case IB_QPS_INIT: - index = IB_QPST_INIT2INIT; - break; - } - break; - case IB_QPS_RTR: - if (ib_fromstate == IB_QPS_INIT) - index = IB_QPST_INIT2RTR; - break; - case IB_QPS_RTS: - switch (ib_fromstate) { - case IB_QPS_RTR: - index = IB_QPST_RTR2RTS; - break; - case IB_QPS_RTS: - index = IB_QPST_RTS2RTS; - break; - case IB_QPS_SQD: - index = IB_QPST_SQD2RTS; - break; - case IB_QPS_SQE: - index = 
IB_QPST_SQE2RTS; - break; - } - break; - case IB_QPS_SQD: - if (ib_fromstate == IB_QPS_RTS) - index = IB_QPST_RTS2SQD; - break; - case IB_QPS_SQE: - break; - case IB_QPS_ERR: - index = IB_QPST_ANY2ERR; - break; - default: - break; - } - return index; -} - -/* - * ibqptype2servicetype returns hcp service type corresponding to given - * ib qp type used by create_qp() - */ -static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return ST_UD; - case IB_QPT_RC: - return ST_RC; - case IB_QPT_UC: - return ST_UC; - case IB_QPT_UD: - return ST_UD; - case IB_QPT_RAW_IPV6: - return -EINVAL; - case IB_QPT_RAW_ETHERTYPE: - return -EINVAL; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -/* - * init userspace queue info from ipz_queue data - */ -static inline void queue2resp(struct ipzu_queue_resp *resp, - struct ipz_queue *queue) -{ - resp->qe_size = queue->qe_size; - resp->act_nr_of_sg = queue->act_nr_of_sg; - resp->queue_length = queue->queue_length; - resp->pagesize = queue->pagesize; - resp->toggle_state = queue->toggle_state; - resp->offset = queue->offset; -} - -/* - * init_qp_queue initializes/constructs r/squeue and registers queue pages. - */ -static inline int init_qp_queue(struct ehca_shca *shca, - struct ehca_pd *pd, - struct ehca_qp *my_qp, - struct ipz_queue *queue, - int q_type, - u64 expected_hret, - struct ehca_alloc_queue_parms *parms, - int wqe_size) -{ - int ret, cnt, ipz_rc, nr_q_pages; - void *vpage; - u64 rpage, h_ret; - struct ib_device *ib_dev = &shca->ib_device; - struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - - if (!parms->queue_size) - return 0; - - if (parms->is_small) { - nr_q_pages = 1; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - 128 << parms->page_size, - wqe_size, parms->act_nr_sges, 1); - } else { - nr_q_pages = parms->queue_size; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - EHCA_PAGESIZE, wqe_size, - parms->act_nr_sges, 0); - } - - if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", - ipz_rc); - return -EBUSY; - } - - /* register queue pages */ - for (cnt = 0; cnt < nr_q_pages; cnt++) { - vpage = ipz_qpageit_get_inc(queue); - if (!vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "failed p_vpage= %p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - NULL, 0, q_type, - rpage, parms->is_small ? 0 : 1, - my_qp->galpas.kernel); - if (cnt == (nr_q_pages - 1)) { /* last page! 
*/ - if (h_ret != expected_hret) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "should not succeed vpage=%p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - } - } - - ipz_qeit_reset(queue); - - return 0; - -init_qp_queue1: - ipz_queue_dtor(pd, queue); - return ret; -} - -static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) -{ - if (is_llqp) - return 128 << act_nr_sge; - else - return offsetof(struct ehca_wqe, - u.nud.sg_list[act_nr_sge]); -} - -static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, - int req_nr_sge, int is_llqp) -{ - u32 wqe_size, q_size; - int act_nr_sge = req_nr_sge; - - if (!is_llqp) - /* round up #SGEs so WQE size is a power of 2 */ - for (act_nr_sge = 4; act_nr_sge <= 252; - act_nr_sge = 4 + 2 * act_nr_sge) - if (act_nr_sge >= req_nr_sge) - break; - - wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); - q_size = wqe_size * (queue->max_wr + 1); - - if (q_size <= 512) - queue->page_size = 2; - else if (q_size <= 1024) - queue->page_size = 3; - else - queue->page_size = 0; - - queue->is_small = (queue->page_size != 0); -} - -/* needs to be called with cq->spinlock held */ -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) -{ - struct list_head *list, *node; - - /* TODO: support low latency QPs */ - if (qp->ext_type == EQPT_LLQP) - return; - - if (on_sq) { - list = &qp->send_cq->sqp_err_list; - node = &qp->sq_err_node; - } else { - list = &qp->recv_cq->rqp_err_list; - node = &qp->rq_err_node; - } - - if (list_empty(node)) - list_add_tail(node, list); - - return; -} - -static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) -{ - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - - if (!list_empty(node)) - list_del_init(node); - - spin_unlock_irqrestore(&cq->spinlock, flags); -} - -static void reset_queue_map(struct ehca_queue_map *qmap) -{ - int i; - - qmap->tail = qmap->entries - 1; - qmap->left_to_poll = 0; - qmap->next_wqe_idx = 0; - for (i = 0; i < qmap->entries; i++) { - qmap->map[i].reported = 1; - qmap->map[i].cqe_req = 0; - } -} - -/* - * Create an ib_qp struct that is either a QP or an SRQ, depending on - * the value of the is_srq parameter. If init_attr and srq_init_attr share - * fields, the field out of init_attr is used. 
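internal_create_qp() below is shared between QP and SRQ creation, with the is_srq parameter selecting which personality the common object takes. Reduced to its bones, and with hypothetical names rather than the ehca API, the pattern looks like this:

    /* Shared-constructor sketch (hypothetical names, not the ehca API):
     * one internal routine builds the common object, two thin wrappers
     * expose it as either a QP or an SRQ. */
    #include <stdlib.h>

    struct queue_pair {
        int is_srq;             /* decides which queues get initialized */
        int qp_num;
        /* ... queues, maps, locks ... */
    };

    static struct queue_pair *internal_create(int qp_num, int is_srq)
    {
        struct queue_pair *qp = calloc(1, sizeof(*qp));

        if (!qp)
            return NULL;
        qp->qp_num = qp_num;
        qp->is_srq = is_srq;
        return qp;
    }

    struct queue_pair *create_qp(int qp_num)  { return internal_create(qp_num, 0); }
    struct queue_pair *create_srq(int qp_num) { return internal_create(qp_num, 1); }
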
- */ -static struct ehca_qp *internal_create_qp( - struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata, int is_srq) -{ - struct ehca_qp *my_qp, *my_srq = NULL; - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct ib_ucontext *context = NULL; - u64 h_ret; - int is_llqp = 0, has_srq = 0, is_user = 0; - int qp_type, max_send_sge, max_recv_sge, ret; - - /* h_call's out parameters */ - struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; - unsigned long flags; - - if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { - ehca_err(pd->device, "Unable to create QP, max number of %i " - "QPs reached.", shca->max_num_qps); - ehca_err(pd->device, "To increase the maximum number of QPs " - "use the number_of_qps module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - if (init_attr->create_flags) { - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - memset(&parms, 0, sizeof(parms)); - qp_type = init_attr->qp_type; - - if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && - init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { - ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", - init_attr->sq_sig_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* save LLQP info */ - if (qp_type & 0x80) { - is_llqp = 1; - parms.ext_type = EQPT_LLQP; - parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; - } - qp_type &= 0x1F; - init_attr->qp_type &= 0x1F; - - /* handle SRQ base QPs */ - if (init_attr->srq) { - my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); - - if (qp_type == IB_QPT_UC) { - ehca_err(pd->device, "UC with SRQ not supported"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - has_srq = 1; - parms.ext_type = EQPT_SRQBASE; - parms.srq_qpn = my_srq->real_qp_num; - } - - if (is_llqp && has_srq) { - ehca_err(pd->device, "LLQPs can't have an SRQ"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* handle SRQs */ - if (is_srq) { - parms.ext_type = EQPT_SRQ; - parms.srq_limit = srq_init_attr->attr.srq_limit; - if (init_attr->cap.max_recv_sge > 3) { - ehca_err(pd->device, "no more than three SGEs " - "supported for SRQ pd=%p max_sge=%x", - pd, init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - /* check QP type */ - if (qp_type != IB_QPT_UD && - qp_type != IB_QPT_UC && - qp_type != IB_QPT_RC && - qp_type != IB_QPT_SMI && - qp_type != IB_QPT_GSI) { - ehca_err(pd->device, "wrong QP Type=%x", qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - if (is_llqp) { - switch (qp_type) { - case IB_QPT_RC: - if ((init_attr->cap.max_send_wr > 255) || - (init_attr->cap.max_recv_wr > 255)) { - ehca_err(pd->device, - "Invalid Number of max_sq_wr=%x " - "or max_rq_wr=%x for RC LLQP", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - case IB_QPT_UD: - if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { - ehca_err(pd->device, "UD LLQP not supported " - "by this adapter"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOSYS); - } - if (!(init_attr->cap.max_send_sge <= 5 - && init_attr->cap.max_send_sge >= 1 - && init_attr->cap.max_recv_sge <= 5 - && init_attr->cap.max_recv_sge >= 1)) { - ehca_err(pd->device, - "Invalid Number of max_send_sge=%x " - "or max_recv_sge=%x for UD LLQP", - 
init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } else if (init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of " - "max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - default: - ehca_err(pd->device, "unsupported LL QP Type=%x", - qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } else { - int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI - || qp_type == IB_QPT_GSI) ? 250 : 252; - - if (init_attr->cap.max_send_sge > max_sge - || init_attr->cap.max_recv_sge > max_sge) { - ehca_err(pd->device, "Invalid number of SGEs requested " - "send_sge=%x recv_sge=%x max_sge=%x", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge, max_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); - if (!my_qp) { - ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOMEM); - } - - if (pd->uobject && udata) { - is_user = 1; - context = pd->uobject->context; - } - - atomic_set(&my_qp->nr_events, 0); - init_waitqueue_head(&my_qp->wait_completion); - spin_lock_init(&my_qp->spinlock_s); - spin_lock_init(&my_qp->spinlock_r); - my_qp->qp_type = qp_type; - my_qp->ext_type = parms.ext_type; - my_qp->state = IB_QPS_RESET; - - if (init_attr->recv_cq) - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - if (init_attr->send_cq) - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - - ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); - if (ret >= 0) - my_qp->token = ret; - - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - idr_preload_end(); - if (ret < 0) { - if (ret == -ENOSPC) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - } else { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); - } - goto create_qp_exit0; - } - - if (has_srq) - parms.srq_token = my_qp->token; - - parms.servicetype = ibqptype2servicetype(qp_type); - if (parms.servicetype < 0) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", qp_type); - goto create_qp_exit1; - } - - /* Always signal by WQE so we can hide circ. 
WQEs */ - parms.sigtype = HCALL_SIGT_BY_WQE; - - /* UD_AV CIRCUMVENTION */ - max_send_sge = init_attr->cap.max_send_sge; - max_recv_sge = init_attr->cap.max_recv_sge; - if (parms.servicetype == ST_UD && !is_llqp) { - max_send_sge += 2; - max_recv_sge += 2; - } - - parms.token = my_qp->token; - parms.eq_handle = shca->eq.ipz_eq_handle; - parms.pd = my_pd->fw_pd; - if (my_qp->send_cq) - parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; - if (my_qp->recv_cq) - parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; - - parms.squeue.max_wr = init_attr->cap.max_send_wr; - parms.rqueue.max_wr = init_attr->cap.max_recv_wr; - parms.squeue.max_sge = max_send_sge; - parms.rqueue.max_sge = max_recv_sge; - - /* RC QPs need one more SWQE for unsolicited ack circumvention */ - if (qp_type == IB_QPT_RC) - parms.squeue.max_wr++; - - if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { - if (HAS_SQ(my_qp)) - ehca_determine_small_queue( - &parms.squeue, max_send_sge, is_llqp); - if (HAS_RQ(my_qp)) - ehca_determine_small_queue( - &parms.rqueue, max_recv_sge, is_llqp); - parms.qp_storage = - (parms.squeue.is_small || parms.rqueue.is_small); - } - - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", - h_ret); - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit1; - } - - ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; - my_qp->ipz_qp_handle = parms.qp_handle; - my_qp->galpas = parms.galpas; - - swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); - rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); - - switch (qp_type) { - case IB_QPT_RC: - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } - /* hide the extra WQE */ - parms.squeue.act_nr_wqes--; - break; - case IB_QPT_UD: - case IB_QPT_GSI: - case IB_QPT_SMI: - /* UD circumvention */ - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } else { - parms.squeue.act_nr_sges -= 2; - parms.rqueue.act_nr_sges -= 2; - } - - if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { - parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; - parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; - parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; - parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; - ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; - } - - break; - - default: - break; - } - - /* initialize r/squeue and register queue pages */ - if (HAS_SQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, - HAS_RQ(my_qp) ? 
H_PAGE_REGISTERED : H_SUCCESS, - &parms.squeue, swqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize squeue " - "and pages ret=%i", ret); - goto create_qp_exit2; - } - - if (!is_user) { - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; - } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); - } - } - - if (HAS_RQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, - H_SUCCESS, &parms.rqueue, rwqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize rqueue " - "and pages ret=%i", ret); - goto create_qp_exit4; - } - if (!is_user) { - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; - } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } - } else if (init_attr->srq && !is_user) { - /* this is a base QP, use the queue map of the SRQ */ - my_qp->rq_map = my_srq->rq_map; - INIT_LIST_HEAD(&my_qp->rq_err_node); - - my_qp->ipz_rqueue = my_srq->ipz_rqueue; - } - - if (is_srq) { - my_qp->ib_srq.pd = &my_pd->ib_pd; - my_qp->ib_srq.device = my_pd->ib_pd.device; - - my_qp->ib_srq.srq_context = init_attr->qp_context; - my_qp->ib_srq.event_handler = init_attr->event_handler; - } else { - my_qp->ib_qp.qp_num = ib_qp_num; - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; - - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = init_attr->send_cq; - - my_qp->ib_qp.qp_type = qp_type; - my_qp->ib_qp.srq = init_attr->srq; - - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; - } - - init_attr->cap.max_inline_data = 0; /* not supported yet */ - init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; - init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; - init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; - init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; - my_qp->init_attr = *init_attr; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - &my_qp->ib_qp; - if (ehca_nr_ports < 0) { - /* alloc array to cache subsequent modify qp parms - * for autodetect mode - */ - my_qp->mod_qp_parm = - kzalloc(EHCA_MOD_QP_PARM_MAX * - sizeof(*my_qp->mod_qp_parm), - GFP_KERNEL); - if (!my_qp->mod_qp_parm) { - ehca_err(pd->device, - "Could not alloc mod_qp_parm"); - goto create_qp_exit5; - } - } - } - - /* NOTE: define_apq0() not supported yet */ - if (qp_type == IB_QPT_GSI) { - h_ret = ehca_define_sqp(shca, my_qp, init_attr); - if (h_ret != H_SUCCESS) { - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - /* the QP pointer is no longer valid */ - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - NULL; - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit6; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); - if (ret) { - ehca_err(pd->device, - "Couldn't assign qp to send_cq ret=%i", ret); - goto 
create_qp_exit7; - } - } - - /* copy queues, galpa data to user space */ - if (context && udata) { - struct ehca_create_qp_resp resp; - memset(&resp, 0, sizeof(resp)); - - resp.qp_num = my_qp->real_qp_num; - resp.token = my_qp->token; - resp.qp_type = my_qp->qp_type; - resp.ext_type = my_qp->ext_type; - resp.qkey = my_qp->qkey; - resp.real_qp_num = my_qp->real_qp_num; - - if (HAS_SQ(my_qp)) - queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); - if (HAS_RQ(my_qp)) - queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); - resp.fw_handle_ofs = (u32) - (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); - - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { - ehca_err(pd->device, "Copy to udata failed"); - ret = -EINVAL; - goto create_qp_exit8; - } - } - - return my_qp; - -create_qp_exit8: - ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); - -create_qp_exit7: - kfree(my_qp->mod_qp_parm); - -create_qp_exit6: - if (HAS_RQ(my_qp) && !is_user) - vfree(my_qp->rq_map.map); - -create_qp_exit5: - if (HAS_RQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - -create_qp_exit4: - if (HAS_SQ(my_qp) && !is_user) - vfree(my_qp->sq_map.map); - -create_qp_exit3: - if (HAS_SQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - -create_qp_exit2: - hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - -create_qp_exit1: - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - -create_qp_exit0: - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return ERR_PTR(ret); -} - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) -{ - struct ehca_qp *ret; - - ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct ib_qp_init_attr qp_init_attr; - struct ehca_qp *my_qp; - struct ib_srq *ret; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 hret, update_mask; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); - - /* For common attributes, internal_create_qp() takes its info - * out of qp_init_attr, so copy all common attrs there. 
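Once the common attributes are copied over, ehca_create_srq() walks the new SRQ through INIT, enable and RTR with three near-identical firmware calls. A fixed bring-up sequence like that can also be written table-driven; the sketch below uses invented names and a stubbed firmware call:

    /* Table-driven bring-up sketch (invented names, stubbed firmware call):
     * walk the queue through an ordered list of states, stopping at the
     * first failure so the caller can unwind, as the driver does. */
    #include <stddef.h>
    #include <stdio.h>

    enum fw_state { FW_INIT, FW_ENABLE, FW_RTR };

    static int fw_modify(enum fw_state s)
    {
        printf("modify -> state %d\n", (int)s);
        return 0;               /* stand-in for the real hcall */
    }

    static int bring_up(void)
    {
        static const enum fw_state steps[] = { FW_INIT, FW_ENABLE, FW_RTR };
        size_t i;

        for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
            int rc = fw_modify(steps[i]);
            if (rc)
                return rc;      /* unwind on the first failed step */
        }
        return 0;
    }
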
- */ - memset(&qp_init_attr, 0, sizeof(qp_init_attr)); - qp_init_attr.event_handler = srq_init_attr->event_handler; - qp_init_attr.qp_context = srq_init_attr->srq_context; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.qp_type = IB_QPT_RC; - qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; - qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; - - my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); - if (IS_ERR(my_qp)) - return (struct ib_srq *)my_qp; - - /* copy back return values */ - srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = 3; - - /* drive SRQ into RTR state */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(pd->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - ret = ERR_PTR(-ENOMEM); - goto create_srq1; - } - - mqpcb->qp_state = EHCA_QPS_INIT; - mqpcb->prim_phys_port = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_enable = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_state = EHCA_QPS_RTR; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - ehca_free_fw_ctrlblock(mqpcb); - - return &my_qp->ib_srq; - -create_srq2: - ret = ERR_PTR(ehca2ib_return_code(hret)); - ehca_free_fw_ctrlblock(mqpcb); - -create_srq1: - internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); - - return ret; -} - -/* - * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts - * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe - * returns total number of bad wqes in bad_wqe_cnt - */ -static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, - int *bad_wqe_cnt) -{ - u64 h_ret; - struct ipz_queue *squeue; - void *bad_send_wqe_p, *bad_send_wqe_v; - u64 q_ofs; - struct ehca_wqe *wqe; - int qp_num = my_qp->ib_qp.qp_num; - - /* get send wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &bad_send_wqe_p, NULL, 2); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" - " ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); - ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", - qp_num, bad_send_wqe_p); - /* convert wqe pointer to vadr */ - bad_send_wqe_v = __va((u64)bad_send_wqe_p); - if (ehca_debug_level >= 2) - ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); - squeue = &my_qp->ipz_squeue; - 
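prepare_sqe_rts() then walks the send queue from the bad WQE up to a 0xff sentinel the driver planted in the next free slot, setting the purge flag on each entry. Stripped of the driver's queue iterators it is a bounded circular walk; in the sketch below the struct layout and the flag value are assumed:

    /* Purge-walk sketch; real WQE layout and queue iterators are
     * driver-specific. */
    #include <stdint.h>

    struct wqe { uint8_t optype; uint8_t flags; };

    #define WQE_SENTINEL 0xff   /* planted by the driver in the next free WQE */
    #define WQEF_PURGE   0x08   /* assumed flag value */

    static int purge_until_sentinel(struct wqe *ring, unsigned int entries,
                                    unsigned int start)
    {
        unsigned int i = start, purged = 0;

        /* the sentinel bounds the loop; cap at one full lap for safety */
        while (ring[i].optype != WQE_SENTINEL && purged < entries) {
            ring[i].flags = WQEF_PURGE;   /* hardware will skip this WQE */
            i = (i + 1) % entries;        /* circular advance */
            purged++;
        }
        return (int)purged;
    }
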
if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
-		ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
-			" bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
-		return -EFAULT;
-	}
-
-	/* loop sets wqe's purge bit */
-	wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-	*bad_wqe_cnt = 0;
-	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
-		if (ehca_debug_level >= 2)
-			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
-		wqe->nr_of_data_seg = 0; /* suppress data access */
-		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-		q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
-		wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
-	}
-	/*
-	 * bad wqe will be reprocessed and ignored when poll_cq() is called,
-	 * i.e. nr of wqes with flush error status is one less
-	 */
-	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
-		 qp_num, (*bad_wqe_cnt)-1);
-	wqe->wqef = 0;
-
-	return 0;
-}
-
-static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
-			  struct ehca_queue_map *qmap)
-{
-	void *wqe_v;
-	u64 q_ofs;
-	u32 wqe_idx;
-	unsigned int tail_idx;
-
-	/* convert real to abs address */
-	wqe_p = wqe_p & (~(1UL << 63));
-
-	wqe_v = __va(wqe_p);
-
-	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
-		ehca_gen_err("Invalid offset for calculating left cqes "
-			     "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
-		return -EFAULT;
-	}
-
-	tail_idx = next_index(qmap->tail, qmap->entries);
-	wqe_idx = q_ofs / ipz_queue->qe_size;
-
-	/* check all processed wqes, whether a cqe is requested or not */
-	while (tail_idx != wqe_idx) {
-		if (qmap->map[tail_idx].cqe_req)
-			qmap->left_to_poll++;
-		tail_idx = next_index(tail_idx, qmap->entries);
-	}
-	/* save index in queue, where we have to start flushing */
-	qmap->next_wqe_idx = wqe_idx;
-	return 0;
-}
-
-static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
-{
-	u64 h_ret;
-	void *send_wqe_p, *recv_wqe_p;
-	int ret;
-	unsigned long flags;
-	int qp_num = my_qp->ib_qp.qp_num;
-
-	/* this hcall is not supported on base QPs */
-	if (my_qp->ext_type != EQPT_SRQBASE) {
-		/* get send and receive wqe pointer */
-		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle, &my_qp->pf,
-				&send_wqe_p, &recv_wqe_p, 4);
-		if (h_ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
-				 "failed ehca_qp=%p qp_num=%x h_ret=%lli",
-				 my_qp, qp_num, h_ret);
-			return ehca2ib_return_code(h_ret);
-		}
-
-		/*
-		 * acquire lock to ensure that nobody is polling the cq which
-		 * could mean that the qmap->tail pointer is in an
-		 * inconsistent state.
-		 */
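calc_left_cqes() above reduces to counting, from one past the consumer tail up to the WQE the firmware reported, every entry whose submitter requested a completion. A self-contained version, with a plain bool array standing in for the driver's queue map:

    /* Count, between the tail and the reported WQE, the entries that
     * still expect a completion; indices are assumed < entries. */
    #include <stdbool.h>

    static unsigned int count_left_cqes(const bool *cqe_req,
                                        unsigned int entries,
                                        unsigned int tail,
                                        unsigned int stop_idx)
    {
        unsigned int idx = (tail + 1) % entries;  /* one past the tail */
        unsigned int left = 0;

        while (idx != stop_idx) {
            if (cqe_req[idx])
                left++;
            idx = (idx + 1) % entries;
        }
        return left;
    }
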
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
-				&my_qp->sq_map);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-		if (ret)
-			return ret;
-
-
-		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
-				&my_qp->rq_map);
-		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-		if (ret)
-			return ret;
-	} else {
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		my_qp->sq_map.left_to_poll = 0;
-		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-							my_qp->sq_map.entries);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-		my_qp->rq_map.left_to_poll = 0;
-		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-							my_qp->rq_map.entries);
-		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-	}
-
-	/* this assures flush cqes being generated only for pending wqes */
-	if ((my_qp->sq_map.left_to_poll == 0) &&
-	    (my_qp->rq_map.left_to_poll == 0)) {
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		ehca_add_to_err_list(my_qp, 1);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-		if (HAS_RQ(my_qp)) {
-			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-			ehca_add_to_err_list(my_qp, 0);
-			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
-					flags);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * internal_modify_qp with circumvention to handle aqp0 properly
- * smi_reset2init indicates if this is an internal reset-to-init-call for
- * smi. This flag must always be zero if called from ehca_modify_qp()!
- * This internal func was introduced to avoid recursion of ehca_modify_qp()!
- */
-static int internal_modify_qp(struct ib_qp *ibqp,
-			      struct ib_qp_attr *attr,
-			      int attr_mask, int smi_reset2init)
-{
-	enum ib_qp_state qp_cur_state, qp_new_state;
-	int cnt, qp_attr_idx, ret = 0;
-	enum ib_qp_statetrans statetrans;
-	struct hcp_modify_qp_control_block *mqpcb;
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca =
-		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
-	u64 update_mask;
-	u64 h_ret;
-	int bad_wqe_cnt = 0;
-	int is_user = 0;
-	int squeue_locked = 0;
-	unsigned long flags = 0;
-
-	/* do query_qp to obtain current attr values */
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!mqpcb) {
-		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				mqpcb, my_qp->galpas.kernel);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, ibqp->qp_num, h_ret);
-		ret = ehca2ib_return_code(h_ret);
-		goto modify_qp_exit1;
-	}
-	if (ibqp->uobject)
-		is_user = 1;
-
-	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
-
-	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 mqpcb->qp_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-	/*
-	 * circumvention to set aqp0 initial state to init
-	 * as expected by IB spec
-	 */
-	if (smi_reset2init == 0 &&
-	    ibqp->qp_type == IB_QPT_SMI &&
-	    qp_cur_state == IB_QPS_RESET &&
-	    (attr_mask & IB_QP_STATE) &&
-	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
-		struct ib_qp_attr smiqp_attr = {
-			.qp_state = IB_QPS_INIT,
-
.port_num = my_qp->init_attr.port_num, - .pkey_index = 0, - .qkey = 0 - }; - int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | - IB_QP_PKEY_INDEX | IB_QP_QKEY; - int smirc = internal_modify_qp( - ibqp, &smiqp_attr, smiqp_attr_mask, 1); - if (smirc) { - ehca_err(ibqp->device, "SMI RESET -> INIT failed. " - "ehca_modify_qp() rc=%i", smirc); - ret = H_PARAMETER; - goto modify_qp_exit1; - } - qp_cur_state = IB_QPS_INIT; - ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); - } - /* is transmitted current state equal to "real" current state */ - if ((attr_mask & IB_QP_CUR_STATE) && - qp_cur_state != attr->cur_qp_state) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" - " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", - attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " - "new qp_state=%x attribute_mask=%x", - my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); - - qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; - if (!smi_reset2init && - !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid qp transition new_state=%x cur_state=%x " - "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, - qp_cur_state, my_qp, ibqp->qp_num, attr_mask); - goto modify_qp_exit1; - } - - mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); - if (mqpcb->qp_state) - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - else { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid new qp state=%x " - "ehca_qp=%p qp_num=%x", - qp_new_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - /* retrieve state transition struct to get req and opt attrs */ - statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); - if (statetrans < 0) { - ret = -EINVAL; - ehca_err(ibqp->device, " qp_cur_state=%x " - "new_qp_state=%x State_xsition=%x ehca_qp=%p " - "qp_num=%x", qp_cur_state, qp_new_state, - statetrans, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); - - if (qp_attr_idx < 0) { - ret = qp_attr_idx; - ehca_err(ibqp->device, - "Invalid QP type=%x ehca_qp=%p qp_num=%x", - ibqp->qp_type, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, - "ehca_qp=%p qp_num=%x qp_state_xsit=%x", - my_qp, ibqp->qp_num, statetrans); - - /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set - * in non-LL UD QPs. 
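From this point on, internal_modify_qp() repeatedly pairs a store into the control block with the matching EHCA_BMASK_SET() bit in update_mask. One way to keep the two in lockstep is a small helper macro; this is only an illustration, the macro, the struct and the bit numbers are invented:

    /* Illustration only: pair the control-block store with its mask bit
     * in one place. EHCA_BMASK_SET and the real field names belong to
     * the driver; this macro does not. */
    #include <stdint.h>

    #define SET_CB_FIELD(cb, field, val, mask, bit) \
        do { (cb)->field = (val); (mask) |= 1ULL << (bit); } while (0)

    struct ctrl_block { uint32_t qkey; uint8_t timeout; };

    static uint64_t fill_example(struct ctrl_block *cb)
    {
        uint64_t update_mask = 0;

        SET_CB_FIELD(cb, qkey, 0x1234, update_mask, 7);  /* bit numbers invented */
        SET_CB_FIELD(cb, timeout, 14, update_mask, 12);
        return update_mask;
    }
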
- */
-	if ((my_qp->qp_type == IB_QPT_UD) &&
-	    (my_qp->ext_type != EQPT_LLQP) &&
-	    (statetrans == IB_QPST_INIT2RTR) &&
-	    (shca->hw_level >= 0x22)) {
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-		mqpcb->send_grh_flag = 1;
-	}
-
-	/* sqe -> rts: set purge bit of bad wqe before actual trans */
-	if ((my_qp->qp_type == IB_QPT_UD ||
-	     my_qp->qp_type == IB_QPT_GSI ||
-	     my_qp->qp_type == IB_QPT_SMI) &&
-	    statetrans == IB_QPST_SQE2RTS) {
-		/* mark next free wqe if kernel */
-		if (!ibqp->uobject) {
-			struct ehca_wqe *wqe;
-			/* lock send queue */
-			spin_lock_irqsave(&my_qp->spinlock_s, flags);
-			squeue_locked = 1;
-			/* mark next free wqe */
-			wqe = (struct ehca_wqe *)
-				ipz_qeit_get(&my_qp->ipz_squeue);
-			wqe->optype = wqe->wqef = 0xff;
-			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
-				 ibqp->qp_num, wqe);
-		}
-		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
-		if (ret) {
-			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-				 "ehca_qp=%p qp_num=%x ret=%i",
-				 my_qp, ibqp->qp_num, ret);
-			goto modify_qp_exit2;
-		}
-	}
-
-	/*
-	 * enable RDMA_Atomic_Control if reset->init and reliable connection;
-	 * this is necessary since gen2 does not provide that flag,
-	 * but pHyp requires it
-	 */
-	if (statetrans == IB_QPST_RESET2INIT &&
-	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
-		mqpcb->rdma_atomic_ctrl = 3;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
-	}
-	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
-	if (statetrans == IB_QPST_INIT2RTR &&
-	    (ibqp->qp_type == IB_QPT_UC) &&
-	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
-		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX) {
-		if (attr->pkey_index >= 16) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid pkey_index=%x. "
-				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
-				 attr->pkey_index, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->prim_p_key_idx = attr->pkey_index;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
-	}
-	if (attr_mask & IB_QP_PORT) {
-		struct ehca_sport *sport;
-		struct ehca_qp *aqp1;
-		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid port=%x. 
" - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - sport = &shca->sport[attr->port_num - 1]; - if (!sport->ibqp_sqp[IB_QPT_GSI]) { - /* should not occur */ - ret = -EFAULT; - ehca_err(ibqp->device, "AQP1 was not created for " - "port=%x", attr->port_num); - goto modify_qp_exit2; - } - aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], - struct ehca_qp, ib_qp); - if (ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_SMI && - aqp1->mod_qp_parm) { - /* - * firmware will reject this modify_qp() because - * port is not activated/initialized fully - */ - ret = -EFAULT; - ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " - "either port is being activated (try again) " - "or cabling issue", attr->port_num); - goto modify_qp_exit2; - } - mqpcb->prim_phys_port = attr->port_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); - } - if (attr_mask & IB_QP_QKEY) { - mqpcb->qkey = attr->qkey; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); - } - if (attr_mask & IB_QP_AV) { - mqpcb->dlid = attr->ah_attr.dlid; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); - mqpcb->source_path_bits = attr->ah_attr.src_path_bits; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); - mqpcb->service_level = attr->ah_attr.sl; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - - if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, - attr->ah_attr.static_rate, - &mqpcb->max_static_rate)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! - */ - if (attr->ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag = 1; - - mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid.byte[cnt] = - attr->ah_attr.grh.dgid.raw[cnt]; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); - mqpcb->flow_label = attr->ah_attr.grh.flow_label; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); - mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); - mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - /* store ld(MTU) */ - my_qp->mtu_shift = attr->path_mtu + 7; - mqpcb->path_mtu = attr->path_mtu; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); - } - if (attr_mask & IB_QP_TIMEOUT) { - mqpcb->timeout = attr->timeout; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); - } - if (attr_mask & IB_QP_RETRY_CNT) { - mqpcb->retry_count = attr->retry_cnt; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RNR_RETRY) { - mqpcb->rnr_retry_count = attr->rnr_retry; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RQ_PSN) { - mqpcb->receive_psn = attr->rq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); - } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? 
- attr->max_dest_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? - attr->max_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET - (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); - } - if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_port_num < 1 - || attr->alt_port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->alt_port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - mqpcb->alt_phys_port = attr->alt_port_num; - - if (attr->alt_pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. " - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->alt_p_key_idx = attr->alt_pkey_index; - - mqpcb->timeout_al = attr->alt_timeout; - mqpcb->dlid_al = attr->alt_ah_attr.dlid; - mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; - mqpcb->service_level_al = attr->alt_ah_attr.sl; - - if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, - attr->alt_ah_attr.static_rate, - &mqpcb->max_static_rate_al)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - - /* OpenIB doesn't support alternate retry counts - copy them */ - mqpcb->retry_count_al = mqpcb->retry_count; - mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) - | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) - | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 
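Both the primary and the alternate path blocks set the GRH mask bit unconditionally, so the hypervisor always receives an explicit yes or no rather than an unspecified field. The idea in isolation (struct and mask bit number invented):

    /* Always report the GRH decision explicitly instead of leaving the
     * field unspecified. */
    #include <stdbool.h>
    #include <stdint.h>

    struct path_cb { uint8_t send_grh_flag; };

    static void set_grh(struct path_cb *cb, bool use_grh, uint64_t *mask)
    {
        cb->send_grh_flag = use_grh ? 1 : 0;  /* explicit yes or no */
        *mask |= 1ULL << 42;                  /* hypothetical GRH mask bit */
    }
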
- */ - if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag_al = 1; - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid_al.byte[cnt] = - attr->alt_ah_attr.grh.dgid.raw[cnt]; - mqpcb->source_gid_idx_al = - attr->alt_ah_attr.grh.sgid_index; - mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; - mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; - mqpcb->traffic_class_al = - attr->alt_ah_attr.grh.traffic_class; - - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); - } - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) { - mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); - } - - if (attr_mask & IB_QP_SQ_PSN) { - mqpcb->send_psn = attr->sq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); - } - - if (attr_mask & IB_QP_DEST_QPN) { - mqpcb->dest_qp_nr = attr->dest_qp_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state != IB_MIG_REARM - && attr->path_mig_state != IB_MIG_MIGRATED) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid mig_state=%x", - attr->path_mig_state); - goto modify_qp_exit2; - } - mqpcb->path_migration_state = attr->path_mig_state + 1; - if (attr->path_mig_state == IB_MIG_REARM) - my_qp->mig_armed = 1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); - } - - if (attr_mask & IB_QP_CAP) { - mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); - mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); - /* no support for max_send/recv_sge yet */ - } - - if (ehca_debug_level >= 2) - ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* doorbell to reprocessing wqes */ - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, bad_wqe_cnt-1); - ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); - } - - if (statetrans == IB_QPST_RESET2INIT || - statetrans == IB_QPST_INIT2INIT) { - mqpcb->qp_enable = 1; - mqpcb->qp_state = EHCA_QPS_INIT; - update_mask = 0; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "ENABLE in context of " - "RESET_2_INIT failed! 
Maybe you didn't get "
-				 "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
-				 h_ret, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-	}
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
-	    && !is_user) {
-		ret = check_for_left_cqes(my_qp, shca);
-		if (ret)
-			goto modify_qp_exit2;
-	}
-
-	if (statetrans == IB_QPST_ANY2RESET) {
-		ipz_qeit_reset(&my_qp->ipz_rqueue);
-		ipz_qeit_reset(&my_qp->ipz_squeue);
-
-		if (qp_cur_state == IB_QPS_ERR && !is_user) {
-			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-			if (HAS_RQ(my_qp))
-				del_from_err_list(my_qp->recv_cq,
-						  &my_qp->rq_err_node);
-		}
-		if (!is_user)
-			reset_queue_map(&my_qp->sq_map);
-
-		if (HAS_RQ(my_qp) && !is_user)
-			reset_queue_map(&my_qp->rq_map);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		my_qp->qkey = attr->qkey;
-
-modify_qp_exit2:
-	if (squeue_locked) { /* this means: sqe -> rts */
-		spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-		my_qp->sqerr_purgeflag = 1;
-	}
-
-modify_qp_exit1:
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return ret;
-}
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-		   struct ib_udata *udata)
-{
-	int ret = 0;
-
-	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-					      ib_device);
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-
-	/* The if-block below caches qp_attr to be modified for GSI and SMI
-	 * qps during the initialization by ib_mad. When the respective port
-	 * is activated, i.e. we get a PORT_ACTIVE event, we'll replay the
-	 * cached modify calls sequence, see ehca_recover_sqp() below.
-	 * Why that is required:
-	 * 1) If only one port is connected, older code required that port
-	 *    one be connected and the module option nr_ports=1 be given by
-	 *    the user, which is very inconvenient for the end user.
-	 * 2) Firmware accepts modify_qp() only if the respective port has
-	 *    become active. Older code had a 30 sec wait loop in
-	 *    create_qp()/define_aqp1(), which is not appropriate in
-	 *    practice. This code now removes that wait loop, see
-	 *    define_aqp1(), and always reports all ports to ib_mad and to
-	 *    users. Only activated ports will then be usable for the users.
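The caching scheme this comment describes is a record-and-replay pattern: modify requests that arrive while the port is still inactive are queued, then applied in order on PORT_ACTIVE, after which caching stops. A minimal sketch with hypothetical types and sizes:

    /* Record-and-replay sketch (hypothetical types and sizes). */
    #define MAX_PARMS 4

    struct mod_parm { int mask; int qp_state; };

    struct sqp_state {
        struct mod_parm saved[MAX_PARMS];
        int nsaved;             /* >= 0 while caching, -1 once replayed */
    };

    static int record_modify(struct sqp_state *s, const struct mod_parm *p)
    {
        if (s->nsaved < 0)
            return 1;           /* port already active: apply directly */
        if (s->nsaved == MAX_PARMS)
            return -1;          /* overflow, like EHCA_MOD_QP_PARM_MAX */
        s->saved[s->nsaved++] = *p;
        return 0;               /* cached for replay */
    }

    static void replay_on_port_active(struct sqp_state *s,
                                      void (*apply)(const struct mod_parm *))
    {
        int i;

        for (i = 0; i < s->nsaved; i++)
            apply(&s->saved[i]);
        s->nsaved = -1;         /* stop caching from now on */
    }
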
- */ - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { - int port = my_qp->init_attr.port_num; - struct ehca_sport *sport = &shca->sport[port - 1]; - unsigned long flags; - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - /* cache qp_attr only during init */ - if (my_qp->mod_qp_parm) { - struct ehca_mod_qp_parm *p; - if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { - ehca_err(&shca->ib_device, - "mod_qp_parm overflow state=%x port=%x" - " type=%x", attr->qp_state, - my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, - flags); - return -EINVAL; - } - p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; - p->mask = attr_mask; - p->attr = *attr; - my_qp->mod_qp_parm_idx++; - ehca_dbg(&shca->ib_device, - "Saved qp_attr for state=%x port=%x type=%x", - attr->qp_state, my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - goto out; - } - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - ret = internal_modify_qp(ibqp, attr, attr_mask, 0); - -out: - if ((ret == 0) && (attr_mask & IB_QP_STATE)) - my_qp->state = attr->qp_state; - - return ret; -} - -void ehca_recover_sqp(struct ib_qp *sqp) -{ - struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); - int port = my_sqp->init_attr.port_num; - struct ib_qp_attr attr; - struct ehca_mod_qp_parm *qp_parm; - int i, qp_parm_idx, ret; - unsigned long flags, wr_cnt; - - if (!my_sqp->mod_qp_parm) - return; - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); - - qp_parm = my_sqp->mod_qp_parm; - qp_parm_idx = my_sqp->mod_qp_parm_idx; - for (i = 0; i < qp_parm_idx; i++) { - attr = qp_parm[i].attr; - ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); - if (ret) { - ehca_err(sqp->device, "Could not modify SQP port=%x " - "qp_num=%x ret=%x", port, sqp->qp_num, ret); - goto free_qp_parm; - } - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", - port, sqp->qp_num, attr.qp_state); - } - - /* re-trigger posted recv wrs */ - wr_cnt = my_sqp->ipz_rqueue.current_q_offset / - my_sqp->ipz_rqueue.qe_size; - if (wr_cnt) { - spin_lock_irqsave(&my_sqp->spinlock_r, flags); - hipz_update_rqa(my_sqp, wr_cnt); - spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); - ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", - port, sqp->qp_num, wr_cnt); - } - -free_qp_parm: - kfree(qp_parm); - /* this prevents subsequent calls to modify_qp() to cache qp_attr */ - my_sqp->mod_qp_parm = NULL; -} - -int ehca_query_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int cnt, ret = 0; - u64 h_ret; - - if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device, "Invalid attribute mask " - "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", - my_qp, qp->qp_num, qp_attr_mask); - return -EINVAL; - } - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(qp->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device, "hipz_h_query_qp() failed " - 
"ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp->qp_num, h_ret); - goto query_qp_exit1; - } - - qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); - qp_attr->qp_state = qp_attr->cur_qp_state; - - if (qp_attr->cur_qp_state == -EINVAL) { - ret = -EINVAL; - ehca_err(qp->device, "Got invalid ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - qpcb->qp_state, my_qp, qp->qp_num); - goto query_qp_exit1; - } - - if (qp_attr->qp_state == IB_QPS_SQD) - qp_attr->sq_draining = 1; - - qp_attr->qkey = qpcb->qkey; - qp_attr->path_mtu = qpcb->path_mtu; - qp_attr->path_mig_state = qpcb->path_migration_state - 1; - qp_attr->rq_psn = qpcb->receive_psn; - qp_attr->sq_psn = qpcb->send_psn; - qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; - qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; - qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; - /* UD_AV CIRCUMVENTION */ - if (my_qp->qp_type == IB_QPT_UD) { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe - 2; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe - 2; - } else { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe; - } - - qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; - qp_attr->dest_qp_num = qpcb->dest_qp_nr; - - qp_attr->pkey_index = qpcb->prim_p_key_idx; - qp_attr->port_num = qpcb->prim_phys_port; - qp_attr->timeout = qpcb->timeout; - qp_attr->retry_cnt = qpcb->retry_count; - qp_attr->rnr_retry = qpcb->rnr_retry_count; - - qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; - qp_attr->alt_port_num = qpcb->alt_phys_port; - qp_attr->alt_timeout = qpcb->timeout_al; - - qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; - qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; - - /* primary av */ - qp_attr->ah_attr.sl = qpcb->service_level; - - if (qpcb->send_grh_flag) { - qp_attr->ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->ah_attr.static_rate = qpcb->max_static_rate; - qp_attr->ah_attr.dlid = qpcb->dlid; - qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; - qp_attr->ah_attr.port_num = qp_attr->port_num; - - /* primary GRH */ - qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; - qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; - qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; - qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid.byte[cnt]; - - /* alternate AV */ - qp_attr->alt_ah_attr.sl = qpcb->service_level_al; - if (qpcb->send_grh_flag_al) { - qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; - qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; - qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; - - /* alternate GRH */ - qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; - qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; - qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; - qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid_al.byte[cnt]; - - /* return init attributes given in ehca_create_qp */ - if (qp_init_attr) - *qp_init_attr = my_qp->init_attr; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); - -query_qp_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum 
ib_srq_attr_mask attr_mask, struct ib_udata *udata) -{ - struct ehca_qp *my_qp = - container_of(ibsrq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = - container_of(ibsrq->pd->device, struct ehca_shca, ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 update_mask; - u64 h_ret; - int ret = 0; - - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - update_mask = 0; - if (attr_mask & IB_SRQ_LIMIT) { - attr_mask &= ~IB_SRQ_LIMIT; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) - | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = attr->srq_limit; - mqpcb->qp_aff_asyn_ev_log_reg = - EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); - } - - /* by now, all bits in attr_mask should have been cleared */ - if (attr_mask) { - ehca_err(ibsrq->device, "invalid attribute mask bits set " - "attr_mask=%x", attr_mask); - ret = -EINVAL; - goto modify_srq_exit0; - } - - if (ehca_debug_level >= 2) - ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, - NULL, update_mask, mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", - h_ret, my_qp, my_qp->real_qp_num); - } - -modify_srq_exit0: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) -{ - struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int ret = 0; - u64 h_ret; - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(srq->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, - NULL, qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(srq->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, h_ret); - goto query_srq_exit1; - } - - srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->max_sge = 3; - srq_attr->srq_limit = qpcb->curr_srq_limit; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - -query_srq_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject) -{ - struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); - struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, - ib_pd); - struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; - u32 qp_num = my_qp->real_qp_num; - int ret; - u64 h_ret; - u8 port_num; - int is_user = 0; - enum ib_qp_type qp_type; - unsigned long flags; - - if (uobject) { - is_user = 1; - if (my_qp->mm_count_galpa || - my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(dev, "Resources still referenced in " - "user space qp_num=%x", qp_num); - return -EINVAL; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); - if (ret) { - ehca_err(dev, 
"Couldn't unassign qp from " - "send_cq ret=%i qp_num=%x cq_num=%x", ret, - qp_num, my_qp->send_cq->cq_number); - return ret; - } - } - - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - - /* - * SRQs will never get into an error list and do not have a recv_cq, - * so we need to skip them here. - */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) - del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - - if (HAS_SQ(my_qp) && !is_user) - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - /* now wait until all pending events have completed */ - wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); - - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - if (h_ret != H_SUCCESS) { - ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); - return ehca2ib_return_code(h_ret); - } - - port_num = my_qp->init_attr.port_num; - qp_type = my_qp->init_attr.qp_type; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - /* no support for IB_QPT_SMI yet */ - if (qp_type == IB_QPT_GSI) { - struct ib_event event; - ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port_num; - shca->sport[port_num - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - } - - if (HAS_RQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (!is_user) - vfree(my_qp->rq_map.map); - } - if (HAS_SQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - if (!is_user) - vfree(my_qp->sq_map.map); - } - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return 0; -} - -int ehca_destroy_qp(struct ib_qp *qp) -{ - return internal_destroy_qp(qp->device, - container_of(qp, struct ehca_qp, ib_qp), - qp->uobject); -} - -int ehca_destroy_srq(struct ib_srq *srq) -{ - return internal_destroy_qp(srq->device, - container_of(srq, struct ehca_qp, ib_srq), - srq->uobject); -} - -int ehca_init_qp_cache(void) -{ - qp_cache = kmem_cache_create("ehca_cache_qp", - sizeof(struct ehca_qp), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!qp_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_qp_cache(void) -{ - if (qp_cache) - kmem_cache_destroy(qp_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c deleted file mode 100644 index 47f9498..0000000 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ /dev/null @@ -1,953 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * post_send/recv, poll_cq, req_notify - * - * Authors: Hoang-Nam Nguyen - * Waleri Fomin - * Joachim Fenkes - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. 
- * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -/* in RC traffic, insert an empty RDMA READ every this many packets */ -#define ACK_CIRC_THRESHOLD 2000000 - -static u64 replace_wr_id(u64 wr_id, u16 idx) -{ - u64 ret; - - ret = wr_id & ~QMAP_IDX_MASK; - ret |= idx & QMAP_IDX_MASK; - - return ret; -} - -static u16 get_app_wr_id(u64 wr_id) -{ - return wr_id & QMAP_IDX_MASK; -} - -static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, - struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr, - u32 rq_map_idx) -{ - u8 cnt_ds; - if (unlikely((recv_wr->num_sge < 0) || - (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. 
" - "num_sqe=%x max_nr_of_sg=%x", - recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); - wqe_p->nr_of_data_seg = recv_wr->num_sge; - - for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { - wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = - recv_wr->sg_list[cnt_ds].addr; - wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = - recv_wr->sg_list[cnt_ds].lkey; - wqe_p->u.all_rcv.sg_list[cnt_ds].length = - recv_wr->sg_list[cnt_ds].length; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", - ipz_rqueue); - ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); - } - - return 0; -} - -#if defined(DEBUG_GSI_SEND_WR) - -/* need ib_mad struct */ -#include - -static void trace_send_wr_ud(const struct ib_send_wr *send_wr) -{ - int idx; - int j; - while (send_wr) { - struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; - struct ib_sge *sge = send_wr->sg_list; - ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x", idx, send_wr->wr_id, - send_wr->num_sge, send_wr->send_flags, - send_wr->opcode); - if (mad_hdr) { - ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " - "mgmt_class=%x class_version=%x method=%x " - "status=%x class_specific=%x tid=%lx " - "attr_id=%x resv=%x attr_mod=%x", - idx, mad_hdr->base_version, - mad_hdr->mgmt_class, - mad_hdr->class_version, mad_hdr->method, - mad_hdr->status, mad_hdr->class_specific, - mad_hdr->tid, mad_hdr->attr_id, - mad_hdr->resv, - mad_hdr->attr_mod); - } - for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = __va(sge->addr); - ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " - "lkey=%x", - idx, j, data, sge->length, sge->lkey); - /* assume length is n*16 */ - ehca_dmp(data, sge->length, "send_wr#%x sge#%x", - idx, j); - sge++; - } /* eof for j */ - idx++; - send_wr = send_wr->next; - } /* eof while send_wr */ -} - -#endif /* DEBUG_GSI_SEND_WR */ - -static inline int ehca_write_swqe(struct ehca_qp *qp, - struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr, - u32 sq_map_idx, - int hidden) -{ - u32 idx; - u64 dma_length; - struct ehca_av *my_av; - u32 remote_qkey = send_wr->wr.ud.remote_qkey; - struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; - - if (unlikely((send_wr->num_sge < 0) || - (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. 
" - "num_sqe=%x max_nr_of_sg=%x", - send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); - - qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 0; - - switch (send_wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_SEND; - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_RDMAWRITE; - break; - case IB_WR_RDMA_READ: - wqe_p->optype = WQE_OPTYPE_RDMAREAD; - break; - default: - ehca_gen_err("Invalid opcode=%x", send_wr->opcode); - return -EINVAL; /* invalid opcode */ - } - - wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; - - wqe_p->wr_flag = 0; - - if ((send_wr->send_flags & IB_SEND_SIGNALED || - qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) - && !hidden) { - wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; - qmap_entry->cqe_req = 1; - } - - if (send_wr->opcode == IB_WR_SEND_WITH_IMM || - send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - /* this might not work as long as HW does not support it */ - wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); - wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; - } - - wqe_p->nr_of_data_seg = send_wr->num_sge; - - switch (qp->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - /* no break is intential here */ - case IB_QPT_UD: - /* IB 1.2 spec C10-15 compliance */ - if (send_wr->wr.ud.remote_qkey & 0x80000000) - remote_qkey = qp->qkey; - - wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; - wqe_p->local_ee_context_qkey = remote_qkey; - if (unlikely(!send_wr->wr.ud.ah)) { - ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); - return -EINVAL; - } - if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { - ehca_gen_err("dest QP# is 0. 
qp=%x", qp->real_qp_num); - return -EINVAL; - } - my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); - wqe_p->u.ud_av.ud_av = my_av->av; - - /* - * omitted check of IB_SEND_INLINE - * since HW does not support it - */ - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.ud_av.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.ud_av.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.ud_av.sg_list[idx].length = - send_wr->sg_list[idx].length; - } /* eof for idx */ - if (qp->qp_type == IB_QPT_SMI || - qp->qp_type == IB_QPT_GSI) - wqe_p->u.ud_av.ud_av.pmtu = 1; - if (qp->qp_type == IB_QPT_GSI) { - wqe_p->pkeyi = send_wr->wr.ud.pkey_index; -#ifdef DEBUG_GSI_SEND_WR - trace_send_wr_ud(send_wr); -#endif /* DEBUG_GSI_SEND_WR */ - } - break; - - case IB_QPT_UC: - if (send_wr->send_flags & IB_SEND_FENCE) - wqe_p->wr_flag |= WQE_WRFLAG_FENCE; - /* no break is intentional here */ - case IB_QPT_RC: - /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_address = - send_wr->wr.rdma.remote_addr; - wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; - - /* - * omitted checking of IB_SEND_INLINE - * since HW does not support it - */ - dma_length = 0; - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.nud.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.nud.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.nud.sg_list[idx].length = - send_wr->sg_list[idx].length; - dma_length += send_wr->sg_list[idx].length; - } /* eof idx */ - wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; - - /* unsolicited ack circumvention */ - if (send_wr->opcode == IB_WR_RDMA_READ) { - /* on RDMA read, switch on and reset counters */ - qp->message_count = qp->packet_count = 0; - qp->unsol_ack_circ = 1; - } else - /* else estimate #packets */ - qp->packet_count += (dma_length >> qp->mtu_shift) + 1; - - break; - - default: - ehca_gen_err("Invalid qptype=%x", qp->qp_type); - return -EINVAL; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); - } - return 0; -} - -/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ -static inline void map_ib_wc_status(u32 cqe_status, - enum ib_wc_status *wc_status) -{ - if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { - switch (cqe_status & 0x3F) { - case 0x01: - case 0x21: - *wc_status = IB_WC_LOC_LEN_ERR; - break; - case 0x02: - case 0x22: - *wc_status = IB_WC_LOC_QP_OP_ERR; - break; - case 0x03: - case 0x23: - *wc_status = IB_WC_LOC_EEC_OP_ERR; - break; - case 0x04: - case 0x24: - *wc_status = IB_WC_LOC_PROT_ERR; - break; - case 0x05: - case 0x25: - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - case 0x06: - *wc_status = IB_WC_MW_BIND_ERR; - break; - case 0x07: /* remote error - look into bits 20:24 */ - switch ((cqe_status - & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { - case 0x0: - /* - * PSN Sequence Error! - * couldn't find a matching status! 
- */ - *wc_status = IB_WC_GENERAL_ERR; - break; - case 0x1: - *wc_status = IB_WC_REM_INV_REQ_ERR; - break; - case 0x2: - *wc_status = IB_WC_REM_ACCESS_ERR; - break; - case 0x3: - *wc_status = IB_WC_REM_OP_ERR; - break; - case 0x4: - *wc_status = IB_WC_REM_INV_RD_REQ_ERR; - break; - } - break; - case 0x08: - *wc_status = IB_WC_RETRY_EXC_ERR; - break; - case 0x09: - *wc_status = IB_WC_RNR_RETRY_EXC_ERR; - break; - case 0x0A: - case 0x2D: - *wc_status = IB_WC_REM_ABORT_ERR; - break; - case 0x0B: - case 0x2E: - *wc_status = IB_WC_INV_EECN_ERR; - break; - case 0x0C: - case 0x2F: - *wc_status = IB_WC_INV_EEC_STATE_ERR; - break; - case 0x0D: - *wc_status = IB_WC_BAD_RESP_ERR; - break; - case 0x10: - /* WQE purged */ - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - default: - *wc_status = IB_WC_FATAL_ERR; - - } - } else - *wc_status = IB_WC_SUCCESS; -} - -static inline int post_one_send(struct ehca_qp *my_qp, - struct ib_send_wr *cur_send_wr, - int hidden) -{ - struct ehca_wqe *wqe_p; - int ret; - u32 sq_map_idx; - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -ENOMEM; - } - - /* - * Get the index of the WQE in the send queue. The same index is used - * for writing into the sq_map. - */ - sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; - - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - ehca_err(my_qp->ib_qp.device, "Could not write WQE " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - int wqe_cnt = 0; - int ret = 0; - unsigned long flags; - - /* Reject WR if QP is in RESET, INIT or RTR state */ - if (unlikely(my_qp->state < IB_QPS_RTS)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - ret = -EINVAL; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - - /* Send an empty extra RDMA read if: - * 1) there has been an RDMA read on this connection before - * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets - * 3) we can be sure that any previous extra RDMA read has been - * processed so we don't overflow the SQ - */ - if (unlikely(my_qp->unsol_ack_circ && - my_qp->packet_count > ACK_CIRC_THRESHOLD && - my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { - /* insert an empty RDMA READ to fix up the remote QP state */ - struct ib_send_wr circ_wr; - memset(&circ_wr, 0, sizeof(circ_wr)); - circ_wr.opcode = IB_WR_RDMA_READ; - post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ - wqe_cnt++; - ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); - my_qp->message_count = my_qp->packet_count = 0; - } - - /* loop processes list of send reqs */ - while (send_wr) { - ret = post_one_send(my_qp, send_wr, 0); - if (unlikely(ret)) { - goto post_send_exit0; - } - wqe_cnt++; - send_wr = send_wr->next; - } - -post_send_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, 
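
post_one_send() and the threshold test in ehca_post_send() above implement the "unsolicited ack circumvention" announced at the top of the file: once a connection has carried an RDMA READ, every further send bumps an estimated link-packet counter, and past ACK_CIRC_THRESHOLD a hidden zero-length RDMA READ is injected so the remote QP flushes its acks before the send queue can overflow. A simplified model of that bookkeeping (the struct and function names here are illustrative, not the driver's):

#include <stdbool.h>
#include <stdint.h>

#define ACK_CIRC_THRESHOLD 2000000

struct conn_state {
	bool     unsol_ack_circ;	/* an RDMA READ was seen before */
	uint32_t packet_count;		/* estimated link packets since then */
	uint32_t message_count;		/* WQEs posted since then */
	uint32_t max_send_wr;		/* SQ depth from qp_init_attr */
	uint32_t mtu_shift;
};

static bool need_circ_rdma_read(const struct conn_state *c)
{
	return c->unsol_ack_circ &&
	       c->packet_count > ACK_CIRC_THRESHOLD &&
	       c->message_count > c->max_send_wr;
}

static void account_send(struct conn_state *c, uint64_t dma_length, bool is_rdma_read)
{
	if (is_rdma_read) {
		/* RDMA READs carry acks themselves: restart the counters */
		c->packet_count = c->message_count = 0;
		c->unsol_ack_circ = true;
	} else {
		c->packet_count += (dma_length >> c->mtu_shift) + 1;
	}
	c->message_count++;
}

int main(void)
{
	struct conn_state c = { .max_send_wr = 64, .mtu_shift = 11 };

	account_send(&c, 4096, true);	/* first RDMA READ arms the workaround */
	account_send(&c, 1 << 30, false);
	return need_circ_rdma_read(&c);	/* 0 here: thresholds not reached yet */
}
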
wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, qp->qp_num, wqe_cnt, ret); - my_qp->message_count += wqe_cnt; - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - -out: - if (ret) - *bad_send_wr = send_wr; - return ret; -} - -static int internal_post_recv(struct ehca_qp *my_qp, - struct ib_device *dev, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_wqe *wqe_p; - int wqe_cnt = 0; - int ret = 0; - u32 rq_map_idx; - unsigned long flags; - struct ehca_qmap_entry *qmap_entry; - - if (unlikely(!HAS_RQ(my_qp))) { - ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", - my_qp, my_qp->real_qp_num, my_qp->ext_type); - ret = -ENODEV; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, flags); - - /* loop processes list of recv reqs */ - while (recv_wr) { - u64 start_offset = my_qp->ipz_rqueue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - /* - * Get the index of the WQE in the recv queue. The same index - * is used for writing into the rq_map. - */ - rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; - - /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, - rq_map_idx); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_rqueue.current_q_offset = start_offset; - ret = -EINVAL; - ehca_err(dev, "Could not write WQE " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - - qmap_entry = &my_qp->rq_map.map[rq_map_idx]; - qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 1; - - wqe_cnt++; - recv_wr = recv_wr->next; - } /* eof for recv_wr */ - -post_recv_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_rqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, my_qp->real_qp_num, wqe_cnt, ret); - spin_unlock_irqrestore(&my_qp->spinlock_r, flags); - -out: - if (ret) - *bad_recv_wr = recv_wr; - - return ret; -} - -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - - /* Reject WR if QP is in RESET state */ - if (unlikely(my_qp->state == IB_QPS_RESET)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - *bad_recv_wr = recv_wr; - return -EINVAL; - } - - return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); -} - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), - srq->device, recv_wr, bad_recv_wr); -} - -/* - * ib_wc_opcode table converts ehca wc opcode to ib - * Since we use zero to indicate invalid opcode, the actual ib opcode must - * be decremented!!! 
- */ -static const u8 ib_wc_opcode[255] = { - [0x01] = IB_WC_RECV+1, - [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, - [0x04] = IB_WC_BIND_MW+1, - [0x08] = IB_WC_FETCH_ADD+1, - [0x10] = IB_WC_COMP_SWAP+1, - [0x20] = IB_WC_RDMA_WRITE+1, - [0x40] = IB_WC_RDMA_READ+1, - [0x80] = IB_WC_SEND+1 -}; - -/* internal function to poll one entry of cq */ -static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) -{ - int ret = 0, qmap_tail_idx; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - struct ehca_cqe *cqe; - struct ehca_qp *my_qp; - struct ehca_qmap_entry *qmap_entry; - struct ehca_queue_map *qmap; - int cqe_count = 0, is_error; - -repoll: - cqe = (struct ehca_cqe *) - ipz_qeit_get_inc_valid(&my_cq->ipz_queue); - if (!cqe) { - ret = -EAGAIN; - if (ehca_debug_level >= 3) - ehca_dbg(cq->device, "Completion queue is empty " - "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); - goto poll_cq_one_exit0; - } - - /* prevents loads being reordered across this point */ - rmb(); - - cqe_count++; - if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp; - int purgeflag; - unsigned long flags; - - qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); - if (!qp) { - ehca_err(cq->device, "cq_num=%x qp_num=%x " - "could not find qp -> ignore cqe", - my_cq->cq_number, cqe->local_qp_number); - ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", - my_cq->cq_number, cqe->local_qp_number); - /* ignore this purged cqe */ - goto repoll; - } - spin_lock_irqsave(&qp->spinlock_s, flags); - purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, flags); - - if (purgeflag) { - ehca_dbg(cq->device, - "Got CQE with purged bit qp_num=%x src_qp=%x", - cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level >= 2) - ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", - cqe->local_qp_number, - cqe->remote_qp_number); - /* - * ignore this to avoid double cqes of bad wqe - * that caused sqe and turn off purge flag - */ - qp->sqerr_purgeflag = 0; - goto repoll; - } - } - - is_error = cqe->status & WC_STATUS_ERROR_BIT; - - /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ - if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { - ehca_dbg(cq->device, - "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", - is_error ? "ERROR " : "", my_cq, my_cq->cq_number); - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x -------------------------", - my_cq, my_cq->cq_number); - } - - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - read_unlock(&ehca_qp_idr_lock); - if (!my_qp) - goto repoll; - wc->qp = &my_qp->ib_qp; - - qmap_tail_idx = get_app_wr_id(cqe->work_request_id); - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) - /* We got a send completion. */ - qmap = &my_qp->sq_map; - else - /* We got a receive completion. 
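
The ib_wc_opcode[] table above relies on a small sentinel trick spelled out in its comment: every entry is stored incremented by one, so the zero that static initialization leaves in all unnamed slots decodes to -1 and flags an invalid raw opcode. The pattern in isolation, with a two-entry table standing in for the 255-entry original:

#include <stdio.h>

enum wc_op { WC_RECV, WC_SEND };

/* value+1 so that unset slots (0) mean "invalid opcode" */
static const unsigned char op_table[255] = {
	[0x01] = WC_RECV + 1,
	[0x80] = WC_SEND + 1,
};

static int decode_op(unsigned char raw)
{
	return op_table[raw] - 1;	/* -1 signals an unknown raw opcode */
}

int main(void)
{
	printf("0x01 -> %d, 0x02 -> %d\n", decode_op(0x01), decode_op(0x02));
	return 0;
}
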
*/ - qmap = &my_qp->rq_map; - - /* advance the tail pointer */ - qmap->tail = qmap_tail_idx; - - if (is_error) { - /* - * set left_to_poll to 0 because in error state, we will not - * get any additional CQEs - */ - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - my_qp->sq_map.left_to_poll = 0; - ehca_add_to_err_list(my_qp, 1); - - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - my_qp->rq_map.left_to_poll = 0; - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - - qmap_entry = &qmap->map[qmap_tail_idx]; - if (qmap_entry->reported) { - ehca_warn(cq->device, "Double cqe on qp_num=%#x", - my_qp->real_qp_num); - /* found a double cqe, discard it and read next one */ - goto repoll; - } - - wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); - qmap_entry->reported = 1; - - /* if left_to_poll is decremented to 0, add the QP to the error list */ - if (qmap->left_to_poll > 0) { - qmap->left_to_poll--; - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - ehca_add_to_err_list(my_qp, 1); - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - } - - /* eval ib_wc_opcode */ - wc->opcode = ib_wc_opcode[cqe->optype]-1; - if (unlikely(wc->opcode == -1)) { - ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " - "ehca_cq=%p cq_num=%x", - cqe->optype, cqe->status, my_cq, my_cq->cq_number); - /* dump cqe for other infos */ - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - /* update also queue adder to throw away this entry!!! */ - goto repoll; - } - - /* eval ib_wc_status */ - if (unlikely(is_error)) { - /* complete with errors */ - map_ib_wc_status(cqe->status, &wc->status); - wc->vendor_err = wc->status; - } else - wc->status = IB_WC_SUCCESS; - - wc->byte_len = cqe->nr_bytes_transferred; - wc->pkey_index = cqe->pkey_index; - wc->slid = cqe->rlid; - wc->dlid_path_bits = cqe->dlid; - wc->src_qp = cqe->remote_qp_number; - /* - * HW has "Immed data present" and "GRH present" in bits 6 and 5. - * SW defines those in bits 1 and 0, so we can just shift and mask. 
- */ - wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; - wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); - wc->sl = cqe->service_level; - -poll_cq_one_exit0: - if (cqe_count > 0) - hipz_update_feca(my_cq, cqe_count); - - return ret; -} - -static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, - struct ib_wc *wc, int num_entries, - struct ipz_queue *ipz_queue, int on_sq) -{ - int nr = 0; - struct ehca_wqe *wqe; - u64 offset; - struct ehca_queue_map *qmap; - struct ehca_qmap_entry *qmap_entry; - - if (on_sq) - qmap = &my_qp->sq_map; - else - qmap = &my_qp->rq_map; - - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - while ((nr < num_entries) && (qmap_entry->reported == 0)) { - /* generate flush CQE */ - - memset(wc, 0, sizeof(*wc)); - - offset = qmap->next_wqe_idx * ipz_queue->qe_size; - wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); - if (!wqe) { - ehca_err(cq->device, "Invalid wqe offset=%#llx on " - "qp_num=%#x", offset, my_qp->real_qp_num); - return nr; - } - - wc->wr_id = replace_wr_id(wqe->work_request_id, - qmap_entry->app_wr_id); - - if (on_sq) { - switch (wqe->optype) { - case WQE_OPTYPE_SEND: - wc->opcode = IB_WC_SEND; - break; - case WQE_OPTYPE_RDMAWRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case WQE_OPTYPE_RDMAREAD: - wc->opcode = IB_WC_RDMA_READ; - break; - default: - ehca_err(cq->device, "Invalid optype=%x", - wqe->optype); - return nr; - } - } else - wc->opcode = IB_WC_RECV; - - if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { - wc->ex.imm_data = wqe->immediate_data; - wc->wc_flags |= IB_WC_WITH_IMM; - } - - wc->status = IB_WC_WR_FLUSH_ERR; - - wc->qp = &my_qp->ib_qp; - - /* mark as reported and advance next_wqe pointer */ - qmap_entry->reported = 1; - qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, - qmap->entries); - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - wc++; nr++; - } - - return nr; - -} - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) -{ - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int nr; - struct ehca_qp *err_qp; - struct ib_wc *current_wc = wc; - int ret = 0; - unsigned long flags; - int entries_left = num_entries; - - if (num_entries < 1) { - ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " - "cq_num=%x", num_entries, my_cq, my_cq->cq_number); - ret = -EINVAL; - goto poll_cq_exit0; - } - - spin_lock_irqsave(&my_cq->spinlock, flags); - - /* generate flush cqes for send queues */ - list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_squeue, 1); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - /* generate flush cqes for receive queues */ - list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_rqueue, 0); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - for (nr = 0; nr < entries_left; nr++) { - ret = ehca_poll_cq_one(cq, current_wc); - if (ret) - break; - current_wc++; - } /* eof for nr */ - entries_left -= nr; - - spin_unlock_irqrestore(&my_cq->spinlock, flags); - if (ret == -EAGAIN || !ret) - ret = num_entries - entries_left; - -poll_cq_exit0: - return ret; -} - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) -{ - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int ret = 0; - - switch (notify_flags & IB_CQ_SOLICITED_MASK) { - case IB_CQ_SOLICITED: - 
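
ehca_poll_cq() above first drains the per-CQ error lists, synthesizing IB_WC_WR_FLUSH_ERR completions for every queue-map entry that was never reported, and it walks each map as a ring starting at next_wqe_idx. A minimal model of that walk (next_index() mirrors the driver helper of the same name from ehca_classes.h):

#include <stdbool.h>
#include <stdio.h>

struct qmap_entry { bool reported; };

static unsigned next_index(unsigned idx, unsigned entries)
{
	return (idx + 1) % entries;
}

/* returns how many flush completions were generated, at most budget */
static int generate_flush(struct qmap_entry *map, unsigned entries,
			  unsigned *next_wqe_idx, int budget)
{
	int nr = 0;

	while (nr < budget && !map[*next_wqe_idx].reported) {
		map[*next_wqe_idx].reported = true;	/* emit WR_FLUSH_ERR here */
		*next_wqe_idx = next_index(*next_wqe_idx, entries);
		nr++;
	}
	return nr;
}

int main(void)
{
	struct qmap_entry map[4] = { {false}, {false}, {true}, {false} };
	unsigned idx = 0;

	printf("flushed %d entries\n", generate_flush(map, 4, &idx, 8));
	return 0;
}
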
hipz_set_cqx_n0(my_cq, 1); - break; - case IB_CQ_NEXT_COMP: - hipz_set_cqx_n1(my_cq, 1); - break; - default: - return -EINVAL; - } - - if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { - unsigned long spl_flags; - spin_lock_irqsave(&my_cq->spinlock, spl_flags); - ret = ipz_qeit_is_valid(&my_cq->ipz_queue); - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); - } - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c deleted file mode 100644 index 376b031..0000000 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * SQP functions - * - * Authors: Khadija Souissi - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) -#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) - -#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) - -/** - * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue - * pair is created successfully, the corresponding port gets active. - * - * Define Special Queue pair 0 (SMI QP) is still not supported. - * - * @qp_init_attr: Queue pair init attributes with port and queue pair type - */ - -u64 ehca_define_sqp(struct ehca_shca *shca, - struct ehca_qp *ehca_qp, - struct ib_qp_init_attr *qp_init_attr) -{ - u32 pma_qp_nr, bma_qp_nr; - u64 ret; - u8 port = qp_init_attr->port_num; - int counter; - - shca->sport[port - 1].port_state = IB_PORT_DOWN; - - switch (qp_init_attr->qp_type) { - case IB_QPT_SMI: - /* function not supported yet */ - break; - case IB_QPT_GSI: - ret = hipz_h_define_aqp1(shca->ipz_hca_handle, - ehca_qp->ipz_qp_handle, - ehca_qp->galpas.kernel, - (u32) qp_init_attr->port_num, - &pma_qp_nr, &bma_qp_nr); - - if (ret != H_SUCCESS) { - ehca_err(&shca->ib_device, - "Can't define AQP1 for port %x. 
h_ret=%lli", - port, ret); - return ret; - } - shca->sport[port - 1].pma_qp_nr = pma_qp_nr; - ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", - port, pma_qp_nr); - break; - default: - ehca_err(&shca->ib_device, "invalid qp_type=%x", - qp_init_attr->qp_type); - return H_PARAMETER; - } - - if (ehca_nr_ports < 0) /* autodetect mode */ - return H_SUCCESS; - - for (counter = 0; - shca->sport[port - 1].port_state != IB_PORT_ACTIVE && - counter < ehca_port_act_time; - counter++) { - ehca_dbg(&shca->ib_device, "... wait until port %x is active", - port); - msleep_interruptible(1000); - } - - if (counter == ehca_port_act_time) { - ehca_err(&shca->ib_device, "Port %x is not active.", port); - return H_HARDWARE; - } - - return H_SUCCESS; -} - -struct ib_perf { - struct ib_mad_hdr mad_hdr; - u8 reserved[40]; - u8 data[192]; -} __attribute__ ((packed)); - -/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ -struct tcslfl { - u32 tc:8; - u32 sl:4; - u32 fl:20; -} __attribute__ ((packed)); - -/* IP Version/TC/FL packed into 32 bits, as in GRH */ -struct vertcfl { - u32 ver:4; - u32 tc:8; - u32 fl:20; -} __attribute__ ((packed)); - -static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; - struct ib_perf *out_perf = (struct ib_perf *)out_mad; - struct ib_class_port_info *poi = - (struct ib_class_port_info *)out_perf->data; - struct tcslfl *tcslfl = - (struct tcslfl *)&poi->redirect_tcslfl; - struct ehca_shca *shca = - container_of(ibdev, struct ehca_shca, ib_device); - struct ehca_sport *sport = &shca->sport[port_num - 1]; - - ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); - - *out_mad = *in_mad; - - if (in_perf->mad_hdr.class_version != 1) { - ehca_warn(ibdev, "Unsupported class_version=%x", - in_perf->mad_hdr.class_version); - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; - goto perf_reply; - } - - switch (in_perf->mad_hdr.method) { - case IB_MGMT_METHOD_GET: - case IB_MGMT_METHOD_SET: - /* set class port info for redirection */ - out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; - out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; - memset(poi, 0, sizeof(*poi)); - poi->base_version = 1; - poi->class_version = 1; - poi->resp_time_value = 18; - - /* copy local routing information from WC where applicable */ - tcslfl->sl = in_wc->sl; - poi->redirect_lid = - sport->saved_attr.lid | in_wc->dlid_path_bits; - poi->redirect_qp = sport->pma_qp_nr; - poi->redirect_qkey = IB_QP1_QKEY; - - ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, - &poi->redirect_pkey); - - /* if request was globally routed, copy route info */ - if (in_grh) { - const struct vertcfl *vertcfl = - (const struct vertcfl *)&in_grh->version_tclass_flow; - memcpy(poi->redirect_gid, in_grh->dgid.raw, - sizeof(poi->redirect_gid)); - tcslfl->tc = vertcfl->tc; - tcslfl->fl = vertcfl->fl; - } else - /* else only fill in default GID */ - ehca_query_gid(ibdev, port_num, 0, - (union ib_gid *)&poi->redirect_gid); - - ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", - sport->saved_attr.lid, sport->pma_qp_nr); - break; - - case IB_MGMT_METHOD_GET_RESP: - return IB_MAD_RESULT_FAILURE; - - default: - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; - break; - } - -perf_reply: - out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - -int ehca_process_mad(struct ib_device 
*ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - int ret; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - - if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) - return IB_MAD_RESULT_FAILURE; - - /* accept only pma request */ - if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) - return IB_MAD_RESULT_SUCCESS; - - ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); - ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, - in_mad, out_mad); - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h deleted file mode 100644 index d280b12..0000000 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * auxiliary functions - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Khadija Souissi - * Waleri Fomin - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef EHCA_TOOLS_H -#define EHCA_TOOLS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -extern int ehca_debug_level; - -#define ehca_dbg(ib_dev, format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ - "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, \ - ## arg); \ - } while (0) - -#define ehca_info(ib_dev, format, arg...) \ - dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_warn(ib_dev, format, arg...) 
\ - dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_err(ib_dev, format, arg...) \ - dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/* use this one only if no ib_dev available */ -#define ehca_gen_dbg(format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg); \ - } while (0) - -#define ehca_gen_warn(format, arg...) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_gen_err(format, arg...) \ - printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/** - * ehca_dmp - printk a memory block, whose length is n*8 bytes. - * Each line has the following layout: - * adr=X ofs=Y <8 bytes hex> <8 bytes hex> - */ -#define ehca_dmp(adr, len, format, args...) \ - do { \ - unsigned int x; \ - unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char *)(adr); \ - for (x = 0; x < l; x += 16) { \ - printk(KERN_INFO "EHCA_DMP:%s " format \ - " adr=%p ofs=%04x %016llx %016llx\n", \ - __func__, ##args, deb, x, \ - *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ - deb += 16; \ - } \ - } while (0) - -/* define a bitmask, little endian version */ -#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) - -/* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) - -/* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) - -/* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) - -/** - * EHCA_BMASK_SET - return value shifted and masked by mask - * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable - * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask - * in variable - */ -#define EHCA_BMASK_SET(mask, value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) - -/** - * EHCA_BMASK_GET - extract a parameter from value by mask - */ -#define EHCA_BMASK_GET(mask, value) \ - (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) - -/* Converts ehca to ib return code */ -int ehca2ib_return_code(u64 ehca_rc); - -#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c deleted file mode 100644 index 1a1d5d9..0000000 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * userspace support verbs - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
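
The EHCA_BMASK_* macros above pack a field descriptor into a single constant: the shift position in the upper 16 bits and the field length in the lower 16. EHCA_BMASK_MASK() recovers "length low bits set" from it, which is what lets SET and GET work with plain shifts. A standalone demonstration using the macros exactly as defined in ehca_tools.h (DEMO_FIELD is a made-up field for illustration):

#include <assert.h>

typedef unsigned long long u64;

#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
#define EHCA_BMASK_SET(mask, value) \
	((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
#define EHCA_BMASK_GET(mask, value) \
	(EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))

/* an 8-bit field starting at bit 16, little-endian convention */
#define DEMO_FIELD EHCA_BMASK(16, 8)

int main(void)
{
	u64 reg = 0;

	reg |= EHCA_BMASK_SET(DEMO_FIELD, 0x5A);
	assert(reg == 0x5A0000ULL);
	assert(EHCA_BMASK_GET(DEMO_FIELD, reg) == 0x5A);
	return 0;
}
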
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata) -{ - struct ehca_ucontext *my_context; - - my_context = kzalloc(sizeof *my_context, GFP_KERNEL); - if (!my_context) { - ehca_err(device, "Out of memory device=%p", device); - return ERR_PTR(-ENOMEM); - } - - return &my_context->ib_ucontext; -} - -int ehca_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); - return 0; -} - -static void ehca_mm_open(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)++; - if (!(*count)) - ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static void ehca_mm_close(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)--; - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static const struct vm_operations_struct vm_ops = { - .open = ehca_mm_open, - .close = ehca_mm_close, -}; - -static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, - u32 *mm_count) -{ - int ret; - u64 vsize, physical; - - vsize = vma->vm_end - vma->vm_start; - if (vsize < EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = galpas->user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ - ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, - vma->vm_page_prot); - if (unlikely(ret)) { - ehca_gen_err("remap_pfn_range() failed ret=%i", ret); - return -ENOMEM; - } - - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, - u32 *mm_count) -{ - int ret; - u64 start, ofs; - struct page *page; - - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - start = vma->vm_start; - for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { - u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); - page = virt_to_page(virt_addr); - ret = vm_insert_page(vma, start, page); - if 
(unlikely(ret)) { - ehca_gen_err("vm_insert_page() failed rc=%i", ret); - return ret; - } - start += PAGE_SIZE; - } - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); - ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_fw() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); - ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_queue() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", - rsrc_type, cq->cq_number); - return -EINVAL; - } - - return 0; -} - -static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa fw handle */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); - ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return -ENOMEM; - } - break; - - case 1: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, - &qp->mm_count_rqueue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - case 2: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, - &qp->mm_count_squeue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", - rsrc_type, qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - u64 fileoffset = vma->vm_pgoff; - u32 idr_handle = fileoffset & 0x1FFFFFF; - u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */ - u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ - u32 ret; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ib_uobject *uobject; - - switch (q_type) { - case 0: /* CQ */ - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, idr_handle); - read_unlock(&ehca_cq_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!cq) - return -EINVAL; - - if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_cq(vma, cq, rsrc_type); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_cq() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* QP */ - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, idr_handle); - read_unlock(&ehca_qp_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!qp) - return -EINVAL; - - uobject = IS_SRQ(qp) ? 
qp->ib_srq.uobject : qp->ib_qp.uobject; - if (!uobject || uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_qp(vma, qp, rsrc_type); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_qp() failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_gen_err("bad queue type %x", q_type); - return -EINVAL; - } - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c deleted file mode 100644 index 89517ff..0000000 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Joachim Fenkes - * Gerd Bayer - * Waleri Fomin - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
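
ehca_mmap() above multiplexes every mappable resource through one file offset: the low 25 bits of vm_pgoff carry the idr handle, bits 25-26 select the resource (fw window, rqueue, squeue) and bit 27 selects CQ vs. QP. A sketch of the encode/decode pair those masks imply; the encode side is an assumption here, since it lives in the userspace verbs library rather than in this file:

#include <assert.h>
#include <stdint.h>

#define EHCA_OFS_HANDLE_MASK 0x1FFFFFFu	/* bits 0..24: idr handle */
#define EHCA_OFS_RSRC_SHIFT  25		/* bits 25..26: fw/rq/sq */
#define EHCA_OFS_QTYPE_SHIFT 27		/* bit 27: 0 = CQ, 1 = QP */

static uint64_t encode_pgoff(uint32_t handle, uint32_t rsrc, uint32_t qtype)
{
	return (uint64_t)(handle & EHCA_OFS_HANDLE_MASK) |
	       ((uint64_t)(rsrc & 0x3) << EHCA_OFS_RSRC_SHIFT) |
	       ((uint64_t)(qtype & 0x1) << EHCA_OFS_QTYPE_SHIFT);
}

int main(void)
{
	uint64_t off = encode_pgoff(0x1234, 2 /* squeue */, 1 /* QP */);

	assert((off & EHCA_OFS_HANDLE_MASK) == 0x1234);
	assert(((off >> EHCA_OFS_RSRC_SHIFT) & 0x3) == 2);
	assert(((off >> EHCA_OFS_QTYPE_SHIFT) & 0x1) == 1);
	return 0;
}
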
- */ - -#include -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hcp_phyp.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) -#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) -#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) -#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) -#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) -#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) -#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) -#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) -#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) -#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) -#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) - -#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) -#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) -#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) - -#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) -#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) -#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) - -#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) -#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) - -#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) - -#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) -#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) -#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) - -#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" -#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" -#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx" - -static DEFINE_SPINLOCK(hcall_lock); - -static long ehca_plpar_hcall_norets(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, - arg5, arg6, arg7); - - if (ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) - ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, - opcode, ret, arg1, arg2, arg3, - arg4, arg5, arg6, arg7); - else - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); - - return ret; - } - - return H_BUSY; -} - -static long ehca_plpar_hcall9(unsigned long opcode, - unsigned long *outs, /* array of 9 outputs */ - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long arg9) -{ - long ret; - int i, sleep_msecs; - unsigned long 
flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall9(opcode, outs, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - if (ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) { - ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - } else if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - return ret; - } - - return H_BUSY; -} - -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 *act_nr_of_entries, - u32 *act_pages, - u32 *eq_ist) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - u64 allocate_controls; - - /* resource type */ - allocate_controls = 3ULL; - - /* ISN is associated */ - if (neq_control != 1) - allocate_controls = (1ULL << (63 - 7)) | allocate_controls; - else /* notification event queue */ - allocate_controls = (1ULL << 63) | allocate_controls; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - number_of_entries, /* r6 */ - 0, 0, 0, 0, 0, 0); - eq_handle->handle = outs[0]; - *act_nr_of_entries = (u32)outs[3]; - *act_pages = (u32)outs[4]; - *eq_ist = (u32)outs[5]; - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resource - ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask) -{ - return ehca_plpar_hcall_norets(H_RESET_EVENTS, - adapter_handle.handle, /* r4 */ - eq_handle.handle, /* r5 */ - event_mask, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param) -{ - int rc; - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 2, /* r5 */ - param->eq_handle.handle, /* r6 */ - cq->token, /* r7 */ - param->nr_cqe, /* r8 */ - 0, 0, 0, 0); - cq->ipz_cq_handle.handle = outs[0]; - param->act_nr_of_entries = (u32)outs[3]; - param->act_pages = (u32)outs[4]; - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[5]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. 
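
ehca_plpar_hcall_norets() and ehca_plpar_hcall9() above share one retry discipline: attempt the hypervisor call at most five times, and whenever firmware answers with a "long busy" hint, sleep for the number of milliseconds encoded in that return code before retrying; only after five long-busy rounds does the caller see H_BUSY. The skeleton of that loop, with simplified stand-ins for the ppc64 helpers and the hcall-serialization lock omitted:

#include <stdbool.h>

#define H_SUCCESS 0L
#define H_BUSY    1L	/* stand-in value for illustration */

/* stand-ins for the ppc64 hypervisor helpers of the same names */
static bool h_is_long_busy(long ret)
{
	return ret >= 9900 && ret <= 9905;	/* the long-busy return-code range */
}

static int get_longbusy_msecs(long ret)
{
	int ms = 1;	/* maps the six codes to 1 ms ... 100 s */

	while (ret-- > 9900)
		ms *= 10;
	return ms;
}

static void sleep_msecs(int ms)
{
	(void)ms;	/* msleep_interruptible() in the driver */
}

static long retry_hcall(long (*hcall)(void *arg), void *arg)
{
	int i;

	for (i = 0; i < 5; i++) {
		long ret = hcall(arg);

		if (h_is_long_busy(ret)) {
			sleep_msecs(get_longbusy_msecs(ret));
			continue;
		}
		return ret;	/* success or a hard error is reported as-is */
	}
	return H_BUSY;	/* still long-busy after five attempts */
}

static long fake_hcall(void *arg)
{
	(void)arg;
	return H_SUCCESS;
}

int main(void)
{
	return (int)retry_hcall(fake_hcall, 0);
}
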
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user) -{ - int rc; - u64 ret; - u64 allocate_controls, max_r10_reg, r11, r12; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) - | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) - | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) - | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) - | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, - parms->squeue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, - parms->rqueue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_RECV_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_SEND_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, - parms->ud_av_l_key_ctl) - | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); - - max_r10_reg = - EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - parms->squeue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - parms->rqueue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, - parms->squeue.max_sge) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, - parms->rqueue.max_sge); - - r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); - - if (parms->ext_type == EQPT_SRQ) - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); - else - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - parms->send_cq_handle.handle, - parms->recv_cq_handle.handle, - parms->eq_handle.handle, - ((u64)parms->token << 32) | parms->pd.value, - max_r10_reg, r11, r12); - - parms->qp_handle.handle = outs[0]; - parms->real_qp_num = (u32)outs[1]; - parms->squeue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); - parms->rqueue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); - parms->squeue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); - parms->rqueue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); - parms->squeue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); - parms->rqueue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[6]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - parms->qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. 
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block) -{ - u64 ret; - u64 r_cb = __pa(query_port_response_block); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response block not page aligned"); - return H_PARAMETER; - } - - ret = ehca_plpar_hcall_norets(H_QUERY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - r_cb, /* r6 */ - 0, 0, 0, 0); - - if (ehca_debug_level >= 2) - ehca_dmp(query_port_response_block, 64, "response_block"); - - return ret; -} - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask) -{ - u64 port_attributes = port_cap; - - if (modify_mask & IB_PORT_SHUTDOWN) - port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); - if (modify_mask & IB_PORT_INIT_TYPE) - port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); - if (modify_mask & IB_PORT_RESET_QKEY_CNTR) - port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); - - return ehca_plpar_hcall_norets(H_MODIFY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - port_attributes, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock) -{ - u64 r_cb = __pa(query_hca_rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response_block=%p not page aligned", - query_hca_rblock); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_QUERY_HCA, - adapter_handle.handle, /* r4 */ - r_cb, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count) -{ - return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, - adapter_handle.handle, /* r4 */ - (u64)queue_type | ((u64)pagesize) << 8, - /* r5 */ - resource_handle, /* r6 */ - logical_address_of_page, /* r7 */ - count, /* r8 */ - 0, 0); -} - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - if (count != 1) { - ehca_gen_err("Ppage counter=%llx", count); - return H_PARAMETER; - } - return hipz_h_register_rpage(adapter_handle, - pagesize, - queue_type, - eq_handle.handle, - logical_address_of_page, count); -} - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, - u32 ist) -{ - u64 ret; - ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, - adapter_handle.handle, /* r4 */ - ist, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret != H_SUCCESS && ret != H_BUSY) - ehca_gen_err("Could not query interrupt state."); - - return ret; -} - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal) -{ - if (count != 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - cq_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const 
u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa) -{ - if (count > 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - qp_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe2processed, - void **log_addr_next_rq_wqe2processed, - int dis_and_get_function_code) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - dis_and_get_function_code, /* r5 */ - qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void *)outs[0]; - if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void *)outs[1]; - - return ret; -} - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - update_mask, /* r6 */ - __pa(mqpcb), /* r7 */ - 0, 0, 0, 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Insufficient resources ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal) -{ - return ehca_plpar_hcall_norets(H_QUERY_QP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - __pa(qqpcb), /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = hcp_galpas_dtor(&qp->galpas); - if (ret) { - ehca_gen_err("Could not destruct qp->galpas"); - return H_RESOURCE; - } - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - /* function code */ - 1, /* r5 */ - qp->ipz_qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if (ret == H_HARDWARE) - ehca_gen_err("HCA not operational. ret=%lli", ret); - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - qp->ipz_qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource still in use. 
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port) -{ - return ehca_plpar_hcall_norets(H_DEFINE_AQP0, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0, 0, 0); - *pma_qp_nr = (u32)outs[0]; - *bma_qp_nr = (u32)outs[1]; - - if (ret == H_ALIAS_EXIST) - ehca_gen_err("AQP1 already exists. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - u64 ret; - - ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - return ehca_plpar_hcall_norets(H_DETACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); -} - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag) -{ - u64 ret; - - ret = hcp_galpas_dtor(&cq->galpas); - if (ret) { - ehca_gen_err("Could not destruct cp->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - force_flag != 0 ? 1L : 0L, /* r6 */ - 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq) -{ - u64 ret; - - ret = hcp_galpas_dtor(&eq->galpas); - if (ret) { - ehca_gen_err("Could not destruct eq->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - eq->ipz_eq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource in use. 
ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 5, /* r5 */ - vaddr, /* r6 */ - length, /* r7 */ - (((u64)access_ctrl) << 32ULL), /* r8 */ - pd.value, /* r9 */ - 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - u64 ret; - - if (unlikely(ehca_debug_level >= 3)) { - if (count > 1) { - u64 *kpage; - int i; - kpage = __va(logical_address_of_page); - for (i = 0; i < count; i++) - ehca_gen_dbg("kpage[%d]=%p", - i, (void *)kpage[i]); - } else - ehca_gen_dbg("kpage=%p", - (void *)logical_address_of_page); - } - - if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { - ehca_gen_err("logical_address_of_page not on a 4k boundary " - "adapter_handle=%llx mr=%p mr_handle=%llx " - "pagesize=%x queue_type=%x " - "logical_address_of_page=%llx count=%llx", - adapter_handle.handle, mr, - mr->ipz_mr_handle.handle, pagesize, queue_type, - logical_address_of_page, count); - ret = H_PARAMETER; - } else - ret = hipz_h_register_rpage(adapter_handle, pagesize, - queue_type, - mr->ipz_mr_handle.handle, - logical_address_of_page, count); - return ret; -} - -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MR, outs, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->len = outs[0]; - outparms->vaddr = outs[1]; - outparms->acl = outs[4] >> 32; - outparms->lkey = (u32)(outs[5] >> 32); - outparms->rkey = (u32)(outs[5] & (0xffffffff)); - - return ret; -} - -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - length, /* r7 */ - /* r8 */ - ((((u64)access_ctrl) << 32ULL) | pd.value), - mr_addr_cb, /* r9 */ - 0, 0, 0); - outparms->vaddr = outs[1]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = 
ehca_plpar_hcall9(H_REGISTER_SMR, outs, - adapter_handle.handle, /* r4 */ - orig_mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - (((u64)access_ctrl) << 32ULL), /* r7 */ - pd.value, /* r8 */ - 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd pd, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 6, /* r5 */ - pd.value, /* r6 */ - 0, 0, 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MW, outs, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count) -{ - u64 r_cb = __pa(rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("rblock not page aligned."); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_ERROR_DATA, - adapter_handle.handle, - ressource_handle, - r_cb, - 0, 0, 0, 0); -} - -u64 hipz_h_eoi(int irq) -{ - unsigned long xirr; - - iosync(); - xirr = (0xffULL << 24) | irq; - - return plpar_hcall_norets(H_EOI, xirr); -} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h deleted file mode 100644 index a46e514..0000000 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Gerd Bayer - * Waleri Fomin - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HCP_IF_H__ -#define __HCP_IF_H__ - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "hipz_hw.h" - -/* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize - * resources, create the empty EQPT (ring). - */ -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 * act_nr_of_entries, - u32 * act_pages, - u32 * eq_ist); - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask); -/* - * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize - * resources, create the empty CQPT (ring). - */ -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param); - - -/* - * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, - * initialize resources, create empty QPPTs (2 rings). - */ -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user); - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block); - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask); - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock); - -/* - * hipz_h_register_rpage internal function in hcp_if.h for all - * hcp_H_REGISTER_RPAGE calls. 
- */ -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count); - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle - hcp_adapter_handle, - u32 ist); - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal); - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa); - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe_tb_processed, - void **log_addr_next_rq_wqe_tb_processed, - int dis_and_get_function_code); -enum hcall_sigt { - HCALL_SIGT_NO_CQE = 0, - HCALL_SIGT_BY_WQE = 1, - HCALL_SIGT_EVERY = 2 -}; - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal); - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal); - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp); - -u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port); - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr); - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag); - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq); - -/* - * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize - * resources. 
- */ -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -/* hipz_h_query_mr queries MR in HW and FW */ -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_free_resource_mr frees MR resources in HW and FW */ -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr); - -/* hipz_h_reregister_pmr reregisters MR in HW and FW */ -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_smr register shared MR in HW and FW */ -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* - * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize - * resources. - */ -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd pd, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_query_mw queries MW in HW and FW */ -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_free_resource_mw frees MW resources in HW and FW */ -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw); - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count); -u64 hipz_h_eoi(int irq); - -#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c deleted file mode 100644 index 077376f..0000000 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * load store abstraction for ehca register access with tracing - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-u64 hcall_map_page(u64 physaddr)
-{
-	return (u64)ioremap(physaddr, EHCA_PAGESIZE);
-}
-
-int hcall_unmap_page(u64 mapaddr)
-{
-	iounmap((volatile void __iomem *) mapaddr);
-	return 0;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-		    u64 paddr_kernel, u64 paddr_user)
-{
-	if (!is_user) {
-		galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
-		if (!galpas->kernel.fw_handle)
-			return -ENOMEM;
-	} else
-		galpas->kernel.fw_handle = 0;
-
-	galpas->user.fw_handle = paddr_user;
-
-	return 0;
-}
-
-int hcp_galpas_dtor(struct h_galpas *galpas)
-{
-	if (galpas->kernel.fw_handle) {
-		int ret = hcall_unmap_page(galpas->kernel.fw_handle);
-		if (ret)
-			return ret;
-	}
-
-	galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
-
-	return 0;
-}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
deleted file mode 100644
index d1b0299..0000000
--- a/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- * Firmware calls
- *
- * Authors: Christoph Raisch
- *          Hoang-Nam Nguyen
- *          Waleri Fomin
- *          Gerd Bayer
- *
- * Copyright (c) 2005 IBM Corporation
- *
- * All rights reserved.
- *
- * This source code is distributed under a dual license of GPL v2.0 and OpenIB
- * BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */ - -#ifndef __HIPZ_FNS_H__ -#define __HIPZ_FNS_H__ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -#include "hipz_fns_core.h" - -#define hipz_galpa_store_eq(gal, offset, value) \ - hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_eq(gal, offset) \ - hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qped(gal, offset, value) \ - hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) - -#define hipz_galpa_load_qped(gal, offset) \ - hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) - -#define hipz_galpa_store_mrmw(gal, offset, value) \ - hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) - -#define hipz_galpa_load_mrmw(gal, offset) \ - hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) - -#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h deleted file mode 100644 index 868735f..0000000 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch - * Heiko J Schick - * Hoang-Nam Nguyen - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __HIPZ_FNS_CORE_H__ -#define __HIPZ_FNS_CORE_H__ - -#include "hcp_phyp.h" -#include "hipz_hw.h" - -#define hipz_galpa_store_cq(gal, offset, value) \ - hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_cq(gal, offset) \ - hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qp(gal, offset, value) \ - hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) -#define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) - -static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, - EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); -} - -static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, - EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); -} - -static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) -{ - hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, - EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); -} - -static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n0_reg; - - hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, - EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, - value)); - cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); -} - -static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n1_reg; - - hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, - EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); - cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); -} - -#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h deleted file mode 100644 index bf996c7..0000000 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * eHCA register definitions - * - * Authors: Waleri Fomin - * Christoph Raisch - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __HIPZ_HW_H__ -#define __HIPZ_HW_H__ - -#include "ehca_tools.h" - -#define EHCA_MAX_MTU 4 - -/* QP Table Entry Memory Map */ -struct hipz_qptemm { - u64 qpx_hcr; - u64 qpx_c; - u64 qpx_herr; - u64 qpx_aer; -/* 0x20*/ - u64 qpx_sqa; - u64 qpx_sqc; - u64 qpx_rqa; - u64 qpx_rqc; -/* 0x40*/ - u64 qpx_st; - u64 qpx_pmstate; - u64 qpx_pmfa; - u64 qpx_pkey; -/* 0x60*/ - u64 qpx_pkeya; - u64 qpx_pkeyb; - u64 qpx_pkeyc; - u64 qpx_pkeyd; -/* 0x80*/ - u64 qpx_qkey; - u64 qpx_dqp; - u64 qpx_dlidp; - u64 qpx_portp; -/* 0xa0*/ - u64 qpx_slidp; - u64 qpx_slidpp; - u64 qpx_dlida; - u64 qpx_porta; -/* 0xc0*/ - u64 qpx_slida; - u64 qpx_slidpa; - u64 qpx_slvl; - u64 qpx_ipd; -/* 0xe0*/ - u64 qpx_mtu; - u64 qpx_lato; - u64 qpx_rlimit; - u64 qpx_rnrlimit; -/* 0x100*/ - u64 qpx_t; - u64 qpx_sqhp; - u64 qpx_sqptp; - u64 qpx_nspsn; -/* 0x120*/ - u64 qpx_nspsnhwm; - u64 reserved1; - u64 qpx_sdsi; - u64 qpx_sdsbc; -/* 0x140*/ - u64 qpx_sqwsize; - u64 qpx_sqwts; - u64 qpx_lsn; - u64 qpx_nssn; -/* 0x160 */ - u64 qpx_mor; - u64 qpx_cor; - u64 qpx_sqsize; - u64 qpx_erc; -/* 0x180*/ - u64 qpx_rnrrc; - u64 qpx_ernrwt; - u64 qpx_rnrresp; - u64 qpx_lmsna; -/* 0x1a0 */ - u64 qpx_sqhpc; - u64 qpx_sqcptp; - u64 qpx_sigt; - u64 qpx_wqecnt; -/* 0x1c0*/ - u64 qpx_rqhp; - u64 qpx_rqptp; - u64 qpx_rqsize; - u64 qpx_nrr; -/* 0x1e0*/ - u64 qpx_rdmac; - u64 qpx_nrpsn; - u64 qpx_lapsn; - u64 qpx_lcr; -/* 0x200*/ - u64 qpx_rwc; - u64 qpx_rwva; - u64 qpx_rdsi; - u64 qpx_rdsbc; -/* 0x220*/ - u64 qpx_rqwsize; - u64 qpx_crmsn; - u64 qpx_rdd; - u64 qpx_larpsn; -/* 0x240*/ - u64 qpx_pd; - u64 qpx_scqn; - u64 qpx_rcqn; - u64 qpx_aeqn; -/* 0x260*/ - u64 qpx_aaelog; - u64 qpx_ram; - u64 qpx_rdmaqe0; - u64 qpx_rdmaqe1; -/* 0x280*/ - u64 qpx_rdmaqe2; - u64 qpx_rdmaqe3; - u64 qpx_nrpsnhwm; -/* 0x298*/ - u64 reserved[(0x400 - 0x298) / 8]; -/* 0x400 extended data */ - u64 reserved_ext[(0x500 - 0x400) / 8]; -/* 0x500 */ - u64 reserved2[(0x1000 - 0x500) / 8]; -/* 0x1000 */ -}; - -#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) - -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) - -/* MRMWPT Entry Memory Map */ -struct hipz_mrmwmm { - /* 0x00 */ - u64 mrx_hcr; - - u64 mrx_c; - u64 mrx_herr; - u64 mrx_aer; - /* 0x20 */ - u64 mrx_pp; - u64 reserved1; - u64 reserved2; - u64 reserved3; - /* 0x40 */ - u64 reserved4[(0x200 - 0x40) / 8]; - /* 0x200 */ - u64 mrx_ctl[64]; - -}; - -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) - -struct hipz_qpedmm { - /* 0x00 */ - u64 reserved0[(0x400) / 8]; - /* 0x400 */ - u64 qpedx_phh; - u64 qpedx_ppsgp; - /* 0x410 */ - u64 qpedx_ppsgu; - u64 qpedx_ppdgp; - /* 0x420 */ - u64 qpedx_ppdgu; - u64 qpedx_aph; - /* 0x430 */ - u64 qpedx_apsgp; - u64 qpedx_apsgu; - /* 0x440 */ - u64 qpedx_apdgp; - u64 qpedx_apdgu; - /* 0x450 */ - u64 qpedx_apav; - u64 qpedx_apsav; - /* 0x460 */ - u64 qpedx_hcr; - u64 reserved1[4]; - /* 0x488 */ - u64 qpedx_rrl0; - /* 0x490 */ - u64 qpedx_rrrkey0; - u64 qpedx_rrva0; - /* 0x4a0 */ - u64 reserved2; - u64 qpedx_rrl1; - /* 0x4b0 */ - u64 qpedx_rrrkey1; - u64 qpedx_rrva1; - /* 0x4c0 */ - u64 reserved3; - u64 qpedx_rrl2; - /* 0x4d0 */ - u64 qpedx_rrrkey2; - u64 qpedx_rrva2; - /* 0x4e0 */ - u64 reserved4; - u64 qpedx_rrl3; - /* 0x4f0 */ - u64 qpedx_rrrkey3; - u64 qpedx_rrva3; -}; - -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) - -/* CQ Table Entry Memory Map */ -struct hipz_cqtemm { - u64 cqx_hcr; - u64 cqx_c; - u64 cqx_herr; - u64 cqx_aer; -/* 0x20 */ - u64 cqx_ptp; 
- u64 cqx_tp; - u64 cqx_fec; - u64 cqx_feca; -/* 0x40 */ - u64 cqx_ep; - u64 cqx_eq; -/* 0x50 */ - u64 reserved1; - u64 cqx_n0; -/* 0x60 */ - u64 cqx_n1; - u64 reserved2[(0x1000 - 0x60) / 8]; -/* 0x1000 */ -}; - -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) -#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) - -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) - -/* EQ Table Entry Memory Map */ -struct hipz_eqtemm { - u64 eqx_hcr; - u64 eqx_c; - - u64 eqx_herr; - u64 eqx_aer; -/* 0x20 */ - u64 eqx_ptp; - u64 eqx_tp; - u64 eqx_ssba; - u64 eqx_psba; - -/* 0x40 */ - u64 eqx_cec; - u64 eqx_meql; - u64 eqx_xisbi; - u64 eqx_xisc; -/* 0x60 */ - u64 eqx_it; - -}; - -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) - -/* access control defines for MR/MW */ -#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 -#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 -#define HIPZ_ACCESSCTRL_R_READ 0x00200000 -#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 -#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 - -/* query hca response block */ -struct hipz_query_hca { - u32 cur_reliable_dg; - u32 cur_qp; - u32 cur_cq; - u32 cur_eq; - u32 cur_mr; - u32 cur_mw; - u32 cur_ee_context; - u32 cur_mcast_grp; - u32 cur_qp_attached_mcast_grp; - u32 reserved1; - u32 cur_ipv6_qp; - u32 cur_eth_qp; - u32 cur_hp_mr; - u32 reserved2[3]; - u32 max_rd_domain; - u32 max_qp; - u32 max_cq; - u32 max_eq; - u32 max_mr; - u32 max_hp_mr; - u32 max_mw; - u32 max_mrwpte; - u32 max_special_mrwpte; - u32 max_rd_ee_context; - u32 max_mcast_grp; - u32 max_total_mcast_qp_attach; - u32 max_mcast_qp_attach; - u32 max_raw_ipv6_qp; - u32 max_raw_ethy_qp; - u32 internal_clock_frequency; - u32 max_pd; - u32 max_ah; - u32 max_cqe; - u32 max_wqes_wq; - u32 max_partitions; - u32 max_rr_ee_context; - u32 max_rr_qp; - u32 max_rr_hca; - u32 max_act_wqs_ee_context; - u32 max_act_wqs_qp; - u32 max_sge; - u32 max_sge_rd; - u32 memory_page_size_supported; - u64 max_mr_size; - u32 local_ca_ack_delay; - u32 num_ports; - u32 vendor_id; - u32 vendor_part_id; - u32 hw_ver; - u64 node_guid; - u64 hca_cap_indicators; - u32 data_counter_register_size; - u32 max_shared_rq; - u32 max_isns_eq; - u32 max_neq; -} __attribute__ ((packed)); - -#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) -#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) -#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) -#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) -#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) -#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) -#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) -#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) -#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) -#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) -#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) -#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) -#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) -#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) -#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) -#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) -#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) -#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) - -/* query port response block */ -struct hipz_query_port { - u32 state; - u32 bad_pkey_cntr; - u32 lmc; - u32 lid; - u32 subnet_timeout; - u32 qkey_viol_cntr; - u32 sm_sl; - u32 sm_lid; - u32 capability_mask; - u32 init_type_reply; - u32 pkey_tbl_len; - u32 gid_tbl_len; - u64 gid_prefix; - u32 port_nr; 
- u16 pkey_entries[16]; - u8 reserved1[32]; - u32 trent_size; - u32 trbuf_size; - u64 max_msg_sz; - u32 max_mtu; - u32 vl_cap; - u32 phys_pstate; - u32 phys_state; - u32 phys_speed; - u32 phys_width; - u8 reserved2[1884]; - u64 guid_entries[255]; -} __attribute__ ((packed)); - -#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c deleted file mode 100644 index 7ffc748..0000000 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_tools.h" -#include "ipz_pt_fn.h" -#include "ehca_classes.h" - -#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) - -struct kmem_cache *small_qp_cache; - -void *ipz_qpageit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->pagesize; - if (queue->current_q_offset > queue->queue_length) { - queue->current_q_offset -= queue->pagesize; - ret = NULL; - } - if (((u64)ret) % queue->pagesize) { - ehca_gen_err("ERROR!! not at PAGE-Boundary"); - return NULL; - } - return ret; -} - -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u64 last_entry_in_q = queue->queue_length - queue->qe_size; - - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset > last_entry_in_q) { - queue->current_q_offset = 0; - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) -{ - int i; - for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = __pa(queue->queue_pages[i]); - if (addr >= page && addr < page + queue->pagesize) { - *q_offset = addr - page + i * queue->pagesize; - return 0; - } - } - return -EINVAL; -} - -#if PAGE_SHIFT < EHCA_PAGESHIFT -#error Kernel pages must be at least as large than eHCA pages (4K) ! 
-#endif - -/* - * allocate pages for queue: - * outer loop allocates whole kernel pages (page aligned) and - * inner loop divides a kernel page into smaller hca queue pages - */ -static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) -{ - int k, f = 0; - u8 *kpage; - - while (f < nr_of_pages) { - kpage = (u8 *)get_zeroed_page(GFP_KERNEL); - if (!kpage) - goto out; - - for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { - queue->queue_pages[f] = (struct ipz_page *)kpage; - kpage += EHCA_PAGESIZE; - f++; - } - } - return 1; - -out: - for (f = 0; f < nr_of_pages && queue->queue_pages[f]; - f += PAGES_PER_KPAGE) - free_page((unsigned long)(queue->queue_pages)[f]); - return 0; -} - -static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page; - unsigned long bit; - - mutex_lock(&pd->lock); - - if (!list_empty(&pd->free[order])) - page = list_entry(pd->free[order].next, - struct ipz_small_queue_page, list); - else { - page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); - if (!page) - goto out; - - page->page = get_zeroed_page(GFP_KERNEL); - if (!page->page) { - kmem_cache_free(small_qp_cache, page); - goto out; - } - - list_add(&page->list, &pd->free[order]); - } - - bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); - __set_bit(bit, page->bitmap); - page->fill++; - - if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) - list_move(&page->list, &pd->full[order]); - - mutex_unlock(&pd->lock); - - queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); - queue->small_page = page; - queue->offset = bit << (order + 9); - return 1; - -out: - ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); - mutex_unlock(&pd->lock); - return 0; -} - -static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page = queue->small_page; - unsigned long bit; - int free_page = 0; - - bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) - >> (order + 9); - - mutex_lock(&pd->lock); - - __clear_bit(bit, page->bitmap); - page->fill--; - - if (page->fill == 0) { - list_del(&page->list); - free_page = 1; - } - - if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) - /* the page was full until we freed the chunk */ - list_move_tail(&page->list, &pd->free[order]); - - mutex_unlock(&pd->lock); - - if (free_page) { - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } -} - -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small) -{ - if (pagesize > PAGE_SIZE) { - ehca_gen_err("FATAL ERROR: pagesize=%x " - "is greater than kernel page size", pagesize); - return 0; - } - - /* init queue fields */ - queue->queue_length = nr_of_pages * pagesize; - queue->pagesize = pagesize; - queue->qe_size = qe_size; - queue->act_nr_of_sg = nr_of_sg; - queue->current_q_offset = 0; - queue->toggle_state = 1; - queue->small_page = NULL; - - /* allocate queue page pointers */ - queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), - GFP_KERNEL | __GFP_NOWARN); - if (!queue->queue_pages) { - queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); - if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; - } - } - - /* allocate actual queue pages */ - if (is_small) { - if (!alloc_small_queue_page(queue, pd)) - 
goto ipz_queue_ctor_exit0; - } else - if (!alloc_queue_pages(queue, nr_of_pages)) - goto ipz_queue_ctor_exit0; - - return 1; - -ipz_queue_ctor_exit0: - ehca_gen_err("Couldn't alloc pages queue=%p " - "nr_of_pages=%x", queue, nr_of_pages); - kvfree(queue->queue_pages); - - return 0; -} - -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) -{ - int i, nr_pages; - - if (!queue || !queue->queue_pages) { - ehca_gen_dbg("queue or queue_pages is NULL"); - return 0; - } - - if (queue->small_page) - free_small_queue_page(queue, pd); - else { - nr_pages = queue->queue_length / queue->pagesize; - for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) - free_page((unsigned long)queue->queue_pages[i]); - } - - kvfree(queue->queue_pages); - - return 1; -} - -int ehca_init_small_qp_cache(void) -{ - small_qp_cache = kmem_cache_create("ehca_cache_small_qp", - sizeof(struct ipz_small_queue_page), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!small_qp_cache) - return -ENOMEM; - - return 0; -} - -void ehca_cleanup_small_qp_cache(void) -{ - kmem_cache_destroy(small_qp_cache); -} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h deleted file mode 100644 index a801274..0000000 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __IPZ_PT_FN_H__ -#define __IPZ_PT_FN_H__ - -#define EHCA_PAGESHIFT 12 -#define EHCA_PAGESIZE 4096UL -#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) -#define EHCA_PT_ENTRIES 512UL - -#include "ehca_tools.h" -#include "ehca_qes.h" - -struct ehca_pd; -struct ipz_small_queue_page; - -extern struct kmem_cache *small_qp_cache; - -/* struct generic ehca page */ -struct ipz_page { - u8 entries[EHCA_PAGESIZE]; -}; - -#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) - -struct ipz_small_queue_page { - unsigned long page; - unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; - int fill; - void *mapped_addr; - u32 mmap_count; - struct list_head list; -}; - -/* struct generic queue in linux kernel virtual memory (kv) */ -struct ipz_queue { - u64 current_q_offset; /* current queue entry */ - - struct ipz_page **queue_pages; /* array of pages belonging to queue */ - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; /* toggle flag - per page */ - u32 offset; /* save offset within page for small_qp */ - struct ipz_small_queue_page *small_page; -}; - -/* - * return current Queue Entry for a certain q_offset - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) -{ - struct ipz_page *current_page; - if (q_offset >= queue->queue_length) - return NULL; - current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; - return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; -} - -/* - * return current Queue Entry - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_get(struct ipz_queue *queue) -{ - return ipz_qeit_calc(queue, queue->current_q_offset); -} - -/* - * return current Queue Page , increment Queue Page iterator from - * page to page in struct ipz_queue, last increment will return 0! and - * NOT wrap - * returns address (kv) of Queue Page - * warning don't use in parallel with ipz_QE_get_inc() - */ -void *ipz_qpageit_get_inc(struct ipz_queue *queue); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset >= queue->queue_length) { - queue->current_q_offset = 0; - /* toggle the valid flag */ - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -/* - * return a bool indicating whether current Queue Entry is valid - */ -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); -} - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) -{ - return ipz_qeit_is_valid(queue) ? 
ipz_qeit_get_inc(queue) : NULL; -} - -/* - * returns and resets Queue Entry iterator - * returns address (kv) of first Queue Entry - */ -static inline void *ipz_qeit_reset(struct ipz_queue *queue) -{ - queue->current_q_offset = 0; - return ipz_qeit_get(queue); -} - -/* - * return the q_offset corresponding to an absolute address - */ -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); - -/* - * return the next queue offset. don't modify the queue. - */ -static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) -{ - offset += queue->qe_size; - if (offset >= queue->queue_length) offset = 0; - return offset; -} - -/* struct generic page table */ -struct ipz_pt { - u64 entries[EHCA_PT_ENTRIES]; -}; - -/* struct page table for a queue, only to be used in pf */ -struct ipz_qpt { - /* queue page tables (kv), use u64 because we know the element length */ - u64 *qpts; - u32 n_qpts; - u32 n_ptes; /* number of page table entries */ - u64 *current_pte_addr; -}; - -/* - * constructor for a ipz_queue_t, placement new for ipz_queue_t, - * new for all dependent datastructors - * all QP Tables are the same - * flow: - * allocate+pin queue - * see ipz_qpt_ctor() - * returns true if ok, false if out of memory - */ -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small); - -/* - * destructor for a ipz_queue_t - * -# free queue - * see ipz_queue_ctor() - * returns true if ok, false if queue was NULL-ptr of free failed - */ -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); - -/* - * constructor for a ipz_qpt_t, - * placement new for struct ipz_queue, new for all dependent datastructors - * all QP Tables are the same, - * flow: - * -# allocate+pin queue - * -# initialise ptcb - * -# allocate+pin PTs - * -# link PTs to a ring, according to HCA Arch, set bit62 id needed - * -# the ring must have room for exactly nr_of_PTEs - * see ipz_qpt_ctor() - */ -void ipz_qpt_ctor(struct ipz_qpt *qpt, - const u32 nr_of_qes, - const u32 pagesize, - const u32 qe_size, - const u8 lowbyte, const u8 toggle, - u32 * act_nr_of_QEs, u32 * act_nr_of_pages); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - * fix EQ page problems - */ -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); - -/* - * return current Event Queue Entry, increment Queue Entry iterator - * by one step in struct ipz_queue if valid, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_queue_QPageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - */ -static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - ipz_qeit_eq_get_inc(queue); /* this is a good one */ - return ret; -} - -static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - return ret; -} - -/* returns address (GX) of first 
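page of the queue as the hardware sees it (GX presumably denoting the
POWER GX-bus address space; the qpts[] entries are stored big-endian,
hence the be64_to_cpu conversion below); that page holds the first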
queue entry */ -static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) -{ - return be64_to_cpu(qpt->qpts[0]); -} - -/* returns address (kv) of first page of queue page table */ -static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) -{ - return qpt->qpts; -} - -#endif /* __IPZ_PT_FN_H__ */ diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig index cf5fe9b..d7f6235 100644 --- a/drivers/staging/rdma/Kconfig +++ b/drivers/staging/rdma/Kconfig @@ -24,6 +24,8 @@ if STAGING_RDMA source "drivers/staging/rdma/amso1100/Kconfig" +source "drivers/staging/rdma/ehca/Kconfig" + source "drivers/staging/rdma/hfi1/Kconfig" source "drivers/staging/rdma/ipath/Kconfig" diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile index cbd915a..139d78e 100644 --- a/drivers/staging/rdma/Makefile +++ b/drivers/staging/rdma/Makefile @@ -1,4 +1,5 @@ # Entries for RDMA_STAGING tree obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ +obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig new file mode 100644 index 0000000..3fadd2a --- /dev/null +++ b/drivers/staging/rdma/ehca/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS + ---help--- + This driver supports the deprecated IBM pSeries eHCA InfiniBand + adapter. + + To compile the driver as a module, choose M here. The module + will be called ib_ehca. + diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile new file mode 100644 index 0000000..74d284e --- /dev/null +++ b/drivers/staging/rdma/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick +# Christoph Raisch +# Joachim Fenkes +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. + +obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o + +ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o + diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO new file mode 100644 index 0000000..199a4a6 --- /dev/null +++ b/drivers/staging/rdma/ehca/TODO @@ -0,0 +1,4 @@ +9/2015 + +The ehca driver has been deprecated and moved to drivers/staging/rdma. +It will be removed in the 4.6 merge window. diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c new file mode 100644 index 0000000..4659263 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_av.c @@ -0,0 +1,277 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * address vector functions + * + * Authors: Hoang-Nam Nguyen + * Khadija Souissi + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static struct kmem_cache *av_cache; + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + int ret; + struct ehca_av *av; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + + av = kmem_cache_alloc(av_cache, GFP_KERNEL); + if (!av) { + ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", + pd, ah_attr); + return ERR_PTR(-ENOMEM); + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; + } else + av->av.ipd = ehca_static_rate; + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + ehca_err(pd->device, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 
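/* zero-fill the gid first so that any bytes the firmware query below
 * leaves untouched are well defined */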
0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + ehca_err(pd->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + av->av.pmtu = shca->max_mtu; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(av_cache, av); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av; + struct ehca_ud_av new_ehca_av; + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ehca_err(ah->device, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + return -EINVAL; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ehca_err(ah->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + return -EINVAL; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = shca->max_mtu; + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + + return 0; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); + + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + return 0; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); + + return 0; +} + +int ehca_init_av_cache(void) +{ + av_cache = kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!av_cache) + return -ENOMEM; + 
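/* cache ready: ehca_create_ah() above allocates each struct ehca_av from
 * it, and ehca_destroy_ah() frees the objects back into it */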
return 0; +} + +void ehca_cleanup_av_cache(void) +{ + if (av_cache) + kmem_cache_destroy(av_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h new file mode 100644 index 0000000..bd45e0f --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_classes.h @@ -0,0 +1,482 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick + * Christoph Raisch + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#include +#include + +#include +#include + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif +#include "ipz_pt_fn.h" +#include "ehca_qes.h" +#include "ehca_irq.h" + +#define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff + +struct ehca_eqe_cache_entry { + struct ehca_eqe *eqe; + struct ehca_cq *cq; +}; + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; + spinlock_t irq_spinlock; + struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; +}; + +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialze modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; + enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; + u32 pma_qp_nr; +}; + +#define HCA_CAP_MR_PGSIZE_4K 0x80000000 +#define HCA_CAP_MR_PGSIZE_64K 0x40000000 +#define HCA_CAP_MR_PGSIZE_1M 0x20000000 +#define HCA_CAP_MR_PGSIZE_16M 0x10000000 + +struct ehca_shca { + struct ib_device ib_device; + struct platform_device *ofdev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; + struct mutex modify_mutex; + u64 hca_cap; + /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ + u32 hca_cap_mr_pgsize; + int max_mtu; + int max_num_qps; + int max_num_cqs; + atomic_t num_cqs; + atomic_t num_qps; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + /* small queue mgmt */ + struct mutex lock; + struct list_head free[2]; + struct list_head full[2]; +}; + +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u8 reported; + u8 cqe_req; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; + unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ +}; + +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 
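/* wrap around at the end of the map, e.g. with limit == 4: 0, 1, 2, 3, 0, ... */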
0 : temp; +} + +struct ehca_qp { + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; + u32 qp_type; + enum ehca_ext_qp_type ext_type; + enum ib_qp_state state; + struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; + struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; + int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; +}; + +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ + wait_queue_head_t wait_completion; + spinlock_t task_lock; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + struct ib_umem *umem; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ + u64 hwpage_size; /* hw page size used for this MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of 
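user memory mapped through ib_umem, see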
ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_kpages; + u64 kpage_cnt; + u64 hwpage_size; /* hw page size used for this MR */ + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct scatterlist *next_sg; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +int ehca_init_pd_cache(void); +void ehca_cleanup_pd_cache(void); +int ehca_init_cq_cache(void); +void ehca_cleanup_cq_cache(void); +int ehca_init_qp_cache(void); +void ehca_cleanup_qp_cache(void); +int ehca_init_av_cache(void); +void ehca_cleanup_av_cache(void); +int ehca_init_mrmw_cache(void); +void ehca_cleanup_mrmw_cache(void); +int ehca_init_small_qp_cache(void); +void ehca_cleanup_small_qp_cache(void); + +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; + +extern int ehca_static_rate; +extern int ehca_port_act_time; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; + +struct ipzu_queue_resp { + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 offset; /* save offset within a page for small_qp */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + u32 fw_handle_ofs; + u32 dummy; +}; + +struct ehca_create_qp_resp { + u32 qp_num; + u32 token; + u32 qp_type; + u32 ext_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 fw_handle_ofs; + u32 dummy; + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + +struct ehca_alloc_queue_parms { + /* input parameters */ + int max_wr; + int max_sge; + int page_size; + int is_small; + + /* output parameters */ + u16 act_nr_wqes; + u8 act_nr_sges; + u32 queue_size; /* bytes for small queues, pages otherwise */ +}; + +struct ehca_alloc_qp_parms { + struct ehca_alloc_queue_parms squeue; + struct ehca_alloc_queue_parms rqueue; + + /* input parameters */ + enum ehca_service_type servicetype; + int qp_storage; + int sigtype; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags 
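/* low-latency QP completion mode, see enum ehca_ll_comp_flags above */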
ll_comp_flags; + int ud_av_l_key_ctl; + + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; + + u32 srq_qpn, srq_token, srq_limit; + + /* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h new file mode 100644 index 0000000..689c357 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h @@ -0,0 +1,208 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define 
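/* each MQPCB_MASK_* selects one bit of the modify-QP mask; EHCA_BMASK_IBM()
   uses IBM bit numbering, with bit 0 being the most significant bit of the
   64-bit word, so (16, 16) names a single mask bit */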
MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c new file mode 100644 index 0000000..9b68b17 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_cq.c @@ -0,0 +1,397 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +static struct kmem_cache *cq_cache; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, flags); + + ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", + cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + ehca_dbg(cq->ib_cq.device, + "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, flags); + if (ret) + ehca_err(cq->ib_cq.device, + "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + + return ret; +} + +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int cqe = attr->cqe; + static const u32 additional_cqe = 20; + struct ib_cq *cq; + struct ehca_cq *my_cq; + struct ehca_shca *shca = + container_of(device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle; + struct ehca_alloc_cq_parms param; /* h_call's out parameters */ + struct h_galpa gal; + void *vpage; + u32 counter; + u64 rpage, cqx_fec, h_ret; + int ipz_rc, i; + unsigned long flags; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", shca->max_num_cqs); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); + if (!my_cq) { + ehca_err(device, "Out of memory for ehca_cq struct device=%p", + device); + atomic_dec(&shca->num_cqs); + return ERR_PTR(-ENOMEM); + } + + memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); + + 
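/* initialise the locks and the event reference count before the CQ becomes
 * visible to the interrupt path through the idr below */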
spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); + init_waitqueue_head(&my_cq->wait_completion); + + cq = &my_cq->ib_cq; + + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); + + if (my_cq->token < 0) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't allocate new idr entry. device=%p", + device); + goto create_cq_exit1; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs. + */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); + + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_alloc_resource_cq() failed " + "h_ret=%lli device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); + if (!ipz_rc) { + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", + ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + ehca_err(device, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = __pa(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. 
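/* the h-call registers each real page (rpage) of the queue with the CQ,
 * using the kernel-mode galpa handle */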
+ kernel); + + if (h_ret < H_SUCCESS) { + ehca_err(device, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " + "act_pages=%i", my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + ehca_err(device, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lli", my_cq, my_cq->cq_number, + h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(device, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lli " + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + resp.fw_handle_ofs = (u32) + (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); + goto create_cq_exit4; + } + } + + return cq; + +create_cq_exit4: + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " + "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + u64 h_ret; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int cq_num = my_cq->cq_number; + struct ib_device *device = cq->device; + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + unsigned long flags; + + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + } + + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* now wait until all pending 
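events have been consumed by the tasklets; each event holds a reference in
nr_events, and wait_completion is woken when the count drops to zero, i.e.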
events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + + /* nobody's using our CQ any longer -- we can destroy it */ + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " + "state. Try to delete it forcibly.", + my_cq, cq_num, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + ehca_dbg(device, "cq_num=%x deleted successfully.", + cq_num); + } + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " + "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); + return ehca2ib_return_code(h_ret); + } + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return 0; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + /* TODO: proper resize needs to be done */ + ehca_err(cq->device, "not implemented yet"); + + return -EFAULT; +} + +int ehca_init_cq_cache(void) +{ + cq_cache = kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!cq_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_cq_cache(void) +{ + if (cq_cache) + kmem_cache_destroy(cq_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c new file mode 100644 index 0000000..90da674 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_eq.c @@ -0,0 +1,189 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + int ret; + u64 h_ret; + u32 nr_pages; + u32 i; + void *vpage; + struct ib_device *ib_dev = &shca->ib_device; + + spin_lock_init(&eq->spinlock); + spin_lock_init(&eq->irq_spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (!length) { + ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (h_ret != H_SUCCESS) { + ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); + if (!ret) { + ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) + goto create_eq_exit2; + + rpage = __pa(vpage); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (h_ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (h_ret != H_PAGE_REGISTERED) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, + 0, "ehca_eq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, + 0, "ehca_neq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + } + + eq->is_initialized = 1; + + return 0; + +create_eq_exit2: + ipz_queue_dtor(NULL, &eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + void *eqe; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + u64 h_ret; + + ibmebus_free_irq(eq->ist, (void *)shca); + + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); + + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(NULL, &eq->ipz_queue); + + return 0; +} diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c new file mode 100644 index 0000000..e8b1bb6 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_hca.c @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on 
POWER + * + * HCA query functions + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static unsigned int limit_uint(unsigned int value) +{ + return min_t(unsigned int, value, INT_MAX); +} + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) +{ + int i, ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_hca *rblock; + + static const u32 cap_mapping[] = { + IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, + IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, + IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, + IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, + IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, + IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, + IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, + IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, + IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, + }; + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto query_device1; + } + + memset(props, 0, sizeof(struct ib_device_attr)); + props->page_size_cap = shca->hca_cap_mr_pgsize; + props->fw_ver = rblock->hw_ver; + props->max_mr_size = rblock->max_mr_size; + props->vendor_id = rblock->vendor_id >> 8; + props->vendor_part_id = rblock->vendor_part_id >> 16; + props->hw_ver = rblock->hw_ver; + props->max_qp = limit_uint(rblock->max_qp); + props->max_qp_wr = limit_uint(rblock->max_wqes_wq); + props->max_sge = limit_uint(rblock->max_sge); + props->max_sge_rd = limit_uint(rblock->max_sge_rd); + 
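/* the remaining limits are taken from the firmware query block, clamped
 * to INT_MAX by limit_uint() */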
props->max_cq = limit_uint(rblock->max_cq); + props->max_cqe = limit_uint(rblock->max_cqe); + props->max_mr = limit_uint(rblock->max_mr); + props->max_mw = limit_uint(rblock->max_mw); + props->max_pd = limit_uint(rblock->max_pd); + props->max_ah = limit_uint(rblock->max_ah); + props->max_ee = limit_uint(rblock->max_rd_ee_context); + props->max_rdd = limit_uint(rblock->max_rd_domain); + props->max_fmr = limit_uint(rblock->max_mr); + props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); + props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); + props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); + props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); + props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + props->max_srq = limit_uint(props->max_qp); + props->max_srq_wr = limit_uint(props->max_qp_wr); + props->max_srq_sge = 3; + } + + props->max_pkeys = 16; + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; + props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); + props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); + props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); + props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); + props->max_total_mcast_qp_attach + = limit_uint(rblock->max_total_mcast_qp_attach); + + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + +query_device1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) +{ + switch (fw_mtu) { + case 0x1: + return IB_MTU_256; + case 0x2: + return IB_MTU_512; + case 0x3: + return IB_MTU_1024; + case 0x4: + return IB_MTU_2048; + case 0x5: + return IB_MTU_4096; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + fw_mtu); + return 0; + } +} + +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +{ + switch (vl_cap) { + case 0x1: + return 1; + case 0x2: + return 2; + case 0x3: + return 4; + case 0x4: + return 8; + case 0x5: + return 15; + default: + ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", + vl_cap); + return 0; + } +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_port1; + } + + memset(props, 0, sizeof(struct ib_port_attr)); + + props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); + props->port_cap_flags = rblock->capability_mask; + props->gid_tbl_len = rblock->gid_tbl_len; + if (rblock->max_msg_sz) + props->max_msg_sz = rblock->max_msg_sz; + else + props->max_msg_sz = 0x1 << 31; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = 
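/* table lengths, LIDs and the other port attributes come straight from
 * the firmware reply block */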
rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); + + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = IB_SPEED_SDR; + } + +query_port1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len = rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if (index > 16) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); + +query_pkey1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + if (index < 0 || index > 255) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | + 
IB_PORT_VENDOR_CLASS_SUP); + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", + hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c new file mode 100644 index 0000000..8615d7c --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_irq.c @@ -0,0 +1,870 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/slab.h> +#include <linux/smpboot.h> + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool *pool; + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp *)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%llx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq *)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%llx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown error type: %llx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%llx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + /* rblock must be 4K aligned and should be 4K large */ + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) + ehca_err(&shca->ib_device, + "No error data is available: %llx.", resource); + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > EHCA_PAGESIZE) + length = EHCA_PAGESIZE; + + print_error_data(shca, data, rblock, length); + } else + ehca_err(&shca->ib_device, + "Error data could not be fetched: %llx", resource); + + ehca_free_fw_ctrlblock(rblock); + +error_data1: + return ret; + +} + +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, + enum ib_event_type event_type) +{ + struct ib_event event; + + /* PATH_MIG without 
the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ + struct ehca_qp *qp; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); + read_unlock(&ehca_qp_idr_lock); + + if (!qp) + return; + + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); + + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); + break; + case 0x07: /* CQ error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + 
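+ /* (Aside, assuming only the EHCA_BMASK semantics defined at the top of this file: `identifier' is EQE bits 2..7 in IBM bit numbering, so a value of 0x15 selects this case, and it is forwarded below as a non-fatal event just like the COMM_EST and SQ_DRAINED cases above.) */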
qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) +{ + struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. 
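+ * (Here "autodetect mode" means ehca_nr_ports == -1, the documented default of the nr_ports module parameter, hence the `< 0' check below.)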
+ */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, &sport->saved_attr); + } else { + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); + } else + notify_port_conf_change(shca, port); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Trace stopped."); + break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + + +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) +{ + u64 eqe_value; + u32 token; + struct ehca_cq *cq; + + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "Got completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (cq == NULL) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); + if (ehca_scaling_code) + queue_comp_task(cq); + else { + comp_event_callback(cq); + if 
(atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eqe_value); + } +} + +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value, ret; + int eqe_cnt, i; + int eq_empty = 0; + + spin_lock(&eq->irq_spinlock); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } + + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (!eqe_cache[eqe_cnt].cq) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; + } + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } + if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock(&eq->spinlock); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock(&eq->spinlock); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) { + if (ehca_scaling_code) + queue_comp_task(eq->eqe_cache[i].cq); + else { + struct ehca_cq *cq = eq->eqe_cache[i].cq; + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); + } + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + } while (1); + +unlock_irq_spinlock: + spin_unlock(&eq->irq_spinlock); +} + +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); +} + +static int find_next_online_cpu(struct ehca_comp_pool *pool) +{ + int cpu; + unsigned long flags; + + WARN_ON_ONCE(!in_interrupt()); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags); + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while 
(!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); + spin_unlock_irqrestore(&pool->last_cpu_lock, flags); + + return cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) +{ + unsigned long flags; + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&__cq->task_lock); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up_process(thread); + } else + __cq->nr_callbacks++; + + spin_unlock(&__cq->task_lock); + spin_unlock_irqrestore(&cct->task_lock, flags); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu_id; + struct ehca_cpu_comp_task *cct; + struct task_struct *thread; + int cq_jobs; + unsigned long flags; + + cpu_id = find_next_online_cpu(pool); + BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + + spin_lock_irqsave(&cct->task_lock, flags); + cq_jobs = cct->cq_jobs; + spin_unlock_irqrestore(&cct->task_lock, flags); + if (cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + } + __queue_comp_task(__cq, cct, thread); +} + +static void run_comp_task(struct ehca_cpu_comp_task *cct) +{ + struct ehca_cq *cq; + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irq(&cct->task_lock); + + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + spin_lock_irq(&cct->task_lock); + spin_lock(&cq->task_lock); + cq->nr_callbacks--; + if (!cq->nr_callbacks) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock(&cq->task_lock); + } +} + +static void comp_task_park(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); + + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); + } + spin_unlock_irq(&target->task_lock); +} + +static void comp_task_stop(unsigned int cpu, bool online) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); +} + +static int comp_task_should_run(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + return cct->cq_jobs; +} + +static void comp_task(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); + } + spin_unlock_irq(&cct->task_lock); +} + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + 
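+	/* (Assuming standard smpboot_register_percpu_thread() semantics: one such kthread is spawned per CPU and named via .thread_comm below with the CPU number; the .park hook re-queues any pending CQs to an online CPU on hotplug-down, and .cleanup drops the per-CPU state, so no explicit hotplug notifier is needed.) */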
.thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, +}; + +int ehca_create_comp_pool(void) +{ + int cpu, ret = -ENOMEM; + + if (!ehca_scaling_code) + return 0; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = cpumask_any(cpu_online_mask); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (!pool->cpu_comp_tasks) + goto out_pool; + + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + } + + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; + + pr_info("eHCA scaling code enabled\n"); + return ret; + +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; +} + +void ehca_destroy_comp_pool(void) +{ + if (!ehca_scaling_code) + return; + + smpboot_unregister_percpu_thread(&comp_pool_threads); + + free_percpu(pool->cpu_comp_threads); + free_percpu(pool->cpu_comp_tasks); + kfree(pool); +} diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h new file mode 100644 index 0000000..5370199 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_irq.h @@ -0,0 +1,77 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include <linux/interrupt.h> +#include <linux/types.h> + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); +void ehca_tasklet_eq(unsigned long data); +void ehca_process_eq(struct ehca_shca *shca, int is_irq); + +struct ehca_cpu_comp_task { + struct list_head cq_list; + spinlock_t task_lock; + int cq_jobs; + int active; +}; + +struct ehca_comp_pool { + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; + int last_cpu; + spinlock_t last_cpu_lock; +}; + +int ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(void); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h new file mode 100644 index 0000000..80e6a3d --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_iverbs.h @@ -0,0 +1,218 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick + * Dietmar Decker + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +enum rdma_protocol_type +ehca_query_protocol(struct ib_device *device, u8 port_num); + +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp 
*qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); + +void ehca_poll_eqs(unsigned long data); + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + +#ifdef CONFIG_PPC_64K_PAGES +void *ehca_alloc_fw_ctrlblock(gfp_t flags); +void ehca_free_fw_ctrlblock(void *ptr); +#else +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) +#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) +#endif + +void ehca_recover_sqp(struct ib_qp *sqp); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c new file mode 100644 index 0000000..8246418 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_main.c @@ -0,0 +1,1123 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef CONFIG_PPC_64K_PAGES +#include <linux/slab.h> +#endif + +#include <linux/notifier.h> +#include <linux/memory.h> +#include <rdma/ib_mad.h> +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +#define HCAD_VERSION "0029" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Christoph Raisch "); +MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); +MODULE_VERSION(HCAD_VERSION); + +static bool ehca_open_aqp1 = 0; +static int ehca_hw_level = 0; +static bool ehca_poll_all_eqs = 1; + +int ehca_debug_level = 0; +int ehca_nr_ports = -1; +bool ehca_use_hp_mr = 0; +int ehca_port_act_time = 30; +int ehca_static_rate = -1; +bool ehca_scaling_code = 0; +int ehca_lock_hcalls = -1; +int ehca_max_cq = -1; +int ehca_max_qp = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); +module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); +module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); +module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); +module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); +module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); +module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); +module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); + +MODULE_PARM_DESC(open_aqp1, + "Open AQP1 on startup (default: no)"); +MODULE_PARM_DESC(debug_level, + "Amount of debug output (0: none (default), 1: traces, " + "2: some dumps, 3: lots)"); +MODULE_PARM_DESC(hw_level, + "Hardware level (0: autosensing (default), " + "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); +MODULE_PARM_DESC(nr_ports, + "number of connected ports (-1: autodetect (default), " + "1: port one only, 2: two ports)"); +MODULE_PARM_DESC(use_hp_mr, + "Use high performance MRs (default: no)"); +MODULE_PARM_DESC(port_act_time, + "Time to wait for port activation (default: 30 sec)"); +MODULE_PARM_DESC(poll_all_eqs, + "Poll all event queues periodically (default: yes)"); +MODULE_PARM_DESC(static_rate, + "Set permanent static rate (default: no static rate)"); +MODULE_PARM_DESC(scaling_code, + "Enable scaling code (default: no)"); +MODULE_PARM_DESC(lock_hcalls, + "Serialize all hCalls made by the driver " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); + +DEFINE_RWLOCK(ehca_qp_idr_lock); +DEFINE_RWLOCK(ehca_cq_idr_lock); +DEFINE_IDR(ehca_qp_idr); +DEFINE_IDR(ehca_cq_idr); + +static LIST_HEAD(shca_list); /* list of all registered ehcas */ +DEFINE_SPINLOCK(shca_list_lock); + +static struct timer_list poll_eqs_timer; + +#ifdef CONFIG_PPC_64K_PAGES +static struct 
kmem_cache *ctblk_cache; + +void *ehca_alloc_fw_ctrlblock(gfp_t flags) +{ + void *ret = kmem_cache_zalloc(ctblk_cache, flags); + if (!ret) + ehca_gen_err("Out of memory for ctblk"); + return ret; +} + +void ehca_free_fw_ctrlblock(void *ptr) +{ + if (ptr) + kmem_cache_free(ctblk_cache, ptr); + +} +#endif + +int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: + return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + +static int ehca_create_slab_caches(void) +{ + int ret; + + ret = ehca_init_pd_cache(); + if (ret) { + ehca_gen_err("Cannot create PD SLAB cache."); + return ret; + } + + ret = ehca_init_cq_cache(); + if (ret) { + ehca_gen_err("Cannot create CQ SLAB cache."); + goto create_slab_caches2; + } + + ret = ehca_init_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create QP SLAB cache."); + goto create_slab_caches3; + } + + ret = ehca_init_av_cache(); + if (ret) { + ehca_gen_err("Cannot create AV SLAB cache."); + goto create_slab_caches4; + } + + ret = ehca_init_mrmw_cache(); + if (ret) { + ehca_gen_err("Cannot create MR&MW SLAB cache."); + goto create_slab_caches5; + } + + ret = ehca_init_small_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create small queue SLAB cache."); + goto create_slab_caches6; + } + +#ifdef CONFIG_PPC_64K_PAGES + ctblk_cache = kmem_cache_create("ehca_cache_ctblk", + EHCA_PAGESIZE, H_CB_ALIGNMENT, + SLAB_HWCACHE_ALIGN, + NULL); + if (!ctblk_cache) { + ehca_gen_err("Cannot create ctblk SLAB cache."); + ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; + goto create_slab_caches6; + } +#endif + return 0; + +create_slab_caches6: + ehca_cleanup_mrmw_cache(); + +create_slab_caches5: + ehca_cleanup_av_cache(); + +create_slab_caches4: + ehca_cleanup_qp_cache(); + +create_slab_caches3: + ehca_cleanup_cq_cache(); + +create_slab_caches2: + ehca_cleanup_pd_cache(); + + return ret; +} + +static void ehca_destroy_slab_caches(void) +{ + ehca_cleanup_small_qp_cache(); + ehca_cleanup_mrmw_cache(); + ehca_cleanup_av_cache(); + ehca_cleanup_qp_cache(); + ehca_cleanup_cq_cache(); + ehca_cleanup_pd_cache(); +#ifdef CONFIG_PPC_64K_PAGES + if (ctblk_cache) + kmem_cache_destroy(ctblk_cache); +#endif +} + +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) + +static struct cap_descr { + u64 mask; + char *descr; +} hca_cap_descr[] = { + { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, + { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, + { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, + { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, + { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, + { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, + { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, + { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, + { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, + { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, + { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, + { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, + { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, + { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, + { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, + { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, + { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, + { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, +}; + +static int ehca_sense_attributes(struct ehca_shca *shca) +{ + int i, 
ret = 0; + u64 h_ret; + struct hipz_query_hca *rblock; + struct hipz_query_port *port; + const char *loc_code; + + static const u32 pgsize_map[] = { + HCA_CAP_MR_PGSIZE_4K, 0x1000, + HCA_CAP_MR_PGSIZE_64K, 0x10000, + HCA_CAP_MR_PGSIZE_1M, 0x100000, + HCA_CAP_MR_PGSIZE_16M, 0x1000000, + }; + + ehca_gen_dbg("Probing adapter %s...", + shca->ofdev->dev.of_node->full_name); + loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", + NULL); + if (loc_code) + ehca_gen_dbg(" ... location code=%s", loc_code); + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_gen_err("Cannot allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query device properties. h_ret=%lli", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + if (ehca_nr_ports == 1) + shca->num_ports = 1; + else + shca->num_ports = (u8)rblock->num_ports; + + ehca_gen_dbg(" ... found %x ports", rblock->num_ports); + + if (ehca_hw_level == 0) { + u32 hcaaver; + u32 revid; + + hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); + revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); + + ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); + + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { + ehca_gen_warn("unknown hardware version" + " - assuming default level"); + shca->hw_level = 0x22; + } + } else + shca->hw_level = ehca_hw_level; + ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); + + shca->hca_cap = rblock->hca_cap_indicators; + ehca_gen_dbg(" ... HCA capabilities:"); + for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) + if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) + ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + + /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is + * a firmware property, so it's valid across all adapters + */ + if (ehca_lock_hcalls == -1) + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); + + /* translate supported MR page sizes; always support 4K */ + shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; + for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) + if (rblock->memory_page_size_supported & pgsize_map[i]) + shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (shca->max_num_qps == -1) + shca->max_num_qps = min_t(int, rblock->max_qp, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { + ehca_gen_warn("The requested number of QPs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_qp, rblock->max_qp); + shca->max_num_qps = rblock->max_qp; + } + + if (shca->max_num_cqs == -1) + shca->max_num_cqs = min_t(int, rblock->max_cq, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { + ehca_gen_warn("The requested number of CQs is out of range " + "(1 - %i) specified by HW. 
Value is set to %i", + rblock->max_cq, rblock->max_cq); + } + + /* query max MTU from first port -- it's the same for all ports */ + port = (struct hipz_query_port *)rblock; + h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query port properties. h_ret=%lli", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + shca->max_mtu = port->max_mtu; + +sense_attributes1: + ehca_free_fw_ctrlblock(rblock); + return ret; +} + +static int init_node_guid(struct ehca_shca *shca) +{ + int ret = 0; + struct hipz_query_hca *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto init_node_guid1; + } + + memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); + +init_node_guid1: + ehca_free_fw_ctrlblock(rblock); + return ret; +} + +static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ehca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static int ehca_init_device(struct ehca_shca *shca) +{ + int ret; + + ret = init_node_guid(shca); + if (ret) + return ret; + + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 8; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = RDMA_NODE_IB_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.num_comp_vectors = 1; + shca->ib_device.dma_device = &shca->ofdev->dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp 
= ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + shca->ib_device.process_mad = ehca_process_mad; + shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; + shca->ib_device.get_port_immutable = ehca_port_immutable; + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport = &shca->sport[port - 1]; + struct ib_cq *ibcq; + struct ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + struct ib_cq_init_attr cq_attr = {}; + int ret; + + if (sport->ibcq_aqp1) { + ehca_err(&shca->ib_device, "AQP1 CQ is already created."); + return -EPERM; + } + + cq_attr.cqe = 10; + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), + &cq_attr); + if (IS_ERR(ibcq)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_sqp[IB_QPT_GSI]) { + ehca_err(&shca->ib_device, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; + + return 0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret; + + ret = 
ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); + if (ret) { + ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); + return ret; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret); + + return ret; +} + +static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); +} + +static ssize_t ehca_store_debug_level(struct device_driver *ddp, + const char *buf, size_t count) +{ + int value = (*buf) - '0'; + if (value >= 0 && value <= 9) + ehca_debug_level = value; + return 1; +} + +static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); + +static struct attribute *ehca_drv_attrs[] = { + &driver_attr_debug_level.attr, + NULL +}; + +static struct attribute_group ehca_drv_attr_grp = { + .attrs = ehca_drv_attrs +}; + +static const struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev_get_drvdata(dev); \ + \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ + if (!rblock) { \ + dev_err(dev, "Can't allocate rblock memory.\n"); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + dev_err(dev, "Can't query device properties\n"); \ + ehca_free_fw_ctrlblock(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + ehca_free_fw_ctrlblock(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); +EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev_get_drvdata(dev); + + return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + +static struct attribute *ehca_dev_attrs[] = { + &dev_attr_adapter_handle.attr, + &dev_attr_num_ports.attr, + &dev_attr_hw_ver.attr, + &dev_attr_max_eq.attr, + &dev_attr_cur_eq.attr, + &dev_attr_max_cq.attr, + &dev_attr_cur_cq.attr, + &dev_attr_max_qp.attr, + &dev_attr_cur_qp.attr, + &dev_attr_max_mr.attr, + &dev_attr_cur_mr.attr, + &dev_attr_max_mw.attr, + &dev_attr_cur_mw.attr, + &dev_attr_max_pd.attr, + &dev_attr_max_ah.attr, + NULL +}; + +static struct attribute_group ehca_dev_attr_grp = { + .attrs = ehca_dev_attrs +}; + +static int ehca_probe(struct platform_device *dev) +{ + struct ehca_shca *shca; + const u64 *handle; + struct ib_pd *ibpd; + int ret, i, eq_size; + unsigned long flags; + + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); + if (!handle) { + ehca_gen_err("Cannot get eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + ehca_gen_err("Wrong eHCA 
handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (!shca) { + ehca_gen_err("Cannot allocate shca memory."); + return -ENOMEM; + } + + mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); + + shca->ofdev = dev; + shca->ipz_hca_handle.handle = *handle; + dev_set_drvdata(&dev->dev, shca); + + ret = ehca_sense_attributes(shca); + if (ret < 0) { + ehca_gen_err("Cannot sense eHCA attributes."); + goto probe1; + } + + ret = ehca_init_device(shca); + if (ret) { + ehca_gen_err("Cannot init ehca device struct"); + goto probe1; + } + + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create EQ."); + goto probe1; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create NEQ."); + goto probe3; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); + if (IS_ERR(ibpd)) { + ehca_err(&shca->ib_device, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe4; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + + if (ret) { + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", + ret); + goto probe5; + } + + ret = ib_register_device(&shca->ib_device, NULL); + if (ret) { + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%i", ret); + goto probe6; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 1."); + goto probe7; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 2."); + goto probe8; + } + } + + ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); + if (ret) /* only complain; we can live without attributes */ + ehca_err(&shca->ib_device, + "Cannot create device attributes ret=%d", ret); + + spin_lock_irqsave(&shca_list_lock, flags); + list_add(&shca->shca_list, &shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return 0; + +probe8: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port 1. ret=%i", ret); + +probe7: + ib_unregister_device(&shca->ib_device); + +probe6: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + +probe5: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + +probe4: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy NEQ. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy EQ. 
ret=%x", ret); + +probe1: + ib_dealloc_device(&shca->ib_device); + + return -EINVAL; +} + +static int ehca_remove(struct platform_device *dev) +{ + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); + unsigned long flags; + int ret; + + sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); + + if (ehca_open_aqp1 == 1) { + int i; + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port %x " + "ret=%i", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%i", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock_irqsave(&shca_list_lock, flags); + list_del(&shca->shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehca_device_table); + +static struct platform_driver ehca_driver = { + .probe = ehca_probe, + .remove = ehca_remove, + .driver = { + .name = "ehca", + .owner = THIS_MODULE, + .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, + }, +}; + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + + spin_lock(&shca_list_lock); + list_for_each_entry(shca, &shca_list, shca_list) { + if (shca->eq.is_initialized) { + /* call deadman proc only if eq ptr does not change */ + struct ehca_eq *eq = &shca->eq; + int max = 3; + volatile u64 q_ofs, q_ofs2; + unsigned long flags; + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + do { + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs2 = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + max--; + } while (q_ofs == q_ofs2 && max > 0); + if (q_ofs == q_ofs2) + ehca_process_eq(shca, 0); + } + } + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); + spin_unlock(&shca_list_lock); +} + +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + unsigned long flags; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock_irqsave(&shca_list_lock, flags); + if (list_empty(&shca_list)) { + spin_unlock_irqrestore(&shca_list_lock, flags); + return NOTIFY_OK; + } else { + spin_unlock_irqrestore(&shca_list_lock, flags); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed" + "in conjunction with eHCA"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + +static int __init ehca_module_init(void) +{ + int ret; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Version " HCAD_VERSION ")\n"); + + ret = 
ehca_create_comp_pool(); + if (ret) { + ehca_gen_err("Cannot create comp pool."); + return ret; + } + + ret = ehca_create_slab_caches(); + if (ret) { + ehca_gen_err("Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { + ehca_gen_err("Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init3; + } + + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init4; + } + + if (ehca_poll_all_eqs != 1) { + ehca_gen_err("WARNING!!!"); + ehca_gen_err("It is possible to lose interrupts."); + } else { + init_timer(&poll_eqs_timer); + poll_eqs_timer.function = ehca_poll_eqs; + poll_eqs_timer.expires = jiffies + HZ; + add_timer(&poll_eqs_timer); + } + + return 0; + +module_init4: + ibmebus_unregister_driver(&ehca_driver); + +module_init3: + ehca_destroy_busmap(); + +module_init2: + ehca_destroy_slab_caches(); + +module_init1: + ehca_destroy_comp_pool(); + return ret; +}; + +static void __exit ehca_module_exit(void) +{ + if (ehca_poll_all_eqs == 1) + del_timer_sync(&poll_eqs_timer); + + ibmebus_unregister_driver(&ehca_driver); + + unregister_memory_notifier(&ehca_mem_nb); + + ehca_destroy_busmap(); + + ehca_destroy_slab_caches(); + + ehca_destroy_comp_pool(); + + idr_destroy(&ehca_cq_idr); + idr_destroy(&ehca_qp_idr); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c new file mode 100644 index 0000000..cec1815 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mcast.c @@ -0,0 +1,131 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi + * Waleri Fomin + * Reinhard Ernst + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000	/* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid multicast gid");
+		return -EINVAL;
+	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+		ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
+
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+			 "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
+
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->pd->device,
+					      struct ehca_shca, ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid multicast gid");
+		return -EINVAL;
+	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+		ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
+
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+			 "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
new file mode 100644
index 0000000..f914b30
--- /dev/null
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.c
@@ -0,0 +1,2593 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * MR/MW functions
+ *
+ * Authors: Dietmar Decker
+ *          Christoph Raisch
+ *          Hoang-Nam Nguyen
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) + +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + +static struct kmem_cache *mr_cache; +static struct kmem_cache *mw_cache; + +enum ehca_mr_pgsize { + EHCA_MR_PGSIZE4K = 0x1000L, + EHCA_MR_PGSIZE64K = 0x10000L, + EHCA_MR_PGSIZE1M = 0x100000L, + EHCA_MR_PGSIZE16M = 0x1000000L +}; + +#define EHCA_MR_PGSHIFT4K 12 +#define EHCA_MR_PGSHIFT64K 16 +#define EHCA_MR_PGSHIFT1M 20 +#define EHCA_MR_PGSHIFT16M 24 + +static u64 ehca_map_vaddr(void *caddr); + +static u32 ehca_encode_hwpage_size(u32 pgsize) +{ + int log = ilog2(pgsize); + WARN_ON(log < 12 || log > 24 || log & 3); + return (log - 12) / 4; +} + +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) +{ + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); +} + +static struct ehca_mr *ehca_mr_new(void) +{ + struct ehca_mr *me; + + me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mrlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + 
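For orientation: NUM_CHUNKS() above is a plain round-up division, and ehca_encode_hwpage_size() packs the four supported MR page sizes (4K, 64K, 1M, 16M, i.e. log2 values 12/16/20/24) into the codes 0-3 handed to the firmware's register-rpage hcall. A minimal userspace sketch of both helpers, illustrative only and not part of the patch, assuming a 64-bit long for the ilog2() stand-in:

#include <stdio.h>

#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size) - 1) / (chunk_size))

/* userspace stand-in for the kernel's ilog2()-based encoder */
static unsigned int encode_hwpage_size(unsigned long pgsize)
{
	int log = 63 - __builtin_clzl(pgsize);	/* pgsize is a power of two */

	return (log - 12) / 4;	/* 4K -> 0, 64K -> 1, 1M -> 2, 16M -> 3 */
}

int main(void)
{
	/* a 1 MiB region needs 256 hardware pages of 4 KiB */
	printf("%lu\n", NUM_CHUNKS(1UL << 20, 4096UL));
	/* 64 KiB pages are encoded as 1 */
	printf("%u\n", encode_hwpage_size(0x10000UL));
	return 0;
}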
+static void ehca_mr_delete(struct ehca_mr *me) +{ + kmem_cache_free(mr_cache, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + struct ehca_mw *me; + + me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mwlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + kmem_cache_free(mw_cache, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_maxmr; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + ehca_err(&shca->ib_device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ehca_mr_delete(e_maxmr); + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + ehca_err(&shca->ib_device, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", + PTR_ERR(ib_mr), pd, mr_access_flags); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + + u64 size; + + if ((num_phys_buf <= 0) || !phys_buf_array) { + ehca_err(pd->device, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto 
reg_phys_mr_exit1; + } + } else { + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + /* for kernel space we try most possible pgsize */ + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, + hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ib_mr; +} /* end ehca_reg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) +{ + struct ib_mr *ib_mr; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + int ret, page_shift; + u32 num_kpages; + u32 num_hwpages; + u64 hwpage_size; + + if (!pd) { + ehca_gen_err("bad pd=%p", pd); + return ERR_PTR(-EFAULT); + } + + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + if (length == 0 || virt + length < virt) { + ehca_err(pd->device, "bad input values: length=%llx " + "virt_base=%llx", length, virt); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_user_mr_exit0; + } + + e_mr->umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(e_mr->umem)) { + ib_mr = (void *)e_mr->umem; + goto reg_user_mr_exit1; + } + + if (e_mr->umem->page_size != PAGE_SIZE) { + ehca_err(pd->device, "page size not supported, " + "e_mr->umem->page_size=%x", e_mr->umem->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit2; + } + + /* determine number of MR pages */ + num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + /* select proper hw_pgsize */ + page_shift = PAGE_SHIFT; + if (e_mr->umem->hugetlb) { + /* determine page_shift, clamp between 4K and 16M */ + page_shift = (fls64(length - 1) + 3) & ~3; + page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), + EHCA_MR_PGSHIFT16M); + } + hwpage_size = 1UL << page_shift; + + /* now that we 
have the desired page size, shift until it's + * supported, too. 4K is always supported, so this terminates. + */ + while (!(hwpage_size & shca->hca_cap_mr_pgsize)) + hwpage_size >>= 4; + +reg_user_mr_fallback: + num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.hwpage_size = hwpage_size; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { + ehca_warn(pd->device, "failed to register mr " + "with hwpage_size=%llx", hwpage_size); + ehca_info(pd->device, "try to register mr with " + "kpage_size=%lx", PAGE_SIZE); + /* + * this means kpages are not contiguous for a hw page + * try kernel page size as fallback solution + */ + hwpage_size = PAGE_SIZE; + goto reg_user_mr_fallback; + } + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_user_mr_exit2; + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_user_mr_exit2: + ib_umem_release(e_mr->umem); +reg_user_mr_exit1: + ehca_mr_delete(e_mr); +reg_user_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", + PTR_ERR(ib_mr), pd, mr_access_flags, udata); + return ib_mr; +} /* end ehca_reg_user_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret; + + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + u64 new_size; + u64 *new_start; + u32 new_acl; + struct ehca_pd *new_pd; + u32 tmp_lkey, tmp_rkey; + unsigned long sl_flags; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " + "supported yet, mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (mr_rereg_mask & IB_MR_REREG_PD) { + if (!pd) { + ehca_err(mr->device, "rereg with bad pd, pd=%p " + "mr_rereg_mask=%x", pd, mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. 
addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + ehca_err(mr->device, "not supported for FMR, mr=%p " + "flags=%x", mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (!phys_buf_array || num_phys_buf <= 0) { + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" + " phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + u64 hw_pgsize = ehca_get_max_hwpage_size(shca); + + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + ehca_err(mr->device, "bad input values: new_size=%llx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + + new_size, hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout; + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + 
ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " + "hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", + ret, mr, mr_attr); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " + "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto dereg_mr_exit0; + } + + if (e_mr->umem) + ib_umem_release(e_mr->umem); + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p", ret, mr); + return ret; +} /* end ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct ib_mw *ib_mw; + u64 h_ret; + struct ehca_mw *e_mw; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_mw_hipzout_parms hipzout; + + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " + "shca=%p hca_hndl=%llx mw=%p", + h_ret, shca, shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + return &e_mw->ib_mw; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + 
ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* TODO: not supported up to now */ + ehca_gen_err("bind MW currently not supported by HCAD"); + + return -EPERM; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + u64 h_ret; + struct ehca_shca *shca = + container_of(mw->device, struct ehca_shca, ib_device); + struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " + "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + return ehca2ib_return_code(h_ret); + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + return 0; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr *e_fmr; + int ret; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + u64 hw_pgsize; + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + hw_pgsize = 1 << fmr_attr->page_shift; + if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { + ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_fmr = ehca_mr_new(); + if (!e_fmr) { + ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.hwpage_size = hw_pgsize; + /* + * pginfo.num_hwpages==0, ie register_rpages() will not be called + * but deferred to map_phys_fmr() + */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->hwpage_size = hw_pgsize; + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = 
fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + return &e_fmr->ib.ib_fmr; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + u32 tmp_lkey, tmp_rkey; + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + ehca_info(fmr->device, "map limit exceeded, fmr=%p " + "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.hwpage_size = e_fmr->hwpage_size; + pginfo.num_hwpages = + list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = + (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; + + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + return 0; + +map_phys_fmr_exit0: + if (ret) + ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " + "iova=%llx", ret, fmr, page_list, list_len, iova); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca; + struct ehca_mr *e_fmr; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " + "prev_shca=%p e_fmr=%p", + shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " + "e_fmr->flags=%x", e_fmr, e_fmr->flags); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, 
struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + ehca_err(&shca->ib_device, "unmap of one FMR failed, " + "stop rest, e_fmr=%p num_fmr=%x " + "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, + unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret; + u64 h_ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " + "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ret = ehca2ib_return_code(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + return 0; + +free_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) +{ + int ret; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " + "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", + h_ret, shca, e_mr, iova_start, size, acl, e_pd, + hipzout.lkey, pginfo, pginfo->num_kpages, + 
pginfo->num_hwpages, ret); + ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " + "not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret; + u32 rnum; + u64 rpage; + u32 i; + u64 *kpage; + + if (!pginfo->num_hwpages) /* in case of fmr */ + return 0; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max MAX_RPAGES ehca mr pages per register call */ + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ + if (rnum == 0) + rnum = MAX_RPAGES; /* last shot is full */ + } else + rnum = MAX_RPAGES; + + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%i rnum=%x kpage=%p", + ret, rnum, kpage); + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { + rpage = __pa(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p i=%x", + kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else + rpage = *kpage; + + h_ret = hipz_h_register_rpage_mr( + shca->ipz_hca_handle, e_mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "last " + "hipz_reg_rpage_mr failed, h_ret=%lli " + "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" + " lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " + "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " + "mr_hndl=%llx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr_rpages() */ + +/*----------------------------------------------------------------------*/ + +inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + u32 acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + u64 *kpage; + u64 rpage; + struct ehca_mr_pginfo pginfo_save; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + + 
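+	/*
+	 * Rereg1 strategy: gather all rpages into a single firmware control
+	 * block and reregister with one hipz_h_reregister_pmr() hcall. If
+	 * PHYP rejects this (e.g. H_MR_CONDITION: MW bound or MR shared),
+	 * -EAGAIN is returned and ehca_rereg_mr() retries via the 3-hcall
+	 * path: free the MR, allocate it again and re-register its pages.
+	 */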
kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_rereg_mr_rereg1_exit0; + } + + pginfo_save = *pginfo; + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); + if (ret) { + ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " + "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); + goto ehca_rereg_mr_rereg1_exit1; + } + rpage = __pa(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p", kpage); + ret = -EFAULT; + goto ehca_rereg_mr_rereg1_exit1; + } + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, rpage, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * reregistration unsuccessful, try it again with the 3 hCalls, + * e.g. this is required in case H_MR_CONDITION + * (MW bound or MR is shared) + */ + ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); + *pginfo = pginfo_save; + ret = -EAGAIN; + } else if ((u64 *)hipzout.vaddr != iova_start) { + ehca_err(&shca->ib_device, "PHYP changed iova_start in " + "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " + "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, + hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey, hipzout.lkey); + ret = -EFAULT; + } else { + /* + * successful reregistration + * note: start and start_out are identical for eServer HCAs + */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + } + +ehca_rereg_mr_rereg1_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_rereg_mr_rereg1_exit0: + if ( ret && (ret != -EAGAIN) ) + ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); + return ret; +} /* end ehca_rereg_mr_rereg1() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + /* first determine reregistration hCall(s) */ + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + ehca_err(&shca->ib_device, "Rereg MR for max-MR! 
e_mr=%p", + e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " + "mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->hwpage_size = save_mr.hwpage_size; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " + "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret; + struct ehca_pd *e_pd = + container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + struct ehca_mr save_fmr; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; + + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret == H_SUCCESS) { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + return 0; + } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " + "mr_hndl=%llx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ + } + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + 
e_fmr->flags = save_fmr.flags; + e_fmr->hwpage_size = save_fmr.hwpage_size; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_fmr.flags), + sizeof(struct ehca_mr) - offset); + } + +ehca_unmap_one_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " + "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, + shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_smr_exit0; + } + /* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_smr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " + "e_newmr=%p iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = __pa(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase +
((page++) * pginfo->hwpage_size); + kpage[rnum] = __pa(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret; + struct ehca_mr *e_mr; + u64 *iova_start; + u64 size_maxmr; + struct ehca_mr_pginfo pginfo; + struct ib_phys_buf ib_pbuf; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(&shca->ib_device, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, + hw_pgsize); + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); + if (ret) { + ehca_err(&shca->ib_device, "reg of internal max MR failed, " + "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + return 0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + return ret; +} /* end ehca_reg_internal_maxmr() */ + 
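For reference, the num_kpages/num_hwpages math in ehca_reg_internal_maxmr() above is plain round-up division over a region that may start unaligned inside its first page. A minimal standalone sketch of that arithmetic follows (assuming NUM_CHUNKS is the usual divide-round-up macro from ehca_tools.h; the start address and length below are made up for illustration):

#include <stdio.h>

/* assumed to match the NUM_CHUNKS definition in ehca_tools.h */
#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size) - 1) / (chunk_size))

int main(void)
{
	unsigned long long start = 0x10001000ULL;   /* hypothetical iova */
	unsigned long long size  = 0x12345ULL;      /* hypothetical length */
	unsigned long long kpage = 4096, hwpage = 65536;

	/* same shape as the num_kpages/num_hwpages computation above:
	 * the leading misalignment is added before rounding up */
	printf("num_kpages=%llu\n",
	       NUM_CHUNKS((start % kpage) + size, kpage));
	printf("num_hwpages=%llu\n",
	       NUM_CHUNKS((start % hwpage) + size, hwpage));
	return 0;
}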
+/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + u64 h_ret; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + return ehca2ib_return_code(h_ret); + } + /* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret; + struct ehca_mr *e_maxmr; + struct ib_pd *ib_pd; + + if (!shca->maxmr) { + ehca_err(&shca->ib_device, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + ehca_err(&shca->ib_device, "dereg internal max-MR failed, " + "ret=%i e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check physical buffer array of MR verbs for validness and + * calculates MR size + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf == 0) { + ehca_gen_err("bad phys buf array len, num_phys_buf=0"); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { + ehca_gen_err("iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%llx pbuf->size=%llx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " + "pbuf->size=%llx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + ehca_gen_err("bad address, i=%x pbuf->addr=%llx " + "pbuf->size=%llx", + i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + ehca_gen_err("bad size, i=%x 
pbuf->size=%llx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validness */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page; + + if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { + ehca_gen_err("bad list_len, list_len=%x " + "e_fmr->fmr_max_pages=%x fmr=%p", + list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " + "fmr_page_size=%x", i, *page, page, e_fmr, + e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 pgaddr; + u32 j = 0; + int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) + break; + } + + return ret; +} + +/* + * check given pages for contiguous layout + * last page addr is returned in prev_pgaddr for further check + */ +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, + u64 *prev_pgaddr) +{ + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; + if (ehca_debug_level >= 3) + ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, + *(u64 *)__va(pgaddr)); + if (pgaddr - PAGE_SIZE != *prev_pgaddr) { + ehca_gen_err("uncontiguous page found pgaddr=%llx " + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); + return -EINVAL; + } + *prev_pgaddr = pgaddr; + } + return 0; +} + +/* PAGE_SIZE < pginfo->hwpage_size */ +static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 pgaddr, prev_pgaddr; + u32 j = 0; + int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; + int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. 
+ */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); + ret = -EFAULT; + return ret; + } + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); + } + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); + } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; + +next_kpage: + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) + break; + } + + return ret; +} + +static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + + pbuf->size, pginfo->hwpage_size); + offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / + pginfo->hwpage_size; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%llx num_kpages=%llx " + "hwpage_cnt=%llx " + "num_hwpages=%llx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; + } + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " + "next_hwpage=%llx", pbuf->addr, + pbuf->size, pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (PAGE_SIZE >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (PAGE_SIZE / pginfo->hwpage_size) == 0) + (pginfo->kpage_cnt)++; + } else + pginfo->kpage_cnt += pginfo->hwpage_size / + PAGE_SIZE; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; + } + } + return ret; +} + +static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; + if ( !(*kpage) ) { + ehca_gen_err("*fmrlist=%llx fmrlist=%p " + "next_listelem=%llx next_hwpage=%llx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / + pginfo->hwpage_size) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; + } else + 
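/* more hwpages remain inside the current FMR page */ +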
(pginfo->next_hwpage)++; + } else { + unsigned int cnt_per_hwpage = pginfo->hwpage_size / + pginfo->u.fmr.fmr_pgsize; + unsigned int j; + u64 prev = *kpage; + /* check if adrs are contiguous */ + for (j = 1; j < cnt_per_hwpage; j++) { + u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); + if (prev + pginfo->u.fmr.fmr_pgsize != p) { + ehca_gen_err("uncontiguous fmr pages " + "found prev=%llx p=%llx " + "idx=%x", prev, p, i + j); + return -EINVAL; + } + prev = p; + } + pginfo->kpage_cnt += cnt_per_hwpage; + pginfo->u.fmr.next_listelem += cnt_per_hwpage; + fmrlist += cnt_per_hwpage; + } + kpage++; + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = PAGE_SIZE >= pginfo->hwpage_size ? + ehca_set_pagebuf_user1(pginfo, number, kpage) : + ehca_set_pagebuf_user2(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + break; + } + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { + ehca_gen_dbg("this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. */ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ +{ + *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. 
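+ * It is the exact inverse of the ehca_mrmw_map_acl() mapping above.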
+ */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); +} /* end ehca_mr_deletenew() */ + +int ehca_init_mrmw_cache(void) +{ + mr_cache = kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mr_cache) + return -ENOMEM; + mw_cache = kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mw_cache) { + kmem_cache_destroy(mr_cache); + mr_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void ehca_cleanup_mrmw_cache(void) +{ + if (mr_cache) + kmem_cache_destroy(mr_cache); + if (mw_cache) + kmem_cache_destroy(mw_cache); +} + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + 
nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret, *kpage; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if (!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = __pa(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else + return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return 
dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + + return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h new file mode 100644 index 0000000..50d8b51 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mrmw.h @@ -0,0 +1,132 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. 
+ * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey, + enum ehca_reg_type reg_type); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; +#endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c new file mode 100644 index 0000000..351577a --- 
/dev/null +++ b/drivers/staging/rdma/ehca/ehca_pd.c @@ -0,0 +1,124 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +static struct kmem_cache *pd_cache; + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct ehca_pd *pd; + int i; + + pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); + if (!pd) { + ehca_err(device, "device=%p context=%p out of memory", + device, context); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < 2; i++) { + INIT_LIST_HEAD(&pd->free[i]); + INIT_LIST_HEAD(&pd->full[i]); + } + mutex_init(&pd->lock); + + /* + * Kernel PD: when device = -1, 0 + * User PD: when context != -1 + */ + if (!context) { + /* + * Kernel PDs after init reuses always + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; + + return &pd->ib_pd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + int i, leftovers = 0; + struct ipz_small_queue_page *page, *tmp; + + for (i = 0; i < 2; i++) { + list_splice(&my_pd->full[i], &my_pd->free[i]); + list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { + leftovers = 1; + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } + } + + if (leftovers) + ehca_warn(pd->device, + "Some small queue pages were not freed"); + + kmem_cache_free(pd_cache, my_pd); + + return 0; +} + +int ehca_init_pd_cache(void) +{ + pd_cache = kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!pd_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_pd_cache(void) +{ + if (pd_cache) + kmem_cache_destroy(pd_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h 
new file mode 100644 index 0000000..90c4efa --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_qes.h @@ -0,0 +1,260 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote addresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_address; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 
0x10 +#define WC_SEND_RECEIVE_BIT 0x80 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c new file mode 100644 index 0000000..2e89356 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_qp.c @@ -0,0 +1,2257 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Joachim Fenkes + * Stefan Roscher + * Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static struct kmem_cache *qp_cache; + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! */ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns ehca qp state corresponding to given ib qp state + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns ib qp state corresponding to given ehca qp state + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); + return -EINVAL; + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps Ib to ehca qp_type + * returns ehca qp type corresponding to ib qp type + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + int index = -EINVAL; + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; + break; + case IB_QPS_INIT: + switch (ib_fromstate) { + case IB_QPS_RESET: + index = IB_QPST_RESET2INIT; + break; + case IB_QPS_INIT: + index = IB_QPST_INIT2INIT; + break; + } + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + switch (ib_fromstate) { + case IB_QPS_RTR: + index = IB_QPST_RTR2RTS; + break; + case IB_QPS_RTS: + index = IB_QPST_RTS2RTS; + break; + case IB_QPS_SQD: + index = IB_QPST_SQD2RTS; + break; + case IB_QPS_SQE: + index = 
IB_QPST_SQE2RTS; + break; + } + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; + break; + case IB_QPS_SQE: + break; + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; + break; + default: + break; + } + return index; +} + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; + case IB_QPT_RAW_ETHERTYPE: + return -EINVAL; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init userspace queue info from ipz_queue data + */ +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; + resp->offset = queue->offset; +} + +/* + * init_qp_queue initializes/constructs r/squeue and registers queue pages. + */ +static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_pd *pd, + struct ehca_qp *my_qp, + struct ipz_queue *queue, + int q_type, + u64 expected_hret, + struct ehca_alloc_queue_parms *parms, + int wqe_size) +{ + int ret, cnt, ipz_rc, nr_q_pages; + void *vpage; + u64 rpage, h_ret; + struct ib_device *ib_dev = &shca->ib_device; + struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; + + if (!parms->queue_size) + return 0; + + if (parms->is_small) { + nr_q_pages = 1; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + 128 << parms->page_size, + wqe_size, parms->act_nr_sges, 1); + } else { + nr_q_pages = parms->queue_size; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + EHCA_PAGESIZE, wqe_size, + parms->act_nr_sges, 0); + } + + if (!ipz_rc) { + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", + ipz_rc); + return -EBUSY; + } + + /* register queue pages */ + for (cnt = 0; cnt < nr_q_pages; cnt++) { + vpage = ipz_qpageit_get_inc(queue); + if (!vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "failed p_vpage= %p", vpage); + ret = -EINVAL; + goto init_qp_queue1; + } + rpage = __pa(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + NULL, 0, q_type, + rpage, parms->is_small ? 0 : 1, + my_qp->galpas.kernel); + if (cnt == (nr_q_pages - 1)) { /* last page! 
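the hcall must return the caller-supplied expected_hret here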
*/ + if (h_ret != expected_hret) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " + "h_ret=%lli", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queue1; + } + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "should not succeed vpage=%p", vpage); + ret = -EINVAL; + goto init_qp_queue1; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " + "h_ret=%lli", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queue1; + } + } + } + + ipz_qeit_reset(queue); + + return 0; + +init_qp_queue1: + ipz_queue_dtor(pd, queue); + return ret; +} + +static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) +{ + if (is_llqp) + return 128 << act_nr_sge; + else + return offsetof(struct ehca_wqe, + u.nud.sg_list[act_nr_sge]); +} + +static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, + int req_nr_sge, int is_llqp) +{ + u32 wqe_size, q_size; + int act_nr_sge = req_nr_sge; + + if (!is_llqp) + /* round up #SGEs so WQE size is a power of 2 */ + for (act_nr_sge = 4; act_nr_sge <= 252; + act_nr_sge = 4 + 2 * act_nr_sge) + if (act_nr_sge >= req_nr_sge) + break; + + wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); + q_size = wqe_size * (queue->max_wr + 1); + + if (q_size <= 512) + queue->page_size = 2; + else if (q_size <= 1024) + queue->page_size = 3; + else + queue->page_size = 0; + + queue->is_small = (queue->page_size != 0); +} + +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = qmap->entries - 1; + qmap->left_to_poll = 0; + qmap->next_wqe_idx = 0; + for (i = 0; i < qmap->entries; i++) { + qmap->map[i].reported = 1; + qmap->map[i].cqe_req = 0; + } +} + +/* + * Create an ib_qp struct that is either a QP or an SRQ, depending on + * the value of the is_srq parameter. If init_attr and srq_init_attr share + * fields, the field out of init_attr is used. 
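+ * srq_init_attr is dereferenced only when is_srq is set.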
+ */ +static struct ehca_qp *internal_create_qp( + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) +{ + struct ehca_qp *my_qp, *my_srq = NULL; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct ib_ucontext *context = NULL; + u64 h_ret; + int is_llqp = 0, has_srq = 0, is_user = 0; + int qp_type, max_send_sge, max_recv_sge, ret; + + /* h_call's out parameters */ + struct ehca_alloc_qp_parms parms; + u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; + unsigned long flags; + + if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", shca->max_num_qps); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + if (init_attr->create_flags) { + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + memset(&parms, 0, sizeof(parms)); + qp_type = init_attr->qp_type; + + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && + init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { + ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", + init_attr->sq_sig_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + /* save LLQP info */ + if (qp_type & 0x80) { + is_llqp = 1; + parms.ext_type = EQPT_LLQP; + parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; + } + qp_type &= 0x1F; + init_attr->qp_type &= 0x1F; + + /* handle SRQ base QPs */ + if (init_attr->srq) { + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); + + if (qp_type == IB_QPT_UC) { + ehca_err(pd->device, "UC with SRQ not supported"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + has_srq = 1; + parms.ext_type = EQPT_SRQBASE; + parms.srq_qpn = my_srq->real_qp_num; + } + + if (is_llqp && has_srq) { + ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + /* handle SRQs */ + if (is_srq) { + parms.ext_type = EQPT_SRQ; + parms.srq_limit = srq_init_attr->attr.srq_limit; + if (init_attr->cap.max_recv_sge > 3) { + ehca_err(pd->device, "no more than three SGEs " + "supported for SRQ pd=%p max_sge=%x", + pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } + + /* check QP type */ + if (qp_type != IB_QPT_UD && + qp_type != IB_QPT_UC && + qp_type != IB_QPT_RC && + qp_type != IB_QPT_SMI && + qp_type != IB_QPT_GSI) { + ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + if (is_llqp) { + switch (qp_type) { + case IB_QPT_RC: + if ((init_attr->cap.max_send_wr > 255) || + (init_attr->cap.max_recv_wr > 255)) { + ehca_err(pd->device, + "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + break; + case IB_QPT_UD: + if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { + ehca_err(pd->device, "UD LLQP not supported " + "by this adapter"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-ENOSYS); + } + if (!(init_attr->cap.max_send_sge <= 5 + && init_attr->cap.max_send_sge >= 1 + && init_attr->cap.max_recv_sge <= 5 + && init_attr->cap.max_recv_sge >= 1)) { + ehca_err(pd->device, + "Invalid Number of max_send_sge=%x " + "or max_recv_sge=%x for UD LLQP", + 
init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } else if (init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of " + "max_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + break; + default: + ehca_err(pd->device, "unsupported LL QP Type=%x", + qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } else { + int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI + || qp_type == IB_QPT_GSI) ? 250 : 252; + + if (init_attr->cap.max_send_sge > max_sge + || init_attr->cap.max_recv_sge > max_sge) { + ehca_err(pd->device, "Invalid number of SGEs requested " + "send_sge=%x recv_sge=%x max_sge=%x", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } + + my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); + if (!my_qp) { + ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); + return ERR_PTR(-ENOMEM); + } + + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); + spin_lock_init(&my_qp->spinlock_s); + spin_lock_init(&my_qp->spinlock_r); + my_qp->qp_type = qp_type; + my_qp->ext_type = parms.ext_type; + my_qp->state = IB_QPS_RESET; + + if (init_attr->recv_cq) + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + if (init_attr->send_cq) + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); + + ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; + + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { + if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } + goto create_qp_exit0; + } + + if (has_srq) + parms.srq_token = my_qp->token; + + parms.servicetype = ibqptype2servicetype(qp_type); + if (parms.servicetype < 0) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid qp_type=%x", qp_type); + goto create_qp_exit1; + } + + /* Always signal by WQE so we can hide circ. 
WQEs */ + parms.sigtype = HCALL_SIGT_BY_WQE; + + /* UD_AV CIRCUMVENTION */ + max_send_sge = init_attr->cap.max_send_sge; + max_recv_sge = init_attr->cap.max_recv_sge; + if (parms.servicetype == ST_UD && !is_llqp) { + max_send_sge += 2; + max_recv_sge += 2; + } + + parms.token = my_qp->token; + parms.eq_handle = shca->eq.ipz_eq_handle; + parms.pd = my_pd->fw_pd; + if (my_qp->send_cq) + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + if (my_qp->recv_cq) + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; + + parms.squeue.max_wr = init_attr->cap.max_send_wr; + parms.rqueue.max_wr = init_attr->cap.max_recv_wr; + parms.squeue.max_sge = max_send_sge; + parms.rqueue.max_sge = max_recv_sge; + + /* RC QPs need one more SWQE for unsolicited ack circumvention */ + if (qp_type == IB_QPT_RC) + parms.squeue.max_wr++; + + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { + if (HAS_SQ(my_qp)) + ehca_determine_small_queue( + &parms.squeue, max_send_sge, is_llqp); + if (HAS_RQ(my_qp)) + ehca_determine_small_queue( + &parms.rqueue, max_recv_sge, is_llqp); + parms.qp_storage = + (parms.squeue.is_small || parms.rqueue.is_small); + } + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", + h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit1; + } + + ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; + my_qp->ipz_qp_handle = parms.qp_handle; + my_qp->galpas = parms.galpas; + + swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); + rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); + + switch (qp_type) { + case IB_QPT_RC: + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; + } + /* hide the extra WQE */ + parms.squeue.act_nr_wqes--; + break; + case IB_QPT_UD: + case IB_QPT_GSI: + case IB_QPT_SMI: + /* UD circumvention */ + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; + } else { + parms.squeue.act_nr_sges -= 2; + parms.rqueue.act_nr_sges -= 2; + } + + if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { + parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; + parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; + parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; + parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; + ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; + } + + break; + + default: + break; + } + + /* initialize r/squeue and register queue pages */ + if (HAS_SQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, + HAS_RQ(my_qp) ? 
H_PAGE_REGISTERED : H_SUCCESS, + &parms.squeue, swqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize squeue " + "and pages ret=%i", ret); + goto create_qp_exit2; + } + + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); + } + } + + if (HAS_RQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, &parms.rqueue, rwqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize rqueue " + "and pages ret=%i", ret); + goto create_qp_exit4; + } + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } + } else if (init_attr->srq && !is_user) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; + } + + if (is_srq) { + my_qp->ib_srq.pd = &my_pd->ib_pd; + my_qp->ib_srq.device = my_pd->ib_pd.device; + + my_qp->ib_srq.srq_context = init_attr->qp_context; + my_qp->ib_srq.event_handler = init_attr->event_handler; + } else { + my_qp->ib_qp.qp_num = ib_qp_num; + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; + + my_qp->ib_qp.qp_type = qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + } + + init_attr->cap.max_inline_data = 0; /* not supported yet */ + init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; + init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; + init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; + init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; + my_qp->init_attr = *init_attr; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + &my_qp->ib_qp; + if (ehca_nr_ports < 0) { + /* alloc array to cache subsequent modify qp parms + * for autodetect mode + */ + my_qp->mod_qp_parm = + kzalloc(EHCA_MOD_QP_PARM_MAX * + sizeof(*my_qp->mod_qp_parm), + GFP_KERNEL); + if (!my_qp->mod_qp_parm) { + ehca_err(pd->device, + "Could not alloc mod_qp_parm"); + goto create_qp_exit5; + } + } + } + + /* NOTE: define_apq0() not supported yet */ + if (qp_type == IB_QPT_GSI) { + h_ret = ehca_define_sqp(shca, my_qp, init_attr); + if (h_ret != H_SUCCESS) { + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + /* the QP pointer is no longer valid */ + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + NULL; + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit6; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); + if (ret) { + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%i", ret); + goto 
create_qp_exit7; + } + } + + /* copy queues, galpa data to user space */ + if (context && udata) { + struct ehca_create_qp_resp resp; + memset(&resp, 0, sizeof(resp)); + + resp.qp_num = my_qp->real_qp_num; + resp.token = my_qp->token; + resp.qp_type = my_qp->qp_type; + resp.ext_type = my_qp->ext_type; + resp.qkey = my_qp->qkey; + resp.real_qp_num = my_qp->real_qp_num; + + if (HAS_SQ(my_qp)) + queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + resp.fw_handle_ofs = (u32) + (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); + + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + ehca_err(pd->device, "Copy to udata failed"); + ret = -EINVAL; + goto create_qp_exit8; + } + } + + return my_qp; + +create_qp_exit8: + ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); + +create_qp_exit7: + kfree(my_qp->mod_qp_parm); + +create_qp_exit6: + if (HAS_RQ(my_qp) && !is_user) + vfree(my_qp->rq_map.map); + +create_qp_exit5: + if (HAS_RQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + +create_qp_exit4: + if (HAS_SQ(my_qp) && !is_user) + vfree(my_qp->sq_map.map); + +create_qp_exit3: + if (HAS_SQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + +create_qp_exit2: + hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + +create_qp_exit1: + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + +create_qp_exit0: + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return ERR_PTR(ret); +} + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct ehca_qp *ret; + + ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); + return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ib_qp_init_attr qp_init_attr; + struct ehca_qp *my_qp; + struct ib_srq *ret; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 hret, update_mask; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + /* For common attributes, internal_create_qp() takes its info + * out of qp_init_attr, so copy all common attrs there. 
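+	 * For example, srq_init_attr->attr.max_wr is carried over as
+	 * qp_init_attr.cap.max_recv_wr and attr.max_sge as
+	 * cap.max_recv_sge, as set up right below.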
+ */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.event_handler = srq_init_attr->event_handler; + qp_init_attr.qp_context = srq_init_attr->srq_context; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; + qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; + + my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); + if (IS_ERR(my_qp)) + return (struct ib_srq *)my_qp; + + /* copy back return values */ + srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; + srq_init_attr->attr.max_sge = 3; + + /* drive SRQ into RTR state */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(pd->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + ret = ERR_PTR(-ENOMEM); + goto create_srq1; + } + + mqpcb->qp_state = EHCA_QPS_INIT; + mqpcb->prim_phys_port = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to INIT " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_enable = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not enable SRQ " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_state = EHCA_QPS_RTR; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to RTR " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + ehca_free_fw_ctrlblock(mqpcb); + + return &my_qp->ib_srq; + +create_srq2: + ret = ERR_PTR(ehca2ib_return_code(hret)); + ehca_free_fw_ctrlblock(mqpcb); + +create_srq1: + internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); + + return ret; +} + +/* + * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts + * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe + * returns total number of bad wqes in bad_wqe_cnt + */ +static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, + int *bad_wqe_cnt) +{ + u64 h_ret; + struct ipz_queue *squeue; + void *bad_send_wqe_p, *bad_send_wqe_v; + u64 q_ofs; + struct ehca_wqe *wqe; + int qp_num = my_qp->ib_qp.qp_num; + + /* get send wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &bad_send_wqe_p, NULL, 2); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" + " ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); + ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", + qp_num, bad_send_wqe_p); + /* convert wqe pointer to vadr */ + bad_send_wqe_v = __va((u64)bad_send_wqe_p); + if (ehca_debug_level >= 2) + ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); + squeue = &my_qp->ipz_squeue; + 
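/*
+	 * Translate the absolute WQE address handed back by the firmware
+	 * into an offset inside the send queue, so that the purge loop
+	 * below can walk the queue entry by entry.
+	 */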
+	if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
+		ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
+			 " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
+		return -EFAULT;
+	}
+
+	/* loop sets wqe's purge bit */
+	wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
+	*bad_wqe_cnt = 0;
+	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+		if (ehca_debug_level >= 2)
+			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+		wqe->nr_of_data_seg = 0; /* suppress data access */
+		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+		q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
+		wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
+		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
+	}
+	/*
+	 * bad wqe will be reprocessed and ignored when poll_cq() is called,
+	 * i.e. nr of wqes with flush error status is one less
+	 */
+	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+		 qp_num, (*bad_wqe_cnt)-1);
+	wqe->wqef = 0;
+
+	return 0;
+}
+
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+			  struct ehca_queue_map *qmap)
+{
+	void *wqe_v;
+	u64 q_ofs;
+	u32 wqe_idx;
+	unsigned int tail_idx;
+
+	/* convert real to abs address */
+	wqe_p = wqe_p & (~(1UL << 63));
+
+	wqe_v = __va(wqe_p);
+
+	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+		ehca_gen_err("Invalid offset for calculating left cqes "
+			     "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
+		return -EFAULT;
+	}
+
+	tail_idx = next_index(qmap->tail, qmap->entries);
+	wqe_idx = q_ofs / ipz_queue->qe_size;
+
+	/* check all processed wqes, whether a cqe is requested or not */
+	while (tail_idx != wqe_idx) {
+		if (qmap->map[tail_idx].cqe_req)
+			qmap->left_to_poll++;
+		tail_idx = next_index(tail_idx, qmap->entries);
+	}
+	/* save index in queue, where we have to start flushing */
+	qmap->next_wqe_idx = wqe_idx;
+	return 0;
+}
+
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+	u64 h_ret;
+	void *send_wqe_p, *recv_wqe_p;
+	int ret;
+	unsigned long flags;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* this hcall is not supported on base QPs */
+	if (my_qp->ext_type != EQPT_SRQBASE) {
+		/* get send and receive wqe pointer */
+		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle, &my_qp->pf,
+				&send_wqe_p, &recv_wqe_p, 4);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+				 "failed ehca_qp=%p qp_num=%x h_ret=%lli",
+				 my_qp, qp_num, h_ret);
+			return ehca2ib_return_code(h_ret);
+		}
+
+		/*
+		 * acquire lock to ensure that nobody is polling the cq,
+		 * which could mean that the qmap->tail pointer is in an
+		 * inconsistent state.
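+		 * calc_left_cqes() then counts, between qmap->tail and the
+		 * wqe reported by the hardware, the entries that still have
+		 * cqe_req set; only after those completions have been polled
+		 * may flush cqes be generated for the remaining wqes.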
+		 */
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+				     &my_qp->sq_map);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+		if (ret)
+			return ret;
+
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+				     &my_qp->rq_map);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+		if (ret)
+			return ret;
+	} else {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		my_qp->sq_map.left_to_poll = 0;
+		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+							my_qp->sq_map.entries);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		my_qp->rq_map.left_to_poll = 0;
+		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+							my_qp->rq_map.entries);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+	}
+
+	/* this assures flush cqes being generated only for pending wqes */
+	if ((my_qp->sq_map.left_to_poll == 0) &&
+	    (my_qp->rq_map.left_to_poll == 0)) {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ehca_add_to_err_list(my_qp, 1);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		if (HAS_RQ(my_qp)) {
+			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+			ehca_add_to_err_list(my_qp, 0);
+			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+					       flags);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * internal_modify_qp with circumvention to handle aqp0 properly
+ * smi_reset2init indicates if this is an internal reset-to-init-call for
+ * smi. This flag must always be zero if called from ehca_modify_qp()!
+ * This internal function was introduced to avoid recursion of
+ * ehca_modify_qp()!
+ */
+static int internal_modify_qp(struct ib_qp *ibqp,
+			      struct ib_qp_attr *attr,
+			      int attr_mask, int smi_reset2init)
+{
+	enum ib_qp_state qp_cur_state, qp_new_state;
+	int cnt, qp_attr_idx, ret = 0;
+	enum ib_qp_statetrans statetrans;
+	struct hcp_modify_qp_control_block *mqpcb;
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca =
+		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
+	u64 update_mask;
+	u64 h_ret;
+	int bad_wqe_cnt = 0;
+	int is_user = 0;
+	int squeue_locked = 0;
+	unsigned long flags = 0;
+
+	/* do query_qp to obtain current attr values */
+	mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+	if (!mqpcb) {
+		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				mqpcb, my_qp->galpas.kernel);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 my_qp, ibqp->qp_num, h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto modify_qp_exit1;
+	}
+	if (ibqp->uobject)
+		is_user = 1;
+
+	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 mqpcb->qp_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+	/*
+	 * circumvention to set aqp0 initial state to init
+	 * as expected by IB spec
+	 */
+	if (smi_reset2init == 0 &&
+	    ibqp->qp_type == IB_QPT_SMI &&
+	    qp_cur_state == IB_QPS_RESET &&
+	    (attr_mask & IB_QP_STATE) &&
+	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
+		struct ib_qp_attr smiqp_attr = {
+			.qp_state = IB_QPS_INIT,
.port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + ehca_err(ibqp->device, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%i", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, + qp_cur_state, my_qp, ibqp->qp_num, attr_mask); + goto modify_qp_exit1; + } + + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + ehca_err(ibqp->device, " qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x ehca_qp=%p " + "qp_num=%x", qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + ehca_err(ibqp->device, + "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, + "ehca_qp=%p qp_num=%x qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. 
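+	 * In code terms: only the INIT -> RTR transition of a plain
+	 * (non-LLQP) UD QP on hw_level >= 0x22 forces send_grh_flag = 1,
+	 * as checked right below.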
+	 */
+	if ((my_qp->qp_type == IB_QPT_UD) &&
+	    (my_qp->ext_type != EQPT_LLQP) &&
+	    (statetrans == IB_QPST_INIT2RTR) &&
+	    (shca->hw_level >= 0x22)) {
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+		mqpcb->send_grh_flag = 1;
+	}
+
+	/* sqe -> rts: set purge bit of bad wqe before actual trans */
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* mark next free wqe if kernel */
+		if (!ibqp->uobject) {
+			struct ehca_wqe *wqe;
+			/* lock send queue */
+			spin_lock_irqsave(&my_qp->spinlock_s, flags);
+			squeue_locked = 1;
+			/* mark next free wqe */
+			wqe = (struct ehca_wqe *)
+				ipz_qeit_get(&my_qp->ipz_squeue);
+			wqe->optype = wqe->wqef = 0xff;
+			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
+				 ibqp->qp_num, wqe);
+		}
+		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
+		if (ret) {
+			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
+				 "ehca_qp=%p qp_num=%x ret=%i",
+				 my_qp, ibqp->qp_num, ret);
+			goto modify_qp_exit2;
+		}
+	}
+
+	/*
+	 * enable RDMA_Atomic_Control if reset->init and reliable connection;
+	 * this is necessary since gen2 does not provide that flag,
+	 * but pHyp requires it
+	 */
+	if (statetrans == IB_QPST_RESET2INIT &&
+	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
+		mqpcb->rdma_atomic_ctrl = 3;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
+	}
+	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
+	if (statetrans == IB_QPST_INIT2RTR &&
+	    (ibqp->qp_type == IB_QPT_UC) &&
+	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
+		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		if (attr->pkey_index >= 16) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid pkey_index=%x. "
+				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
+				 attr->pkey_index, my_qp, ibqp->qp_num);
+			goto modify_qp_exit2;
+		}
+		mqpcb->prim_p_key_idx = attr->pkey_index;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+	}
+	if (attr_mask & IB_QP_PORT) {
+		struct ehca_sport *sport;
+		struct ehca_qp *aqp1;
+		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid port=%x. "
" + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + } + if (attr_mask & IB_QP_AV) { + mqpcb->dlid = attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1; + + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? 
+			attr->max_dest_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
+			attr->max_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET
+			(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
+	}
+	if (attr_mask & IB_QP_ALT_PATH) {
+		if (attr->alt_port_num < 1
+		    || attr->alt_port_num > shca->num_ports) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid alt_port=%x. "
+				 "ehca_qp=%p qp_num=%x num_ports=%x",
+				 attr->alt_port_num, my_qp, ibqp->qp_num,
+				 shca->num_ports);
+			goto modify_qp_exit2;
+		}
+		mqpcb->alt_phys_port = attr->alt_port_num;
+
+		if (attr->alt_pkey_index >= 16) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
+				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
+				 attr->pkey_index, my_qp, ibqp->qp_num);
+			goto modify_qp_exit2;
+		}
+		mqpcb->alt_p_key_idx = attr->alt_pkey_index;
+
+		mqpcb->timeout_al = attr->alt_timeout;
+		mqpcb->dlid_al = attr->alt_ah_attr.dlid;
+		mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
+		mqpcb->service_level_al = attr->alt_ah_attr.sl;
+
+		if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
+				  attr->alt_ah_attr.static_rate,
+				  &mqpcb->max_static_rate_al)) {
+			ret = -EINVAL;
+			goto modify_qp_exit2;
+		}
+
+		/* OpenIB doesn't support alternate retry counts - copy them */
+		mqpcb->retry_count_al = mqpcb->retry_count;
+		mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
+
+		/*
+		 * Always supply the GRH flag, even if it's zero, to give the
+		 * hypervisor a clear "yes" or "no" instead of a "perhaps"
+		 */
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID, otherwise pHyp will return H_ATTR_PARM!!!
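+		 * The alternate path mirrors the primary path GRH handling
+		 * above, just with the *_AL fields and mask bits.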
+ */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1; + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state != IB_MIG_REARM + && attr->path_mig_state != IB_MIG_MIGRATED) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid mig_state=%x", + attr->path_mig_state); + goto modify_qp_exit2; + } + mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + /* no support for max_send/recv_sge yet */ + } + + if (ehca_debug_level >= 2) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "ENABLE in context of " + "RESET_2_INIT failed! 
Maybe you didn't get "
+				 "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
+				 h_ret, my_qp, ibqp->qp_num);
+			goto modify_qp_exit2;
+		}
+	}
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+	    && !is_user) {
+		ret = check_for_left_cqes(my_qp, shca);
+		if (ret)
+			goto modify_qp_exit2;
+	}
+
+	if (statetrans == IB_QPST_ANY2RESET) {
+		ipz_qeit_reset(&my_qp->ipz_rqueue);
+		ipz_qeit_reset(&my_qp->ipz_squeue);
+
+		if (qp_cur_state == IB_QPS_ERR && !is_user) {
+			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+			if (HAS_RQ(my_qp))
+				del_from_err_list(my_qp->recv_cq,
+						  &my_qp->rq_err_node);
+		}
+		if (!is_user)
+			reset_queue_map(&my_qp->sq_map);
+
+		if (HAS_RQ(my_qp) && !is_user)
+			reset_queue_map(&my_qp->rq_map);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		my_qp->qkey = attr->qkey;
+
+modify_qp_exit2:
+	if (squeue_locked) { /* this means: sqe -> rts */
+		spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+		my_qp->sqerr_purgeflag = 1;
+	}
+
+modify_qp_exit1:
+	ehca_free_fw_ctrlblock(mqpcb);
+
+	return ret;
+}
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata)
+{
+	int ret = 0;
+
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+
+	/* The if-block below caches qp_attr to be modified for GSI and SMI
+	 * qps during the initialization by ib_mad. When the respective port
+	 * is activated, i.e. we got an event PORT_ACTIVE, we'll replay the
+	 * cached modify calls sequence, see ehca_recover_sqp() below.
+	 * Why that is required:
+	 * 1) If only one port is connected, older code requires it to be
+	 *    port one and the module option nr_ports=1 to be given by the
+	 *    user, which is very inconvenient for the end user.
+	 * 2) Firmware accepts modify_qp() only if the respective port has
+	 *    become active. Older code had a wait loop of 30sec in
+	 *    create_qp()/define_aqp1(), which is not appropriate in
+	 *    practice. This code now removes that wait loop, see
+	 *    define_aqp1(), and always reports all ports to ib_mad and its
+	 *    users. Only activated ports will then be usable for the users.
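+	 *
+	 * The replay itself is a plain loop, see ehca_recover_sqp() below;
+	 * roughly:
+	 *
+	 *   for (i = 0; i < qp_parm_idx; i++)
+	 *           internal_modify_qp(sqp, &qp_parm[i].attr,
+	 *                              qp_parm[i].mask, 0);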
+ */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + goto out; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; +} + +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x " + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int cnt, ret = 0; + u64 h_ret; + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ehca_err(qp->device, "Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(qp->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(qp->device, "hipz_h_query_qp() failed " + 
"ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + ehca_err(qp->device, "Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state - 1; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); + +query_qp_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum 
ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = attr->srq_limit; + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level >= 2) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int ret = 0; + u64 h_ret; + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->max_sge = 3; + srq_attr->srq_limit = qpcb->curr_srq_limit; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + +query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; + u32 qp_num = my_qp->real_qp_num; + int ret; + u64 h_ret; + u8 port_num; + int is_user = 0; + enum ib_qp_type qp_type; + unsigned long flags; + + if (uobject) { + is_user = 1; + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); + return -EINVAL; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); + if (ret) { + ehca_err(dev, 
"Couldn't unassign qp from " + "send_cq ret=%i qp_num=%x cq_num=%x", ret, + qp_num, my_qp->send_cq->cq_number); + return ret; + } + } + + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp) && !is_user) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); + return ehca2ib_return_code(h_ret); + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + ehca_info(dev, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + if (HAS_RQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + if (!is_user) + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + if (!is_user) + vfree(my_qp->sq_map.map); + } + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return 0; +} + +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + +int ehca_init_qp_cache(void) +{ + qp_cache = kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!qp_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_qp_cache(void) +{ + if (qp_cache) + kmem_cache_destroy(qp_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c new file mode 100644 index 0000000..47f9498 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_reqs.c @@ -0,0 +1,953 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Hoang-Nam Nguyen + * Waleri Fomin + * Joachim Fenkes + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. 
" + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx; + int j; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x", idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, + send_wr->opcode); + if (mad_hdr) { + ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx " + "attr_id=%x resv=%x attr_mod=%x", + idx, mad_hdr->base_version, + mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, + mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = __va(sge->addr); + ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + "lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr, + u32 sq_map_idx, + int hidden) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. 
" + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) { + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch (qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (unlikely(!send_wr->wr.ud.ah)) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. 
qp=%x", qp->real_qp_num); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_address = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + + break; + + default: + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! 
+ */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u32 sq_map_idx; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. + */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + int wqe_cnt = 0; + int ret = 0; + unsigned long flags; + + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + ret = -EINVAL; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + + /* loop processes list of send reqs */ + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); + if (unlikely(ret)) { + goto post_send_exit0; + } + wqe_cnt++; + send_wr = send_wr->next; + } + +post_send_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, 
wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); + my_qp->message_count += wqe_cnt; + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; + return ret; +} + +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + u32 rq_map_idx; + unsigned long flags; + struct ehca_qmap_entry *qmap_entry; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + ret = -ENODEV; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, flags); + + /* loop processes list of recv reqs */ + while (recv_wr) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + + wqe_cnt++; + recv_wr = recv_wr->next; + } /* eof for recv_wr */ + +post_recv_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! 
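+ *
+ * Example: ib_wc_opcode[0x01] is IB_WC_RECV + 1; ehca_poll_cq_one() then
+ * computes wc->opcode = ib_wc_opcode[cqe->optype] - 1, so any optype not
+ * listed below decrements to -1 and is rejected as invalid.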
+ */ +static const u8 ib_wc_opcode[255] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0, qmap_tail_idx; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe; + struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; + int cqe_count = 0, is_error; + +repoll: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; + } + + /* prevents loads being reordered across this point */ + rmb(); + + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp; + int purgeflag; + unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); + if (!qp) { + ehca_err(cq->device, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto repoll; + } + spin_lock_irqsave(&qp->spinlock_s, flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, flags); + + if (purgeflag) { + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + if (ehca_debug_level >= 2) + ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto repoll; + } + } + + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { + ehca_dbg(cq->device, + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. 
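+		 * (i.e. WC_SEND_RECEIVE_BIT is set in w_completion_flags)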
*/ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto repoll; + } + + /* eval ib_wc_status */ + if (unlikely(is_error)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. 
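+	 *
+	 * Example: w_completion_flags = 0x60 (both HW bits set) yields
+	 * (0x60 >> 5) & 3 = 3, i.e. IB_WC_GRH | IB_WC_WITH_IMM.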
+ */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + return ret; +} + +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int nr; + struct ehca_qp *err_qp; + struct ib_wc *current_wc = wc; + int ret = 0; + unsigned long flags; + int entries_left = num_entries; + + if (num_entries < 1) { + ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " + "cq_num=%x", num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + spin_lock_irqsave(&my_cq->spinlock, flags); + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + entries_left -= nr; + + spin_unlock_irqrestore(&my_cq->spinlock, flags); + if (ret == -EAGAIN || !ret) + ret = num_entries - entries_left; + +poll_cq_exit0: + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int ret = 0; + + switch (notify_flags & IB_CQ_SOLICITED_MASK) { + case IB_CQ_SOLICITED: + 
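/* CQx_N0 arms the CQ for the next solicited completion;
+		 * CQx_N1 (IB_CQ_NEXT_COMP, below) arms it for any completion */
+		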
hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c new file mode 100644 index 0000000..376b031 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_sqp.c @@ -0,0 +1,245 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) + +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + u32 pma_qp_nr, bma_qp_nr; + u64 ret; + u8 port = qp_init_attr->port_num; + int counter; + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + ehca_err(&shca->ib_device, + "Can't define AQP1 for port %x. 
h_ret=%lli", + port, ret); + return ret; + } + shca->sport[port - 1].pma_qp_nr = pma_qp_nr; + ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", + port, pma_qp_nr); + break; + default: + ehca_err(&shca->ib_device, "invalid qp_type=%x", + qp_init_attr->qp_type); + return H_PARAMETER; + } + + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + + for (counter = 0; + shca->sport[port - 1].port_state != IB_PORT_ACTIVE && + counter < ehca_port_act_time; + counter++) { + ehca_dbg(&shca->ib_device, "... wait until port %x is active", + port); + msleep_interruptible(1000); + } + + if (counter == ehca_port_act_time) { + ehca_err(&shca->ib_device, "Port %x is not active.", port); + return H_HARDWARE; + } + + return H_SUCCESS; +} + +struct ib_perf { + struct ib_mad_hdr mad_hdr; + u8 reserved[40]; + u8 data[192]; +} __attribute__ ((packed)); + +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); + +static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; + struct ib_perf *out_perf = (struct ib_perf *)out_mad; + struct ib_class_port_info *poi = + (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; + struct ehca_shca *shca = + container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_sport *sport = &shca->sport[port_num - 1]; + + ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); + + *out_mad = *in_mad; + + if (in_perf->mad_hdr.class_version != 1) { + ehca_warn(ibdev, "Unsupported class_version=%x", + in_perf->mad_hdr.class_version); + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; + goto perf_reply; + } + + switch (in_perf->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_SET: + /* set class port info for redirection */ + out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; + out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; + memset(poi, 0, sizeof(*poi)); + poi->base_version = 1; + poi->class_version = 1; + poi->resp_time_value = 18; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; + poi->redirect_qkey = IB_QP1_QKEY; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + const struct vertcfl *vertcfl = + (const struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, port_num, 0, + (union ib_gid *)&poi->redirect_gid); + + ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", + sport->saved_attr.lid, sport->pma_qp_nr); + break; + + case IB_MGMT_METHOD_GET_RESP: + return IB_MAD_RESULT_FAILURE; + + default: + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; + break; + } + +perf_reply: + out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int ehca_process_mad(struct ib_device 
*ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) + return IB_MAD_RESULT_FAILURE; + + /* accept only pma request */ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return IB_MAD_RESULT_SUCCESS; + + ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h new file mode 100644 index 0000000..d280b12 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_tools.h @@ -0,0 +1,155 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Khadija Souissi + * Waleri Fomin + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern int ehca_debug_level; + +#define ehca_dbg(ib_dev, format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ + "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, \ + ## arg); \ + } while (0) + +#define ehca_info(ib_dev, format, arg...) \ + dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_warn(ib_dev, format, arg...) 
\ + dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_err(ib_dev, format, arg...) \ + dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/* use this one only if no ib_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg); \ + } while (0) + +#define ehca_gen_warn(format, arg...) \ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_gen_err(format, arg...) \ + printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/** + * ehca_dmp - printk a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define ehca_dmp(adr, len, format, args...) \ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char *)(adr); \ + for (x = 0; x < l; x += 16) { \ + printk(KERN_INFO "EHCA_DMP:%s " format \ + " adr=%p ofs=%04x %016llx %016llx\n", \ + __func__, ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) + +/* define a bitmask, the ibm way... */ +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) + +/** + * EHCA_BMASK_GET - extract a parameter from value by mask + */ +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) + +/* Converts ehca to ib return code */ +int ehca2ib_return_code(u64 ehca_rc); + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c new file mode 100644 index 0000000..1a1d5d9 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_uverbs.c @@ -0,0 +1,309 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context; + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + ehca_err(device, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); + return 0; +} + +static void ehca_mm_open(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static const struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize < EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ + ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, + vma->vm_page_prot); + if (unlikely(ret)) { + ehca_gen_err("remap_pfn_range() failed ret=%i", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; + u64 start, ofs; + struct page *page; + + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + ret = vm_insert_page(vma, start, page); + if 
(unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%i", ret); + return ret; + } + start += PAGE_SIZE; + } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 0: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_fw() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 1: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } + + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 0: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; + } + break; + + case 1: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + case 2: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + u64 fileoffset = vma->vm_pgoff; + u32 idr_handle = fileoffset & 0x1FFFFFF; + u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ + u32 ret; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ib_uobject *uobject; + + switch (q_type) { + case 0: /* CQ */ + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, idr_handle); + read_unlock(&ehca_cq_idr_lock); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) + return -EINVAL; + + if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) + return -EINVAL; + + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 1: /* QP */ + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, idr_handle); + read_unlock(&ehca_qp_idr_lock); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) + return -EINVAL; + + uobject = IS_SRQ(qp) ? 
qp->ib_srq.uobject : qp->ib_qp.uobject; + if (!uobject || uobject->context != context) + return -EINVAL; + + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c new file mode 100644 index 0000000..89517ff --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_if.c @@ -0,0 +1,949 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Joachim Fenkes + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hcp_phyp.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) +#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) +#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) +#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) +#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) +#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) + +#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) +#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) + +#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) +#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) + +#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) + +#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) + +#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) +#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) +#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) + +#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" +#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" +#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx" + +static DEFINE_SPINLOCK(hcall_lock); + +static long ehca_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + unsigned long flags = 0; + + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, + opcode, ret, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + else + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); + + return ret; + } + + return H_BUSY; +} + +static long ehca_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + unsigned long 
flags = 0; + + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) { + ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + } else if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + return ret; + } + + return H_BUSY; +} + +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 *act_nr_of_entries, + u32 *act_pages, + u32 *eq_ist) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + u64 allocate_controls; + + /* resource type */ + allocate_controls = 3ULL; + + /* ISN is associated */ + if (neq_control != 1) + allocate_controls = (1ULL << (63 - 7)) | allocate_controls; + else /* notification event queue */ + allocate_controls = (1ULL << 63) | allocate_controls; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + number_of_entries, /* r6 */ + 0, 0, 0, 0, 0, 0); + eq_handle->handle = outs[0]; + *act_nr_of_entries = (u32)outs[3]; + *act_pages = (u32)outs[4]; + *eq_ist = (u32)outs[5]; + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resource - ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask) +{ + return ehca_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle.handle, /* r4 */ + eq_handle.handle, /* r5 */ + event_mask, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param) +{ + int rc; + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 2, /* r5 */ + param->eq_handle.handle, /* r6 */ + cq->token, /* r7 */ + param->nr_cqe, /* r8 */ + 0, 0, 0, 0); + cq->ipz_cq_handle.handle = outs[0]; + param->act_nr_of_entries = (u32)outs[3]; + param->act_pages = (u32)outs[4]; + + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. 
ret=%lli", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_alloc_qp_parms *parms, int is_user) +{ + int rc; + u64 ret; + u64 allocate_controls, max_r10_reg, r11, r12; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + allocate_controls = + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) + | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) + | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, + parms->squeue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, + parms->rqueue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, + !!(parms->ll_comp_flags & LLQP_RECV_COMP)) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, + !!(parms->ll_comp_flags & LLQP_SEND_COMP)) + | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, + parms->ud_av_l_key_ctl) + | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); + + max_r10_reg = + EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, + parms->squeue.max_wr + 1) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, + parms->rqueue.max_wr + 1) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, + parms->squeue.max_sge) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, + parms->rqueue.max_sge); + + r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); + + if (parms->ext_type == EQPT_SRQ) + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); + else + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + parms->send_cq_handle.handle, + parms->recv_cq_handle.handle, + parms->eq_handle.handle, + ((u64)parms->token << 32) | parms->pd.value, + max_r10_reg, r11, r12); + + parms->qp_handle.handle = outs[0]; + parms->real_qp_num = (u32)outs[1]; + parms->squeue.act_nr_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); + parms->rqueue.act_nr_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); + parms->squeue.act_nr_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); + parms->rqueue.act_nr_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); + parms->squeue.queue_size = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); + parms->rqueue.queue_size = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); + + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. 
ret=%lli", ret); + + return ret; +} + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block) +{ + u64 ret; + u64 r_cb = __pa(query_port_response_block); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response block not page aligned"); + return H_PARAMETER; + } + + ret = ehca_plpar_hcall_norets(H_QUERY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0); + + if (ehca_debug_level >= 2) + ehca_dmp(query_port_response_block, 64, "response_block"); + + return ret; +} + +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask) +{ + u64 port_attributes = port_cap; + + if (modify_mask & IB_PORT_SHUTDOWN) + port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); + if (modify_mask & IB_PORT_INIT_TYPE) + port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); + if (modify_mask & IB_PORT_RESET_QKEY_CNTR) + port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); + + return ehca_plpar_hcall_norets(H_MODIFY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + port_attributes, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock) +{ + u64 r_cb = __pa(query_hca_rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response_block=%p not page aligned", + query_hca_rblock); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_QUERY_HCA, + adapter_handle.handle, /* r4 */ + r_cb, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count) +{ + return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, + adapter_handle.handle, /* r4 */ + (u64)queue_type | ((u64)pagesize) << 8, + /* r5 */ + resource_handle, /* r6 */ + logical_address_of_page, /* r7 */ + count, /* r8 */ + 0, 0); +} + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + if (count != 1) { + ehca_gen_err("Ppage counter=%llx", count); + return H_PARAMETER; + } + return hipz_h_register_rpage(adapter_handle, + pagesize, + queue_type, + eq_handle.handle, + logical_address_of_page, count); +} + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, + u32 ist) +{ + u64 ret; + ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, + adapter_handle.handle, /* r4 */ + ist, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret != H_SUCCESS && ret != H_BUSY) + ehca_gen_err("Could not query interrupt state."); + + return ret; +} + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal) +{ + if (count != 1) { + ehca_gen_err("Page counter=%llx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + cq_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const 
u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa) +{ + if (count > 1) { + ehca_gen_err("Page counter=%llx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe2processed, + void **log_addr_next_rq_wqe2processed, + int dis_and_get_function_code) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + dis_and_get_function_code, /* r5 */ + qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (log_addr_next_sq_wqe2processed) + *log_addr_next_sq_wqe2processed = (void *)outs[0]; + if (log_addr_next_rq_wqe2processed) + *log_addr_next_rq_wqe2processed = (void *)outs[1]; + + return ret; +} + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + update_mask, /* r6 */ + __pa(mqpcb), /* r7 */ + 0, 0, 0, 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Insufficient resources ret=%lli", ret); + + return ret; +} + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal) +{ + return ehca_plpar_hcall_norets(H_QUERY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + __pa(qqpcb), /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = hcp_galpas_dtor(&qp->galpas); + if (ret) { + ehca_gen_err("Could not destruct qp->galpas"); + return H_RESOURCE; + } + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + /* function code */ + 1, /* r5 */ + qp->ipz_qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (ret == H_HARDWARE) + ehca_gen_err("HCA not operational. ret=%lli", ret); + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + qp->ipz_qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource still in use. 
ret=%lli", ret); + + return ret; +} + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port) +{ + return ehca_plpar_hcall_norets(H_DEFINE_AQP0, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, 0, 0); + *pma_qp_nr = (u32)outs[0]; + *bma_qp_nr = (u32)outs[1]; + + if (ret == H_ALIAS_EXIST) + ehca_gen_err("AQP1 already exists. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret; + + ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + return ehca_plpar_hcall_norets(H_DETACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); +} + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag) +{ + u64 ret; + + ret = hcp_galpas_dtor(&cq->galpas); + if (ret) { + ehca_gen_err("Could not destruct cp->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + force_flag != 0 ? 1L : 0L, /* r6 */ + 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq) +{ + u64 ret; + + ret = hcp_galpas_dtor(&eq->galpas); + if (ret) { + ehca_gen_err("Could not destruct eq->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + eq->ipz_eq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource in use. 
ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 5, /* r5 */ + vaddr, /* r6 */ + length, /* r7 */ + (((u64)access_ctrl) << 32ULL), /* r8 */ + pd.value, /* r9 */ + 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret; + + if (unlikely(ehca_debug_level >= 3)) { + if (count > 1) { + u64 *kpage; + int i; + kpage = __va(logical_address_of_page); + for (i = 0; i < count; i++) + ehca_gen_dbg("kpage[%d]=%p", + i, (void *)kpage[i]); + } else + ehca_gen_dbg("kpage=%p", + (void *)logical_address_of_page); + } + + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { + ehca_gen_err("logical_address_of_page not on a 4k boundary " + "adapter_handle=%llx mr=%p mr_handle=%llx " + "pagesize=%x queue_type=%x " + "logical_address_of_page=%llx count=%llx", + adapter_handle.handle, mr, + mr->ipz_mr_handle.handle, pagesize, queue_type, + logical_address_of_page, count); + ret = H_PARAMETER; + } else + ret = hipz_h_register_rpage(adapter_handle, pagesize, + queue_type, + mr->ipz_mr_handle.handle, + logical_address_of_page, count); + return ret; +} + +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->len = outs[0]; + outparms->vaddr = outs[1]; + outparms->acl = outs[4] >> 32; + outparms->lkey = (u32)(outs[5] >> 32); + outparms->rkey = (u32)(outs[5] & (0xffffffff)); + + return ret; +} + +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + length, /* r7 */ + /* r8 */ + ((((u64)access_ctrl) << 32ULL) | pd.value), + mr_addr_cb, /* r9 */ + 0, 0, 0); + outparms->vaddr = outs[1]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = 
ehca_plpar_hcall9(H_REGISTER_SMR, outs, + adapter_handle.handle, /* r4 */ + orig_mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + (((u64)access_ctrl) << 32ULL), /* r7 */ + pd.value, /* r8 */ + 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 6, /* r5 */ + pd.value, /* r6 */ + 0, 0, 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MW, outs, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count) +{ + u64 r_cb = __pa(rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("rblock not page aligned."); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle.handle, + ressource_handle, + r_cb, + 0, 0, 0, 0); +} + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h new file mode 100644 index 0000000..a46e514 --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_if.h @@ -0,0 +1,265 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize + * resources, create the empty EQPT (ring). + */ +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 * act_nr_of_entries, + u32 * act_pages, + u32 * eq_ist); + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask); +/* + * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize + * resources, create the empty CQPT (ring). + */ +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param); + + +/* + * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, + * initialize resources, create empty QPPTs (2 rings). + */ +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_alloc_qp_parms *parms, int is_user); + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block); + +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask); + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock); + +/* + * hipz_h_register_rpage internal function in hcp_if.h for all + * hcp_H_REGISTER_RPAGE calls. 
+ */ +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count); + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle + hcp_adapter_handle, + u32 ist); + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal); + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa); + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe_tb_processed, + void **log_addr_next_rq_wqe_tb_processed, + int dis_and_get_function_code); +enum hcall_sigt { + HCALL_SIGT_NO_CQE = 0, + HCALL_SIGT_BY_WQE = 1, + HCALL_SIGT_EVERY = 2 +}; + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal); + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal); + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp); + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port); + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr); + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag); + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq); + +/* + * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize + * resources. 
+ */ +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +/* hipz_h_query_mr queries MR in HW and FW */ +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_free_resource_mr frees MR resources in HW and FW */ +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr); + +/* hipz_h_reregister_pmr reregisters MR in HW and FW */ +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_smr register shared MR in HW and FW */ +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* + * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_query_mw queries MW in HW and FW */ +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_free_resource_mw frees MW resources in HW and FW */ +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw); + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count); +u64 hipz_h_eoi(int irq); + +#endif /* __HCP_IF_H__ */ diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c new file mode 100644 index 0000000..077376f --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_phyp.c @@ -0,0 +1,82 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * load store abstraction for ehca register access with tracing + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +u64 hcall_map_page(u64 physaddr) +{ + return (u64)ioremap(physaddr, EHCA_PAGESIZE); +} + +int hcall_unmap_page(u64 mapaddr) +{ + iounmap((volatile void __iomem *) mapaddr); + return 0; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user) +{ + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; + + galpas->user.fw_handle = paddr_user; + + return 0; +} + +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + if (galpas->kernel.fw_handle) { + int ret = hcall_unmap_page(galpas->kernel.fw_handle); + if (ret) + return ret; + } + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return 0; +} diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h new file mode 100644 index 0000000..d1b0299 --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_phyp.h @@ -0,0 +1,90 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Waleri Fomin + * Gerd Bayer + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /*PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; + +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + return *(volatile u64 __force *)addr; +} + +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + *(volatile u64 __force *)addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +u64 hcall_map_page(u64 physaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h new file mode 100644 index 0000000..9dac93d --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HIPZ_FNS_H__ +#define __HIPZ_FNS_H__ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +#include "hipz_fns_core.h" + +#define hipz_galpa_store_eq(gal, offset, value) \ + hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_eq(gal, offset) \ + hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qped(gal, offset, value) \ + hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) + +#define hipz_galpa_load_qped(gal, offset) \ + hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) + +#define hipz_galpa_store_mrmw(gal, offset, value) \ + hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) + +#define hipz_galpa_load_mrmw(gal, offset) \ + hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) + +#endif diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h new file mode 100644 index 0000000..868735f --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_fns_core.h @@ -0,0 +1,100 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Heiko J Schick + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal, offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) + +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, + EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); +} + +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, + EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); +} + +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n0_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); +} + +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n1_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); +} + +#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h new file mode 100644 index 0000000..bf996c7 --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_hw.h @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HIPZ_HW_H__ +#define __HIPZ_HW_H__ + +#include "ehca_tools.h" + +#define EHCA_MAX_MTU 4 + +/* QP Table Entry Memory Map */ +struct hipz_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; +/* 0x20*/ + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rqa; + u64 qpx_rqc; +/* 0x40*/ + u64 qpx_st; + u64 qpx_pmstate; + u64 qpx_pmfa; + u64 qpx_pkey; +/* 0x60*/ + u64 qpx_pkeya; + u64 qpx_pkeyb; + u64 qpx_pkeyc; + u64 qpx_pkeyd; +/* 0x80*/ + u64 qpx_qkey; + u64 qpx_dqp; + u64 qpx_dlidp; + u64 qpx_portp; +/* 0xa0*/ + u64 qpx_slidp; + u64 qpx_slidpp; + u64 qpx_dlida; + u64 qpx_porta; +/* 0xc0*/ + u64 qpx_slida; + u64 qpx_slidpa; + u64 qpx_slvl; + u64 qpx_ipd; +/* 0xe0*/ + u64 qpx_mtu; + u64 qpx_lato; + u64 qpx_rlimit; + u64 qpx_rnrlimit; +/* 0x100*/ + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_nspsn; +/* 0x120*/ + u64 qpx_nspsnhwm; + u64 reserved1; + u64 qpx_sdsi; + u64 qpx_sdsbc; +/* 0x140*/ + u64 qpx_sqwsize; + u64 qpx_sqwts; + u64 qpx_lsn; + u64 qpx_nssn; +/* 0x160 */ + u64 qpx_mor; + u64 qpx_cor; + u64 qpx_sqsize; + u64 qpx_erc; +/* 0x180*/ + u64 qpx_rnrrc; + u64 qpx_ernrwt; + u64 qpx_rnrresp; + u64 qpx_lmsna; +/* 0x1a0 */ + u64 qpx_sqhpc; + u64 qpx_sqcptp; + u64 qpx_sigt; + u64 qpx_wqecnt; +/* 0x1c0*/ + u64 qpx_rqhp; + u64 qpx_rqptp; + u64 qpx_rqsize; + u64 qpx_nrr; +/* 0x1e0*/ + u64 qpx_rdmac; + u64 qpx_nrpsn; + u64 qpx_lapsn; + u64 qpx_lcr; +/* 0x200*/ + u64 qpx_rwc; + u64 qpx_rwva; + u64 qpx_rdsi; + u64 qpx_rdsbc; +/* 0x220*/ + u64 qpx_rqwsize; + u64 qpx_crmsn; + u64 qpx_rdd; + u64 qpx_larpsn; +/* 0x240*/ + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; +/* 0x260*/ + u64 qpx_aaelog; + u64 qpx_ram; + u64 qpx_rdmaqe0; + u64 qpx_rdmaqe1; +/* 0x280*/ + u64 qpx_rdmaqe2; + u64 qpx_rdmaqe3; + u64 qpx_nrpsnhwm; +/* 0x298*/ + u64 reserved[(0x400 - 0x298) / 8]; +/* 0x400 extended data */ + u64 reserved_ext[(0x500 - 0x400) / 8]; +/* 0x500 */ + u64 reserved2[(0x1000 - 0x500) / 8]; +/* 0x1000 */ +}; + +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) + +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) + +/* MRMWPT Entry Memory Map */ +struct hipz_mrmwmm { + /* 0x00 */ + u64 mrx_hcr; + + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + /* 0x20 */ + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + /* 0x40 */ + u64 reserved4[(0x200 - 0x40) / 8]; + /* 0x200 */ + u64 mrx_ctl[64]; + +}; + +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) + +struct hipz_qpedmm { + /* 0x00 */ + u64 reserved0[(0x400) / 8]; + /* 0x400 */ + u64 qpedx_phh; + u64 qpedx_ppsgp; + /* 0x410 */ + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + /* 0x420 */ + u64 qpedx_ppdgu; + u64 qpedx_aph; + /* 0x430 */ + u64 qpedx_apsgp; + u64 qpedx_apsgu; + /* 0x440 */ + u64 qpedx_apdgp; + u64 qpedx_apdgu; + /* 0x450 */ + u64 qpedx_apav; + u64 qpedx_apsav; + /* 0x460 */ + u64 qpedx_hcr; + u64 reserved1[4]; + /* 0x488 */ + u64 qpedx_rrl0; + /* 0x490 */ + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + /* 0x4a0 */ + u64 reserved2; + u64 qpedx_rrl1; + /* 0x4b0 */ + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + /* 0x4c0 */ + u64 reserved3; + u64 qpedx_rrl2; + /* 0x4d0 */ + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + /* 0x4e0 */ + u64 reserved4; + u64 qpedx_rrl3; + /* 0x4f0 */ + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) + +/* CQ Table Entry Memory Map */ +struct hipz_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; +/* 0x20 */ + u64 cqx_ptp; 
+ u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; +/* 0x40 */ + u64 cqx_ep; + u64 cqx_eq; +/* 0x50 */ + u64 reserved1; + u64 cqx_n0; +/* 0x60 */ + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +/* 0x1000 */ +}; + +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) + +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) + +/* EQ Table Entry Memory Map */ +struct hipz_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + + u64 eqx_herr; + u64 eqx_aer; +/* 0x20 */ + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + +/* 0x40 */ + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; +/* 0x60 */ + u64 eqx_it; + +}; + +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) + +/* access control defines for MR/MW */ +#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 +#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 +#define HIPZ_ACCESSCTRL_R_READ 0x00200000 +#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 +#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 + +/* query hca response block */ +struct hipz_query_hca { + u32 cur_reliable_dg; + u32 cur_qp; + u32 cur_cq; + u32 cur_eq; + u32 cur_mr; + u32 cur_mw; + u32 cur_ee_context; + u32 cur_mcast_grp; + u32 cur_qp_attached_mcast_grp; + u32 reserved1; + u32 cur_ipv6_qp; + u32 cur_eth_qp; + u32 cur_hp_mr; + u32 reserved2[3]; + u32 max_rd_domain; + u32 max_qp; + u32 max_cq; + u32 max_eq; + u32 max_mr; + u32 max_hp_mr; + u32 max_mw; + u32 max_mrwpte; + u32 max_special_mrwpte; + u32 max_rd_ee_context; + u32 max_mcast_grp; + u32 max_total_mcast_qp_attach; + u32 max_mcast_qp_attach; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 internal_clock_frequency; + u32 max_pd; + u32 max_ah; + u32 max_cqe; + u32 max_wqes_wq; + u32 max_partitions; + u32 max_rr_ee_context; + u32 max_rr_qp; + u32 max_rr_hca; + u32 max_act_wqs_ee_context; + u32 max_act_wqs_qp; + u32 max_sge; + u32 max_sge_rd; + u32 memory_page_size_supported; + u64 max_mr_size; + u32 local_ca_ack_delay; + u32 num_ports; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 node_guid; + u64 hca_cap_indicators; + u32 data_counter_register_size; + u32 max_shared_rq; + u32 max_isns_eq; + u32 max_neq; +} __attribute__ ((packed)); + +#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) +#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) +#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) +#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) +#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) +#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) +#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) +#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) +#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) +#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) +#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) +#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) +#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) +#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) +#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) +#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) +#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) +#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) + +/* query port response block */ +struct hipz_query_port { + u32 state; + u32 bad_pkey_cntr; + u32 lmc; + u32 lid; + u32 subnet_timeout; + u32 qkey_viol_cntr; + u32 sm_sl; + u32 sm_lid; + u32 capability_mask; + u32 init_type_reply; + u32 pkey_tbl_len; + u32 gid_tbl_len; + u64 gid_prefix; + u32 port_nr; 
+ u16 pkey_entries[16]; + u8 reserved1[32]; + u32 trent_size; + u32 trbuf_size; + u64 max_msg_sz; + u32 max_mtu; + u32 vl_cap; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; + u64 guid_entries[255]; +} __attribute__ ((packed)); + +#endif diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c new file mode 100644 index 0000000..7ffc748 --- /dev/null +++ b/drivers/staging/rdma/ehca/ipz_pt_fn.c @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" +#include "ehca_classes.h" + +#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) + +struct kmem_cache *small_qp_cache; + +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % queue->pagesize) { + ehca_gen_err("ERROR!! not at PAGE-Boundary"); + return NULL; + } + return ret; +} + +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) +{ + int i; + for (i = 0; i < queue->queue_length / queue->pagesize; i++) { + u64 page = __pa(queue->queue_pages[i]); + if (addr >= page && addr < page + queue->pagesize) { + *q_offset = addr - page + i * queue->pagesize; + return 0; + } + } + return -EINVAL; +} + +#if PAGE_SHIFT < EHCA_PAGESHIFT +#error Kernel pages must be at least as large than eHCA pages (4K) ! 
+#endif + +/* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ +static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) +{ + int k, f = 0; + u8 *kpage; + + while (f < nr_of_pages) { + kpage = (u8 *)get_zeroed_page(GFP_KERNEL); + if (!kpage) + goto out; + + for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { + queue->queue_pages[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + return 1; + +out: + for (f = 0; f < nr_of_pages && queue->queue_pages[f]; + f += PAGES_PER_KPAGE) + free_page((unsigned long)(queue->queue_pages)[f]); + return 0; +} + +static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page; + unsigned long bit; + + mutex_lock(&pd->lock); + + if (!list_empty(&pd->free[order])) + page = list_entry(pd->free[order].next, + struct ipz_small_queue_page, list); + else { + page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); + if (!page) + goto out; + + page->page = get_zeroed_page(GFP_KERNEL); + if (!page->page) { + kmem_cache_free(small_qp_cache, page); + goto out; + } + + list_add(&page->list, &pd->free[order]); + } + + bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); + __set_bit(bit, page->bitmap); + page->fill++; + + if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) + list_move(&page->list, &pd->full[order]); + + mutex_unlock(&pd->lock); + + queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); + queue->small_page = page; + queue->offset = bit << (order + 9); + return 1; + +out: + ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); + return 0; +} + +static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page = queue->small_page; + unsigned long bit; + int free_page = 0; + + bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) + >> (order + 9); + + mutex_lock(&pd->lock); + + __clear_bit(bit, page->bitmap); + page->fill--; + + if (page->fill == 0) { + list_del(&page->list); + free_page = 1; + } + + if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) + /* the page was full until we freed the chunk */ + list_move_tail(&page->list, &pd->free[order]); + + mutex_unlock(&pd->lock); + + if (free_page) { + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } +} + +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small) +{ + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x " + "is greater than kernel page size", pagesize); + return 0; + } + + /* init queue fields */ + queue->queue_length = nr_of_pages * pagesize; + queue->pagesize = pagesize; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->current_q_offset = 0; + queue->toggle_state = 1; + queue->small_page = NULL; + + /* allocate queue page pointers */ + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), + GFP_KERNEL | __GFP_NOWARN); + if (!queue->queue_pages) { + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } + } + + /* allocate actual queue pages */ + if (is_small) { + if (!alloc_small_queue_page(queue, pd)) + 
goto ipz_queue_ctor_exit0; + } else + if (!alloc_queue_pages(queue, nr_of_pages)) + goto ipz_queue_ctor_exit0; + + return 1; + +ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't alloc pages queue=%p " + "nr_of_pages=%x", queue, nr_of_pages); + kvfree(queue->queue_pages); + + return 0; +} + +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) +{ + int i, nr_pages; + + if (!queue || !queue->queue_pages) { + ehca_gen_dbg("queue or queue_pages is NULL"); + return 0; + } + + if (queue->small_page) + free_small_queue_page(queue, pd); + else { + nr_pages = queue->queue_length / queue->pagesize; + for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) + free_page((unsigned long)queue->queue_pages[i]); + } + + kvfree(queue->queue_pages); + + return 1; +} + +int ehca_init_small_qp_cache(void) +{ + small_qp_cache = kmem_cache_create("ehca_cache_small_qp", + sizeof(struct ipz_small_queue_page), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!small_qp_cache) + return -ENOMEM; + + return 0; +} + +void ehca_cleanup_small_qp_cache(void) +{ + kmem_cache_destroy(small_qp_cache); +} diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h new file mode 100644 index 0000000..a801274 --- /dev/null +++ b/drivers/staging/rdma/ehca/ipz_pt_fn.h @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +struct ehca_pd; +struct ipz_small_queue_page; + +extern struct kmem_cache *small_qp_cache; + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) + +struct ipz_small_queue_page { + unsigned long page; + unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; + int fill; + void *mapped_addr; + u32 mmap_count; + struct list_head list; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 offset; /* save offset within page for small_qp */ + struct ipz_small_queue_page *small_page; +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page; + if (q_offset >= queue->queue_length) + return NULL; + current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +/* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + return ipz_qeit_is_valid(queue) ? 
ipz_qeit_get_inc(queue) : NULL; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* + * return the q_offset corresponding to an absolute address + */ +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); + +/* + * return the next queue offset. don't modify the queue. + */ +static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) +{ + offset += queue->qe_size; + if (offset >= queue->queue_length) offset = 0; + return offset; +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 n_qpts; + u32 n_ptes; /* number of page table entries */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_qes, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + return ret; +} + +static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + return ret; +} + +/* returns address (GX) of first 
queue entry */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+	return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+	return qpt->qpts;
+}
+
+#endif /* __IPZ_PT_FN_H__ */
-- cgit v0.10.2


From 7c0d35a339db612aae5496424030307128f088a9 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Fri, 11 Sep 2015 13:07:36 -0700
Subject: MODSIGN: fix a compilation warning in extract-cert

Fix the following warning when compiling extract-cert:

  scripts/extract-cert.c: In function `write_cert':
  scripts/extract-cert.c:89:2: warning: format not a string literal and
  no format arguments [-Wformat-security]
    ERR(!i2d_X509_bio(wb, x509), cert_dst);
    ^

whereby the ERR() macro is taking cert_dst as the format string. "%s"
should be used as the format string instead, as the path could contain
special characters.

Signed-off-by: David Howells
Reported-by: Jim Davis
Acked-by: David Woodhouse
Cc: James Morris
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds

diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c
index fd0db01..10d23ca 100644
--- a/scripts/extract-cert.c
+++ b/scripts/extract-cert.c
@@ -86,7 +86,7 @@ static void write_cert(X509 *x509)
 		ERR(!wb, "%s", cert_dst);
 	}
 	X509_NAME_oneline(X509_get_subject_name(x509), buf, sizeof(buf));
-	ERR(!i2d_X509_bio(wb, x509), cert_dst);
+	ERR(!i2d_X509_bio(wb, x509), "%s", cert_dst);
 	if (kbuild_verbose)
 		fprintf(stderr, "Extracted cert: %s\n", buf);
 }
-- cgit v0.10.2


From 5b25b13ab08f616efd566347d809b4ece54570d1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 11 Sep 2015 13:07:39 -0700
Subject: sys_membarrier(): system-wide memory barrier (generic, x86)

Here is an implementation of a new system call, sys_membarrier(), which
executes a memory barrier on all threads running on the system. It is
implemented by calling synchronize_sched(). It can be used to distribute
the cost of user-space memory barriers asymmetrically by transforming
pairs of memory barriers into pairs consisting of sys_membarrier() and a
compiler barrier. For synchronization primitives that distinguish between
read-side and write-side (e.g. userspace RCU [1], rwlocks), the read-side
can be accelerated significantly by moving the bulk of the memory barrier
overhead to the write-side.

The existing applications of which I am aware that would be improved by
this system call are as follows:

* Through Userspace RCU library (http://urcu.so)
  - DNS server (Knot DNS) https://www.knot-dns.cz/
  - Network sniffer (http://netsniff-ng.org/)
  - Distributed object storage (https://sheepdog.github.io/sheepdog/)
  - User-space tracing (http://lttng.org)
  - Network storage system (https://www.gluster.org/)
  - Virtual routers
    (https://events.linuxfoundation.org/sites/events/files/slides/DPDK_RCU_0MQ.pdf)
  - Financial software (https://lkml.org/lkml/2015/3/23/189)

Those projects use RCU in userspace to increase read-side speed and
scalability compared to locking. Especially in the case of RCU used by
libraries, sys_membarrier can speed up the read-side by moving the bulk
of the memory barrier cost to synchronize_rcu().

* Direct users of sys_membarrier
  - core dotnet garbage collector
    (https://github.com/dotnet/coreclr/issues/198)

Microsoft core dotnet GC developers are planning to use the mprotect()
side-effect of issuing memory barriers through IPIs as a way to implement
Windows FlushProcessWriteBuffers() on Linux.
They are referring to sys_membarrier in their github thread, specifically stating that sys_membarrier() is what they are looking for.

To explain the benefit of this scheme, let's introduce two example threads:

Thread A (non-frequent, e.g. executing liburcu synchronize_rcu())
Thread B (frequent, e.g. executing liburcu rcu_read_lock()/rcu_read_unlock())

In a scheme where all smp_mb() in thread A are ordering memory accesses with respect to smp_mb() present in Thread B, we can change each smp_mb() within Thread A into calls to sys_membarrier() and each smp_mb() within Thread B into compiler barriers "barrier()".

Before the change, we had, for each smp_mb() pair:

Thread A                    Thread B
previous mem accesses       previous mem accesses
smp_mb()                    smp_mb()
following mem accesses      following mem accesses

After the change, these pairs become:

Thread A                    Thread B
prev mem accesses           prev mem accesses
sys_membarrier()            barrier()
follow mem accesses         follow mem accesses

As we can see, there are two possible scenarios: either Thread B memory accesses do not happen concurrently with Thread A accesses (1), or they do (2).

1) Non-concurrent Thread A vs Thread B accesses:

Thread A                    Thread B
prev mem accesses
sys_membarrier()
follow mem accesses
                            prev mem accesses
                            barrier()
                            follow mem accesses

In this case, thread B accesses will be weakly ordered. This is OK, because at that point, thread A is not particularly interested in ordering them with respect to its own accesses.

2) Concurrent Thread A vs Thread B accesses:

Thread A                    Thread B
prev mem accesses           prev mem accesses
sys_membarrier()            barrier()
follow mem accesses         follow mem accesses

In this case, thread B accesses, which are ensured to be in program order thanks to the compiler barrier, will be "upgraded" to full smp_mb() by synchronize_sched().

* Benchmarks

On Intel Xeon E5405 (8 cores), with one thread calling sys_membarrier and the other 7 threads busy looping: 1000 non-expedited sys_membarrier calls in 33s = 33 milliseconds/call.

* User-space user of this system call: Userspace RCU library

Both the signal-based and the sys_membarrier userspace RCU schemes permit us to remove the memory barrier from the userspace RCU rcu_read_lock() and rcu_read_unlock() primitives, thus significantly accelerating them. These memory barriers are replaced by compiler barriers on the read-side, and all matching memory barriers on the write-side are turned into an invocation of a memory barrier on all active threads in the process. By letting the kernel perform this synchronization rather than dumbly sending a signal to every process thread (as we currently do), we diminish the number of unnecessary wake-ups and only issue the memory barriers on active threads. Non-running threads do not need to execute such a barrier anyway, because these are implied by the scheduler context switches.

Results in liburcu:

Operations in 10s, 6 readers, 2 writers:

memory barriers in reader:    1701557485 reads, 2202847 writes
signal-based scheme:          9830061167 reads,    6700 writes
sys_membarrier:               9952759104 reads,     425 writes
sys_membarrier (dyn. check):  7970328887 reads,     425 writes

The dynamic sys_membarrier availability check adds some overhead to the read-side compared to the signal-based scheme, but besides that, sys_membarrier slightly outperforms the signal-based scheme. However, this non-expedited sys_membarrier implementation has a much slower grace period than signal and memory barrier schemes.
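A minimal user-space sketch of this pairing (illustrative only, not part of the patch; it assumes a kernel that wires up __NR_membarrier and installs the <linux/membarrier.h> UAPI header):

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

/* Thin wrapper; libc provides none for this syscall. */
static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

/* Thread A (rare, write side): a former smp_mb() becomes the syscall. */
static void writer_mb(void)
{
	membarrier(MEMBARRIER_CMD_SHARED, 0);
}

/* Thread B (frequent, read side): the matching smp_mb() degrades to a
 * compiler barrier, "upgraded" whenever Thread A issues the call. */
static void reader_mb(void)
{
	barrier();
}

A careful caller would first issue MEMBARRIER_CMD_QUERY and fall back to real memory barriers when MEMBARRIER_CMD_SHARED is not advertised in the returned bitmask.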
Besides diminishing the number of wake-ups, one major advantage of the membarrier system call over the signal-based scheme is that it does not need to reserve a signal. This plays much more nicely with libraries, and with processes injected into for tracing purposes, for which we cannot expect that signals will be unused by the application. An expedited version of this system call can be added later on to speed up the grace period. Its implementation will likely depend on reading the cpu_curr()->mm without holding each CPU's rq lock. This patch adds the system call to x86 and to asm-generic.

[1] http://urcu.so

membarrier(2) man page:

MEMBARRIER(2)             Linux Programmer's Manual            MEMBARRIER(2)

NAME
       membarrier - issue memory barriers on a set of threads

SYNOPSIS
       #include <linux/membarrier.h>

       int membarrier(int cmd, int flags);

DESCRIPTION
       The cmd argument is one of the following:

       MEMBARRIER_CMD_QUERY
              Query the set of supported commands. It returns a bitmask of
              supported commands.

       MEMBARRIER_CMD_SHARED
              Execute a memory barrier on all threads running on the
              system. Upon return from system call, the caller thread is
              ensured that all running threads have passed through a state
              where all memory accesses to user-space addresses match
              program order between entry to and return from the system
              call (non-running threads are de facto in such a state). This
              covers threads from all processes running on the system. This
              command returns 0.

       The flags argument needs to be 0. For future extensions.

       All memory accesses performed in program order from each targeted
       thread are guaranteed to be ordered with respect to
       sys_membarrier(). If we use the semantic "barrier()" to represent a
       compiler barrier forcing memory accesses to be performed in program
       order across the barrier, and smp_mb() to represent explicit memory
       barriers forcing full memory ordering across the barrier, we have
       the following ordering table for each pair of barrier(),
       sys_membarrier() and smp_mb():

       The pair ordering is detailed as (O: ordered, X: not ordered):

                        barrier()  smp_mb()  sys_membarrier()
       barrier()            X         X             O
       smp_mb()             X         O             O
       sys_membarrier()     O         O             O

RETURN VALUE
       On success, these system calls return zero. On error, -1 is
       returned, and errno is set appropriately. For a given command, with
       flags argument set to 0, this system call is guaranteed to always
       return the same value until reboot.

ERRORS
       ENOSYS System call is not implemented.

       EINVAL Invalid arguments.

Linux                            2015-04-15                    MEMBARRIER(2)

Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E. McKenney Reviewed-by: Josh Triplett Cc: KOSAKI Motohiro Cc: Steven Rostedt Cc: Nicholas Miell Cc: Ingo Molnar Cc: Alan Cox Cc: Lai Jiangshan Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: David Howells Cc: Pranith Kumar Cc: Michael Kerrisk Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 310da42..e77bc84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6789,6 +6789,14 @@ W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlxsw/ +MEMBARRIER SUPPORT +M: Mathieu Desnoyers +M: "Paul E.
McKenney" +L: linux-kernel@vger.kernel.org +S: Supported +F: kernel/membarrier.c +F: include/uapi/linux/membarrier.h + MEMORY MANAGEMENT L: linux-mm@kvack.org W: http://www.linux-mm.org diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 477bfa6..7663c45 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -381,3 +381,4 @@ 372 i386 recvmsg sys_recvmsg compat_sys_recvmsg 373 i386 shutdown sys_shutdown 374 i386 userfaultfd sys_userfaultfd +375 i386 membarrier sys_membarrier diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 81c4906..278842f 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -330,6 +330,7 @@ 321 common bpf sys_bpf 322 64 execveat stub_execveat 323 common userfaultfd sys_userfaultfd +324 common membarrier sys_membarrier # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0800131..a460e2e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -885,4 +885,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); +asmlinkage long sys_membarrier(int cmd, int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index e016bd9..8da542a 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -709,9 +709,11 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 281 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) +#define __NR_membarrier 282 +__SYSCALL(__NR_membarrier, sys_membarrier) #undef __NR_syscalls -#define __NR_syscalls 282 +#define __NR_syscalls 283 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 70ff1d9..f7b2db4 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -252,6 +252,7 @@ header-y += mdio.h header-y += media.h header-y += media-bus-format.h header-y += mei.h +header-y += membarrier.h header-y += memfd.h header-y += mempolicy.h header-y += meye.h diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h new file mode 100644 index 0000000..e0b108b --- /dev/null +++ b/include/uapi/linux/membarrier.h @@ -0,0 +1,53 @@ +#ifndef _UAPI_LINUX_MEMBARRIER_H +#define _UAPI_LINUX_MEMBARRIER_H + +/* + * linux/membarrier.h + * + * membarrier system call API + * + * Copyright (c) 2010, 2015 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * enum membarrier_cmd - membarrier system call command + * @MEMBARRIER_CMD_QUERY: Query the set of supported commands. It returns + * a bitmask of valid commands. + * @MEMBARRIER_CMD_SHARED: Execute a memory barrier on all running threads. + * Upon return from system call, the caller thread + * is ensured that all running threads have passed + * through a state where all memory accesses to + * user-space addresses match program order between + * entry to and return from the system call + * (non-running threads are de facto in such a + * state). This covers threads from all processes + * running on the system. This command returns 0. + * + * Command to be passed to the membarrier system call. The commands need to + * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to + * the value 0. + */ +enum membarrier_cmd { + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), +}; + +#endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/init/Kconfig b/init/Kconfig index 02da9f1..c24b6f7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1602,6 +1602,18 @@ config PCI_QUIRKS bugs/quirks. Disable this only if your target machine is unaffected by PCI quirks. +config MEMBARRIER + bool "Enable membarrier() system call" if EXPERT + default y + help + Enable the membarrier() system call that allows issuing memory + barriers across all running threads, which can be used to distribute + the cost of user-space memory barriers asymmetrically by transforming + pairs of memory barriers into pairs consisting of membarrier() and a + compiler barrier. + + If unsure, say Y. + config EMBEDDED bool "Embedded system" option allnoconfig_y diff --git a/kernel/Makefile b/kernel/Makefile index d498841..53abf00 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o obj-$(CONFIG_TORTURE_TEST) += torture.o +obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_HAS_IOMEM) += memremap.o diff --git a/kernel/membarrier.c b/kernel/membarrier.c new file mode 100644 index 0000000..536c727 --- /dev/null +++ b/kernel/membarrier.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2010, 2015 Mathieu Desnoyers + * + * membarrier system call + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +/* + * Bitmask made from a "or" of all commands within enum membarrier_cmd, + * except MEMBARRIER_CMD_QUERY. + */ +#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED) + +/** + * sys_membarrier - issue memory barriers on a set of threads + * @cmd: Takes command values defined in enum membarrier_cmd. + * @flags: Currently needs to be 0. For future extensions. 
+ * + * If this system call is not implemented, -ENOSYS is returned. If the + * command specified does not exist, or if the command argument is invalid, + * this system call returns -EINVAL. For a given command, with flags argument + * set to 0, this system call is guaranteed to always return the same value + * until reboot. + * + * All memory accesses performed in program order from each targeted thread + * is guaranteed to be ordered with respect to sys_membarrier(). If we use + * the semantic "barrier()" to represent a compiler barrier forcing memory + * accesses to be performed in program order across the barrier, and + * smp_mb() to represent explicit memory barriers forcing full memory + * ordering across the barrier, we have the following ordering table for + * each pair of barrier(), sys_membarrier() and smp_mb(): + * + * The pair ordering is detailed as (O: ordered, X: not ordered): + * + * barrier() smp_mb() sys_membarrier() + * barrier() X X O + * smp_mb() X O O + * sys_membarrier() O O O + */ +SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) +{ + if (unlikely(flags)) + return -EINVAL; + switch (cmd) { + case MEMBARRIER_CMD_QUERY: + return MEMBARRIER_CMD_BITMASK; + case MEMBARRIER_CMD_SHARED: + if (num_online_cpus() > 1) + synchronize_sched(); + return 0; + default: + return -EINVAL; + } +} diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 03c3875..a02decf 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -245,3 +245,6 @@ cond_syscall(sys_bpf); /* execveat */ cond_syscall(sys_execveat); + +/* membarrier */ +cond_syscall(sys_membarrier); -- cgit v0.10.2 From b6d973441675222a4e6c8cad8208c2fe098a0b25 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 11 Sep 2015 13:07:42 -0700 Subject: selftests: add membarrier syscall test Add a self test for the membarrier system call. 
Signed-off-by: Pranith Kumar Signed-off-by: Mathieu Desnoyers Cc: Michael Ellerman Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 0501511..89b05e2 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -6,6 +6,7 @@ TARGETS += firmware TARGETS += ftrace TARGETS += futex TARGETS += kcmp +TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug TARGETS += mount diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore new file mode 100644 index 0000000..020c44f4 --- /dev/null +++ b/tools/testing/selftests/membarrier/.gitignore @@ -0,0 +1 @@ +membarrier_test diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile new file mode 100644 index 0000000..877a503 --- /dev/null +++ b/tools/testing/selftests/membarrier/Makefile @@ -0,0 +1,11 @@ +CFLAGS += -g -I../../../../usr/include/ + +all: + $(CC) $(CFLAGS) membarrier_test.c -o membarrier_test + +TEST_PROGS := membarrier_test + +include ../lib.mk + +clean: + $(RM) membarrier_test diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c new file mode 100644 index 0000000..3c9f217 --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -0,0 +1,71 @@ +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +static int sys_membarrier(int cmd, int flags) +{ + return syscall(__NR_membarrier, cmd, flags); +} + +static void test_membarrier_fail(void) +{ + int cmd = -1, flags = 0; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Should fail but passed\n"); + ksft_exit_fail(); + } +} + +static void test_membarrier_success(void) +{ + int flags = 0; + + if (sys_membarrier(MEMBARRIER_CMD_SHARED, flags) != 0) { + printf("membarrier: Executing MEMBARRIER failed, %s\n", + strerror(errno)); + ksft_exit_fail(); + } + + printf("membarrier: MEMBARRIER_CMD_SHARED success\n"); +} + +static void test_membarrier(void) +{ + test_membarrier_fail(); + test_membarrier_success(); +} + +static int test_membarrier_exists(void) +{ + int flags = 0; + + if (sys_membarrier(MEMBARRIER_CMD_QUERY, flags)) + return 0; + + return 1; +} + +int main(int argc, char **argv) +{ + printf("membarrier: MEMBARRIER_CMD_QUERY "); + if (test_membarrier_exists()) { + printf("syscall implemented\n"); + test_membarrier(); + } else { + printf("syscall not implemented!\n"); + return ksft_exit_fail(); + } + + printf("membarrier: tests done!\n"); + + return ksft_exit_pass(); +} -- cgit v0.10.2 From c9946c4208a3725e116c05180d93154eb406d451 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 11 Sep 2015 13:07:45 -0700 Subject: selftests: enhance membarrier syscall test Update the membarrier syscall self-test to match the membarrier interface. Extend coverage of the interface. Consider ENOSYS as a "SKIP" test, since it is a valid configuration, but does not allow testing the system call. 
Signed-off-by: Mathieu Desnoyers Cc: Michael Ellerman Cc: Pranith Kumar Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index 3c9f217..dde3125 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -10,62 +10,112 @@ #include "../kselftest.h" +enum test_membarrier_status { + TEST_MEMBARRIER_PASS = 0, + TEST_MEMBARRIER_FAIL, + TEST_MEMBARRIER_SKIP, +}; + static int sys_membarrier(int cmd, int flags) { return syscall(__NR_membarrier, cmd, flags); } -static void test_membarrier_fail(void) +static enum test_membarrier_status test_membarrier_cmd_fail(void) { int cmd = -1, flags = 0; if (sys_membarrier(cmd, flags) != -1) { - printf("membarrier: Should fail but passed\n"); - ksft_exit_fail(); + printf("membarrier: Wrong command should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_flags_fail(void) +{ + int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong flags should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; } + return TEST_MEMBARRIER_PASS; } -static void test_membarrier_success(void) +static enum test_membarrier_status test_membarrier_success(void) { - int flags = 0; + int cmd = MEMBARRIER_CMD_SHARED, flags = 0; - if (sys_membarrier(MEMBARRIER_CMD_SHARED, flags) != 0) { - printf("membarrier: Executing MEMBARRIER failed, %s\n", + if (sys_membarrier(cmd, flags) != 0) { + printf("membarrier: Executing MEMBARRIER_CMD_SHARED failed. %s.\n", strerror(errno)); - ksft_exit_fail(); + return TEST_MEMBARRIER_FAIL; } - printf("membarrier: MEMBARRIER_CMD_SHARED success\n"); + printf("membarrier: MEMBARRIER_CMD_SHARED success.\n"); + return TEST_MEMBARRIER_PASS; } -static void test_membarrier(void) +static enum test_membarrier_status test_membarrier(void) { - test_membarrier_fail(); - test_membarrier_success(); + enum test_membarrier_status status; + + status = test_membarrier_cmd_fail(); + if (status) + return status; + status = test_membarrier_flags_fail(); + if (status) + return status; + status = test_membarrier_success(); + if (status) + return status; + return TEST_MEMBARRIER_PASS; } -static int test_membarrier_exists(void) +static enum test_membarrier_status test_membarrier_query(void) { - int flags = 0; - - if (sys_membarrier(MEMBARRIER_CMD_QUERY, flags)) - return 0; + int flags = 0, ret; - return 1; + printf("membarrier MEMBARRIER_CMD_QUERY "); + ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags); + if (ret < 0) { + printf("failed. %s.\n", strerror(errno)); + switch (errno) { + case ENOSYS: + /* + * It is valid to build a kernel with + * CONFIG_MEMBARRIER=n. However, this skips the tests. 
+ */ + return TEST_MEMBARRIER_SKIP; + case EINVAL: + default: + return TEST_MEMBARRIER_FAIL; + } + } + if (!(ret & MEMBARRIER_CMD_SHARED)) { + printf("command MEMBARRIER_CMD_SHARED is not supported.\n"); + return TEST_MEMBARRIER_FAIL; + } + printf("syscall available.\n"); + return TEST_MEMBARRIER_PASS; } int main(int argc, char **argv) { - printf("membarrier: MEMBARRIER_CMD_QUERY "); - if (test_membarrier_exists()) { - printf("syscall implemented\n"); - test_membarrier(); - } else { - printf("syscall not implemented!\n"); + switch (test_membarrier_query()) { + case TEST_MEMBARRIER_FAIL: return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + switch (test_membarrier()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); } printf("membarrier: tests done!\n"); - return ksft_exit_pass(); } -- cgit v0.10.2 From 6798a8caaf64fa68b9ab2044e070fe4545034e03 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 11 Sep 2015 13:07:48 -0700 Subject: fs/seq_file: convert int seq_vprint/seq_printf/etc... returns to void The seq_ function return values were frequently misused. See: commit 1f33c41c03da ("seq_file: Rename seq_overflow() to seq_has_overflowed() and make public") All uses of these return values have been removed, so convert the return types to void. Miscellanea: o Move seq_put_decimal_ and seq_escape prototypes closer the other seq_vprintf prototypes o Reorder seq_putc and seq_puts to return early on overflow o Add argument names to seq_vprintf and seq_printf o Update the seq_escape kernel-doc o Convert a couple of leading spaces to tabs in seq_escape Signed-off-by: Joe Perches Cc: Al Viro Cc: Steven Rostedt Cc: Mark Brown Cc: Stephen Rothwell Cc: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 0717aa9..9bc20e2 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -135,8 +135,9 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) static ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, struct seq_file *s) { - return seq_printf(s, "%08x %08x %01x\n", cr->cam, cr->ram, + seq_printf(s, "%08x %08x %01x\n", cr->cam, cr->ram, (cr->cam & MMU_CAM_P) ? 1 : 0); + return 0; } static size_t omap_dump_tlb_entries(struct omap_iommu *obj, struct seq_file *s) diff --git a/fs/nsfs.c b/fs/nsfs.c index e4905fb..8f20d60 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -142,7 +142,8 @@ static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = dentry->d_fsdata; - return seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); + seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); + return 0; } static const struct super_operations nsfs_ops = { diff --git a/fs/seq_file.c b/fs/seq_file.c index 263b125..225586e 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -372,16 +372,16 @@ EXPORT_SYMBOL(seq_release); * @esc: set of characters that need escaping * * Puts string into buffer, replacing each occurrence of character from - * @esc with usual octal escape. Returns 0 in case of success, -1 - in - * case of overflow. + * @esc with usual octal escape. + * Use seq_has_overflowed() to check for errors. 
*/ -int seq_escape(struct seq_file *m, const char *s, const char *esc) +void seq_escape(struct seq_file *m, const char *s, const char *esc) { char *end = m->buf + m->size; - char *p; + char *p; char c; - for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { + for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { if (!strchr(esc, c)) { *p++ = c; continue; @@ -394,14 +394,13 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc) continue; } seq_set_overflow(m); - return -1; - } + return; + } m->count = p - m->buf; - return 0; } EXPORT_SYMBOL(seq_escape); -int seq_vprintf(struct seq_file *m, const char *f, va_list args) +void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; @@ -409,24 +408,20 @@ int seq_vprintf(struct seq_file *m, const char *f, va_list args) len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); if (m->count + len < m->size) { m->count += len; - return 0; + return; } } seq_set_overflow(m); - return -1; } EXPORT_SYMBOL(seq_vprintf); -int seq_printf(struct seq_file *m, const char *f, ...) +void seq_printf(struct seq_file *m, const char *f, ...) { - int ret; va_list args; va_start(args, f); - ret = seq_vprintf(m, f, args); + seq_vprintf(m, f, args); va_end(args); - - return ret; } EXPORT_SYMBOL(seq_printf); @@ -664,26 +659,25 @@ int seq_open_private(struct file *filp, const struct seq_operations *ops, } EXPORT_SYMBOL(seq_open_private); -int seq_putc(struct seq_file *m, char c) +void seq_putc(struct seq_file *m, char c) { - if (m->count < m->size) { - m->buf[m->count++] = c; - return 0; - } - return -1; + if (m->count >= m->size) + return; + + m->buf[m->count++] = c; } EXPORT_SYMBOL(seq_putc); -int seq_puts(struct seq_file *m, const char *s) +void seq_puts(struct seq_file *m, const char *s) { int len = strlen(s); - if (m->count + len < m->size) { - memcpy(m->buf + m->count, s, len); - m->count += len; - return 0; + + if (m->count + len >= m->size) { + seq_set_overflow(m); + return; } - seq_set_overflow(m); - return -1; + memcpy(m->buf + m->count, s, len); + m->count += len; } EXPORT_SYMBOL(seq_puts); @@ -694,8 +688,8 @@ EXPORT_SYMBOL(seq_puts); * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. 
*/ -int seq_put_decimal_ull(struct seq_file *m, char delimiter, - unsigned long long num) +void seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num) { int len; @@ -707,35 +701,33 @@ int seq_put_decimal_ull(struct seq_file *m, char delimiter, if (num < 10) { m->buf[m->count++] = num + '0'; - return 0; + return; } len = num_to_str(m->buf + m->count, m->size - m->count, num); if (!len) goto overflow; m->count += len; - return 0; + return; + overflow: seq_set_overflow(m); - return -1; } EXPORT_SYMBOL(seq_put_decimal_ull); -int seq_put_decimal_ll(struct seq_file *m, char delimiter, - long long num) +void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num) { if (num < 0) { if (m->count + 3 >= m->size) { seq_set_overflow(m); - return -1; + return; } if (delimiter) m->buf[m->count++] = delimiter; num = -num; delimiter = '-'; } - return seq_put_decimal_ull(m, delimiter, num); - + seq_put_decimal_ull(m, delimiter, num); } EXPORT_SYMBOL(seq_put_decimal_ll); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index adeadbd..dde00de 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -114,13 +114,18 @@ int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); -int seq_escape(struct seq_file *, const char *, const char *); -int seq_putc(struct seq_file *m, char c); -int seq_puts(struct seq_file *m, const char *s); int seq_write(struct seq_file *seq, const void *data, size_t len); -__printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); -__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); +__printf(2, 0) +void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); +__printf(2, 3) +void seq_printf(struct seq_file *m, const char *fmt, ...); +void seq_putc(struct seq_file *m, char c); +void seq_puts(struct seq_file *m, const char *s); +void seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num); +void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num); +void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, @@ -138,10 +143,6 @@ int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); -int seq_put_decimal_ull(struct seq_file *m, char delimiter, - unsigned long long num); -int seq_put_decimal_ll(struct seq_file *m, char delimiter, - long long num); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { -- cgit v0.10.2 From 4f1af60bcc2fc6caa7fa3036238b5994708e9a84 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 11 Sep 2015 13:07:50 -0700 Subject: mm/early_ioremap: add explicit #include of asm/early_ioremap.h Commit 6b0f68e32ea8 ("mm: add utility for early copy from unmapped ram") introduces a function copy_from_early_mem() into mm/early_ioremap.c which itself calls early_memremap()/early_memunmap(). 
However, since early_memunmap() has not been declared yet at this point in the .c file, nor by any explicitly included header files, we are depending on a transitive include of asm/early_ioremap.h to declare it, which is fragile. So instead, include this header explicitly. Signed-off-by: Ard Biesheuvel Acked-by: Mark Salter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index 23f744d..17ae14b 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_MMU static int early_ioremap_debug __initdata; -- cgit v0.10.2 From e527b22c3f63c02832ac4cb8ed0ec3a9b638bbdf Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 11 Sep 2015 13:07:53 -0700 Subject: revert "ocfs2/dlm: use list_for_each_entry instead of list_for_each" Revert commit f83c7b5e9fd6 ("ocfs2/dlm: use list_for_each_entry instead of list_for_each"). list_for_each_entry() will dereference its `pos' argument, which can be NULL in dlm_process_recovery_data(). Reported-by: Julia Lawall Reported-by: Fengguang Wu Cc: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index d0e436d..ce12e0b 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1776,7 +1776,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_migratable_lockres *mres) { struct dlm_migratable_lock *ml; - struct list_head *queue; + struct list_head *queue, *iter; struct list_head *tmpq = NULL; struct dlm_lock *newlock = NULL; struct dlm_lockstatus *lksb = NULL; @@ -1821,7 +1821,9 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { tmpq = dlm_list_idx_to_ptr(res, j); - list_for_each_entry(lock, tmpq, list) { + list_for_each(iter, tmpq) { + lock = list_entry(iter, + struct dlm_lock, list); if (lock->ml.cookie == ml->cookie) break; lock = NULL; -- cgit v0.10.2 From 38c089d1d8d058f5dff018a811568aa8e8bc47fc Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 11 Sep 2015 15:01:16 -0700 Subject: openvswitch: Fix dependency on IPv6 defrag. When NF_CONNTRACK is built-in, NF_DEFRAG_IPV6 is a module, and OPENVSWITCH is built-in, the following build error would occur: net/built-in.o: In function `ovs_ct_execute': (.text+0x10f587): undefined reference to `nf_ct_frag6_gather' Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Reported-by: Jim Davis Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 2a071f4..d143aa9 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -5,7 +5,8 @@ config OPENVSWITCH tristate "Open vSwitch" depends on INET - depends on (!NF_CONNTRACK || NF_CONNTRACK) + depends on !NF_CONNTRACK || \ + (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6)) select LIBCRC32C select MPLS select NET_MPLS_GSO -- cgit v0.10.2 From e8684c88774c0ddfeefdbed0aa469b25b9962f3e Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 12 Sep 2015 00:34:48 +0300 Subject: irda: ali-ircc: Fix deadlock in ali_ircc_sir_change_speed() ali_ircc_sir_change_speed() is always called with self->lock held, so acquiring the lock inside it leads to unavoidable deadlock. 
Call graph: ali_ircc_sir_change_speed() is called from ali_ircc_change_speed() ali_ircc_fir_hard_xmit() under spin_lock_irqsave(&self->lock, flags); ali_ircc_sir_hard_xmit() under spin_lock_irqsave(&self->lock, flags); ali_ircc_net_ioctl() under spin_lock_irqsave(&self->lock, flags); ali_ircc_dma_xmit_complete() ali_ircc_fir_interrupt() ali_ircc_interrupt() under spin_lock(&self->lock); ali_ircc_sir_write_wakeup() ali_ircc_sir_interrupt() ali_ircc_interrupt() under spin_lock(&self->lock); The patch removes spin_lock/unlock from ali_ircc_sir_change_speed(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 58ae11a..64bb44d 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -1031,7 +1031,6 @@ static void ali_ircc_fir_change_speed(struct ali_ircc_cb *priv, __u32 baud) static void ali_ircc_sir_change_speed(struct ali_ircc_cb *priv, __u32 speed) { struct ali_ircc_cb *self = priv; - unsigned long flags; int iobase; int fcr; /* FIFO control reg */ int lcr; /* Line control reg */ @@ -1061,8 +1060,6 @@ static void ali_ircc_sir_change_speed(struct ali_ircc_cb *priv, __u32 speed) /* Update accounting for new speed */ self->io.speed = speed; - spin_lock_irqsave(&self->lock, flags); - divisor = 115200/speed; fcr = UART_FCR_ENABLE_FIFO; @@ -1089,9 +1086,6 @@ static void ali_ircc_sir_change_speed(struct ali_ircc_cb *priv, __u32 speed) /* without this, the connection will be broken after come back from FIR speed, but with this, the SIR connection is harder to established */ outb((UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2), iobase+UART_MCR); - - spin_unlock_irqrestore(&self->lock, flags); - } static void ali_ircc_change_dongle_speed(struct ali_ircc_cb *priv, int speed) -- cgit v0.10.2 From eda2116f4ab6d79cfcffc202b5d2bbb0797ba013 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 11 Sep 2015 19:24:19 -0500 Subject: [CIFS] mount option sec=none not displayed properly in /proc/mounts When the user specifies "sec=none" in a cifs mount, we set sec_type as unspecified (and set a flag and the username will be null) rather than setting sectype as "none" so cifs_show_security was not properly displaying it in cifs /proc/mounts entries. 
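Illustrative effect of the change (hypothetical server, share, and surrounding mount options; only the sec= field is the point here):

  # mount -t cifs //server/share /mnt -o sec=none
  # grep cifs /proc/mounts
  //server/share /mnt cifs rw,relatime,sec=none,... 0 0

Before the fix such a mount showed no sec= entry at all, because cifs_show_security() returned early for the Unspecified sectype without checking whether the username was null.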
Signed-off-by: Steve French Reviewed-by: Jeff Layton diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6a1119e..e739950 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) static void cifs_show_security(struct seq_file *s, struct cifs_ses *ses) { - if (ses->sectype == Unspecified) + if (ses->sectype == Unspecified) { + if (ses->user_name == NULL) + seq_puts(s, ",sec=none"); return; + } seq_puts(s, ",sec="); -- cgit v0.10.2 From 3ebb0540c20d6670396ccee9ff6794c095fa9311 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 11 Sep 2015 16:32:22 -0700 Subject: ARCv2: [axs103_smp] Reduce clk for SMP FPGA configs Newer bitfiles needs the reduced clk even for SMP builds Cc: #4.2 Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index ad9825d..0a77b19 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -402,6 +402,8 @@ static void __init axs103_early_init(void) unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; if (num_cores > 2) arc_set_core_freq(50 * 1000000); + else if (num_cores == 2) + arc_set_core_freq(75 * 1000000); #endif switch (arc_get_core_freq()/1000000) { -- cgit v0.10.2 From dfb22fc5c0eb7645f47a752ce537bfb2c8a6aea6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 Sep 2015 20:06:59 -0700 Subject: thermal: fix intel PCH thermal driver mismerge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I didn't notice this when merging the thermal code from Zhang, but his merge (commit 5a924a07f882: "Merge branches 'thermal-core' and 'thermal-intel' of .git into next") of the thermal-core and thermal-intel branches was wrong. In thermal-core, commit 17e8351a7739 ("thermal: consistently use int for temperatures") converted the thermal layer to use "int" for temperatures. But in parallel, in the thermal-intel branch commit d0a12625d2ff ("thermal: Add Intel PCH thermal driver") added support for the intel PCH thermal sensor using the old interfaces that used "unsigned long" pointers. This resulted in warnings like this: drivers/thermal/intel_pch_thermal.c:184:14: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types] .get_temp = pch_thermal_get_temp, ^ drivers/thermal/intel_pch_thermal.c:184:14: note: (near initialization for ‘tzd_ops.get_temp’) drivers/thermal/intel_pch_thermal.c:186:19: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types] .get_trip_temp = pch_get_trip_temp, ^ drivers/thermal/intel_pch_thermal.c:186:19: note: (near initialization for ‘tzd_ops.get_trip_temp’) This fixes it. 
Signed-off-by: Linus Torvalds diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c index 1650a62..50c7da7 100644 --- a/drivers/thermal/intel_pch_thermal.c +++ b/drivers/thermal/intel_pch_thermal.c @@ -117,8 +117,7 @@ read_trips: return 0; } -static int pch_wpt_get_temp(struct pch_thermal_device *ptd, - unsigned long *temp) +static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) { u8 wpt_temp; @@ -132,7 +131,7 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, struct pch_dev_ops { int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); - int (*get_temp)(struct pch_thermal_device *ptd, unsigned long *temp); + int (*get_temp)(struct pch_thermal_device *ptd, int *temp); }; @@ -142,8 +141,7 @@ static struct pch_dev_ops pch_dev_ops_wpt = { .get_temp = pch_wpt_get_temp, }; -static int pch_thermal_get_temp(struct thermal_zone_device *tzd, - unsigned long *temp) +static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; @@ -165,8 +163,7 @@ static int pch_get_trip_type(struct thermal_zone_device *tzd, int trip, return 0; } -static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long *temp) +static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; -- cgit v0.10.2 From 505a666ee3fc611518e85df203eb8c707995ceaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 Sep 2015 13:37:19 -0700 Subject: writeback: plug writeback in wb_writeback() and writeback_inodes_wb() We had to revert the pluggin in writeback_sb_inodes() because the wb->list_lock is held, but we could easily plug at a higher level before taking that lock, and unplug after releasing it. This does that. Chris will run performance numbers, just to verify that this approach is comparable to the alternative (we could just drop and re-take the lock around the blk_finish_plug() rather than these two commits. I'd have preferred waiting for actual performance numbers before picking one approach over the other, but I don't want to release rc1 with the known "sleeping function called from invalid context" issue, so I'll pick this cleanup version for now. But if the numbers show that we really want to plug just at the writeback_sb_inodes() level, and we should just play ugly games with the spinlock, we'll switch to that. 
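Condensed, the pattern both hunks below apply (a sketch, not the hunks themselves):

	struct blk_plug plug;

	blk_start_plug(&plug);
	spin_lock(&wb->list_lock);
	/* queue and write back inodes; bios get queued on the plug */
	spin_unlock(&wb->list_lock);
	blk_finish_plug(&plug);	/* flush the plug; may sleep, lock already dropped */

Keeping the potentially sleeping blk_finish_plug() outside the spinlock is what avoids the "sleeping function called from invalid context" issue mentioned above.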
Cc: Chris Mason Cc: Josef Bacik Cc: Dave Chinner Cc: Neil Brown Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Linus Torvalds diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d8ea7ed..587ac08 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1546,12 +1546,15 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .range_cyclic = 1, .reason = reason, }; + struct blk_plug plug; + blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work.nr_pages; } @@ -1579,10 +1582,12 @@ static long wb_writeback(struct bdi_writeback *wb, unsigned long oldest_jif; struct inode *inode; long progress; + struct blk_plug plug; oldest_jif = jiffies; work->older_than_this = &oldest_jif; + blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { /* @@ -1662,6 +1667,7 @@ static long wb_writeback(struct bdi_writeback *wb, } } spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work->nr_pages; } -- cgit v0.10.2 From 10fbd36e362a0f367e34a7cd876a81295d8fc5ca Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 May 2015 15:32:15 -0700 Subject: blk: rq_data_dir() should not return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rq_data_dir() returns either READ or WRITE (0 == READ, 1 == WRITE), not a boolean value. Now, admittedly the "!= 0" doesn't really change the value (0 stays as zero, 1 stays as one), but it's not only redundant, it confuses gcc, and causes gcc to warn about the construct switch (rq_data_dir(req)) { case READ: ... case WRITE: ... that we have in a few drivers. Now, the gcc warning is silly and stupid (it seems to warn not about the switch value having a different type from the case statements, but about _any_ boolean switch value), but in this case the code itself is silly and stupid too, so let's just change it, and get rid of warnings like this: drivers/block/hd.c: In function ‘hd_request’: drivers/block/hd.c:630:11: warning: switch condition has boolean value [-Wswitch-bool] switch (rq_data_dir(req)) { The odd '!= 0' came in when "cmd_flags" got turned into a "u64" in commit 5953316dbf90 ("block: make rq->cmd_flags be 64-bit") and is presumably because the old code (that just did a logical 'and' with 1) would then end up making the type of rq_data_dir() be u64 too. But if we want to retain the old regular integer type, let's just cast the result to 'int' rather than use that rather odd '!= 0'. 
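A hypothetical driver fragment showing the construct in question (toy_do_request is made up for illustration):

	static void toy_do_request(struct request *req)
	{
		switch (rq_data_dir(req)) {	/* now plain int: 0 == READ, 1 == WRITE */
		case READ:
			/* program a device-to-memory transfer */
			break;
		case WRITE:
			/* program a memory-to-device transfer */
			break;
		}
	}

With the boolean-valued macro gcc flagged every such switch with -Wswitch-bool; with the int cast the construct compiles cleanly and the value is unchanged.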
Signed-off-by: Linus Torvalds diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 708923b9..38a5ff7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -584,7 +584,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) +#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1)) /* * Driver can handle struct request, if it either has an old style -- cgit v0.10.2 From 6ff33f3902c3b1c5d0db6b1e2c70b6d76fba357f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 12 Sep 2015 16:35:56 -0700 Subject: Linux 4.3-rc1 diff --git a/Makefile b/Makefile index f2d2706..1a132ea 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 -PATCHLEVEL = 2 +PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* -- cgit v0.10.2 From 728d29400488d54974d3317fe8a232b45fdb42ee Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 16:13:47 -0700 Subject: hwmon: (nct6775) Swap STEP_UP_TIME and STEP_DOWN_TIME registers for most chips The STEP_UP_TIME and STEP_DOWN_TIME registers are swapped for all chips but NCT6775. Reported-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index bd1c99d..2aaedbe 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -354,6 +354,10 @@ static const u16 NCT6775_REG_TEMP_CRIT[ARRAY_SIZE(nct6775_temp_label) - 1] /* NCT6776 specific data */ +/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ +#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME +#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME + static const s8 NCT6776_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. 
in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ @@ -3528,8 +3532,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3600,8 +3604,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3677,8 +3681,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; -- cgit v0.10.2 From cd1faefa66425c3fa338777773c5c017edea3439 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Aug 2015 19:45:19 -0700 Subject: hwmon: (nct6775) Add support for NCT6793D NCT6793D is register compatible with NCT6792D. Also move nct6775_sio_names[] closer to enum kinds to simplify adding new chips. Tested-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775 index f0dd3d2..76add4c 100644 --- a/Documentation/hwmon/nct6775 +++ b/Documentation/hwmon/nct6775 @@ -32,6 +32,10 @@ Supported chips: Prefix: 'nct6792' Addresses scanned: ISA address retrieved from Super I/O registers Datasheet: Available from Nuvoton upon request + * Nuvoton NCT6793D + Prefix: 'nct6793' + Addresses scanned: ISA address retrieved from Super I/O registers + Datasheet: Available from Nuvoton upon request Authors: Guenter Roeck diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 500b262..e13c902 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1140,8 +1140,8 @@ config SENSORS_NCT6775 help If you say yes here you get support for the hardware monitoring functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D, - NCT6791D, NCT6792D and compatible Super-I/O chips. This driver - replaces the w83627ehf driver for NCT6775F and NCT6776F. + NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This + driver replaces the w83627ehf driver for NCT6775F and NCT6776F. This driver can also be built as a module. If so, the module will be called nct6775. 
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 2aaedbe..8b4fa55 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -39,6 +39,7 @@ * nct6779d 15 5 5 2+6 0xc560 0xc1 0x5ca3 * nct6791d 15 6 6 2+6 0xc800 0xc1 0x5ca3 * nct6792d 15 6 6 2+6 0xc910 0xc1 0x5ca3 + * nct6793d 15 6 6 2+6 0xd120 0xc1 0x5ca3 * * #temp lists the number of monitored temperature sources (first value) plus * the number of directly connectable temperature sensors (second value). @@ -63,7 +64,7 @@ #define USE_ALTERNATE -enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792 }; +enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793 }; /* used to set data->name = nct6775_device_names[data->sio_kind] */ static const char * const nct6775_device_names[] = { @@ -73,6 +74,17 @@ static const char * const nct6775_device_names[] = { "nct6779", "nct6791", "nct6792", + "nct6793", +}; + +static const char * const nct6775_sio_names[] __initconst = { + "NCT6106D", + "NCT6775F", + "NCT6776D/F", + "NCT6779D", + "NCT6791D", + "NCT6792D", + "NCT6793D", }; static unsigned short force_id; @@ -104,6 +116,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal"); #define SIO_NCT6779_ID 0xc560 #define SIO_NCT6791_ID 0xc800 #define SIO_NCT6792_ID 0xc910 +#define SIO_NCT6793_ID 0xd120 #define SIO_ID_MASK 0xFFF0 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 }; @@ -537,7 +550,7 @@ static const s8 NCT6791_ALARM_BITS[] = { 4, 5, 13, -1, -1, -1, /* temp1..temp6 */ 12, 9 }; /* intrusion0, intrusion1 */ -/* NCT6792 specific data */ +/* NCT6792/NCT6793 specific data */ static const u16 NCT6792_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7d }; @@ -1060,6 +1073,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) case nct6779: case nct6791: case nct6792: + case nct6793: return reg == 0x150 || reg == 0x153 || reg == 0x155 || ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) || reg == 0x402 || @@ -1411,6 +1425,7 @@ static void nct6775_update_pwm_limits(struct device *dev) case nct6779: case nct6791: case nct6792: + case nct6793: reg = nct6775_read_value(data, data->REG_CRITICAL_PWM_ENABLE[i]); if (reg & data->CRITICAL_PWM_ENABLE_MASK) @@ -2826,6 +2841,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr, case nct6779: case nct6791: case nct6792: + case nct6793: nct6775_write_value(data, data->REG_CRITICAL_PWM[nr], val); reg = nct6775_read_value(data, @@ -3260,7 +3276,7 @@ nct6775_check_fan_inputs(struct nct6775_data *data) pwm4pin = false; pwm5pin = false; pwm6pin = false; - } else { /* NCT6779D, NCT6791D, or NCT6792D */ + } else { /* NCT6779D, NCT6791D, NCT6792D, or NCT6793D */ regval = superio_inb(sioreg, 0x1c); fan3pin = !(regval & (1 << 5)); @@ -3273,7 +3289,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data) fan4min = fan4pin; - if (data->kind == nct6791 || data->kind == nct6792) { + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) { regval = superio_inb(sioreg, 0x2d); fan6pin = (regval & (1 << 1)); pwm6pin = (regval & (1 << 0)); @@ -3647,6 +3664,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: data->in_num = 15; data->pwm_num = 6; data->auto_pwm_num = 4; @@ -3922,6 +3940,7 @@ static int nct6775_probe(struct platform_device *pdev) case nct6779: case nct6791: case nct6792: + case nct6793: break; } @@ -3954,6 +3973,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + 
case nct6793: tmp |= 0x7e; break; } @@ -4051,7 +4071,8 @@ static int __maybe_unused nct6775_resume(struct device *dev) if (reg != data->sio_reg_enable) superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable); - if (data->kind == nct6791 || data->kind == nct6792) + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) nct6791_enable_io_mapping(sioreg); superio_exit(sioreg); @@ -4110,15 +4131,6 @@ static struct platform_driver nct6775_driver = { .probe = nct6775_probe, }; -static const char * const nct6775_sio_names[] __initconst = { - "NCT6106D", - "NCT6775F", - "NCT6776D/F", - "NCT6779D", - "NCT6791D", - "NCT6792D", -}; - /* nct6775_find() looks for a '627 in the Super-I/O config space */ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) { @@ -4154,6 +4166,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) case SIO_NCT6792_ID: sio_data->kind = nct6792; break; + case SIO_NCT6793_ID: + sio_data->kind = nct6793; + break; default: if (val != 0xffff) pr_debug("unsupported chip ID: 0x%04x\n", val); @@ -4179,7 +4194,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01); } - if (sio_data->kind == nct6791 || sio_data->kind == nct6792) + if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || + sio_data->kind == nct6793) nct6791_enable_io_mapping(sioaddr); superio_exit(sioaddr); @@ -4289,7 +4305,7 @@ static void __exit sensors_nct6775_exit(void) } MODULE_AUTHOR("Guenter Roeck "); -MODULE_DESCRIPTION("NCT6106D/NCT6775F/NCT6776F/NCT6779D/NCT6791D/NCT6792D driver"); +MODULE_DESCRIPTION("Driver for NCT6775F and compatible chips"); MODULE_LICENSE("GPL"); module_init(sensors_nct6775_init); -- cgit v0.10.2 From 7c5b190e115a2f7a51a85f261e7d7dca4b4bbe64 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 10 Sep 2015 21:55:27 +0200 Subject: x86/cpu: Print family/model/stepping in hex 924e101a7ab6 ("x86/debug: Dump family, model, stepping of the boot CPU") had its good intentions to dump the exact F/M/S as an aid during debugging sessions but its output can be ambiguous. Fix that: -smpboot: CPU0: Intel Core Processor (Broadwell) (fam: 06, model: 47, stepping: 02) +smpboot: CPU0: Intel Core Processor (Broadwell) (family: 0x6, model: 0x47, stepping: 0x2) Also, spell out "family". Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441914927-32037-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ce52c..de22ea7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "%d86", c->x86); - printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); + printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); else printk(KERN_CONT ")\n"); -- cgit v0.10.2 From ebfb4988f0378e2ac3b4a0aa1ea20d724293f392 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Sep 2015 11:58:27 +0200 Subject: perf/x86/intel: Fix constraint access Sasha reported that we can get here with .idx==-1, and cpuc->event_constraints unallocated. 
Suggested-by: Stephane Eranian Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Fixes: b371b5943178 ("perf/x86: Fix event/group validation") Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3f124d5..f41e4dc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2316,9 +2316,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls -- cgit v0.10.2 From 2619d7e9c92d524cb155ec89fd72875321512e5b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 9 Sep 2015 16:07:30 -0700 Subject: time: Fix timekeeping_freqadjust()'s incorrect use of abs() instead of abs64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal clocksteering done for fine-grained error correction uses a logarithmic approximation, so any time adjtimex() adjusts the clock steering, timekeeping_freqadjust() quickly approximates the correct clock frequency over a series of ticks. Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit: dc491596f639 ("timekeeping: Rework frequency adjustments to work better w/ nohz") used the abs() function with a s64 error value to calculate the size of the approximated adjustment to be made. Per include/linux/kernel.h: "abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()". Thus on 32-bit platforms, this resulted in the clocksteering to take a quite dampended random walk trying to converge on the proper frequency, which caused the adjustments to be made much slower then intended (most easily observed when large adjustments are made). This patch fixes the issue by using abs64() instead. Reported-by: Nuno Gonçalves Tested-by: Nuno Goncalves Signed-off-by: John Stultz Cc: # v3.17+ Cc: Linus Torvalds Cc: Miroslav Lichvar Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441840051-20244-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f6ee2e6..3739ac6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; -- cgit v0.10.2 From caa470475d9b59eeff093ae650800d34612c4379 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Sep 2015 12:36:12 -0300 Subject: perf header: Fixup reading of HEADER_NRCPUS feature The original patch introducing this header wrote the number of CPUs available and online in one order and then swapped those values when reading, fix it. 
Before: # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 # echo 0 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 3 # echo 0 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 2 After the fix, bringing back the CPUs online: # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 2 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 3 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Kan Liang Cc: Stephane Eranian Cc: Wang Nan Fixes: fbe96f29ce4b ("perf tools: Make perf.data more self-descriptive (v8)") Link: http://lkml.kernel.org/r/20150911153323.GP23511@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4181454..fce6634 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1438,7 +1438,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1447,7 +1447,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; return 0; } -- cgit v0.10.2 From 9b302c1acf4114d51b7f0962df0b0ddee0dc75cd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Aug 2015 14:28:14 +0200 Subject: drivers: sh: Disable legacy default PM Domain on emev2 EMMA Mobile EV2 doesn't have MSTP clocks. All its device drivers manage clocks explicitly, without relying on Runtime PM, so it doesn't need the legacy default PM Domain. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulf Hansson Signed-off-by: Simon Horman diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index d3d1891..ad91881 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -35,8 +35,7 @@ static struct pm_clk_notifier_block platform_bus_notifier = { static int __init sh_pm_runtime_init(void) { if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) { - if (!of_machine_is_compatible("renesas,emev2") && - !of_machine_is_compatible("renesas,r7s72100") && + if (!of_machine_is_compatible("renesas,r7s72100") && #ifndef CONFIG_PM_GENERIC_DOMAINS_OF !of_machine_is_compatible("renesas,r8a73a4") && !of_machine_is_compatible("renesas,r8a7740") && -- cgit v0.10.2 From cbc41d0a761bffb3166a413a3c77100a737c0cd7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Aug 2015 14:28:15 +0200 Subject: drivers: sh: Disable PM runtime for multi-platform ARM with genpd If the default PM Domain using PM_CLK is used for PM runtime, the real Clock Domain cannot be registered from DT later. Hence do not enable it when running a multi-platform kernel with genpd support on R-Car or RZ. The CPG/MSTP Clock Domain driver will take care of PM runtime management of the module clocks. 
Now that most multi-platform ARM shmobile platforms (SH-Mobile, R-Mobile, R-Car, RZ) use DT-based PM Domains to take care of PM runtime management of the module clocks, simplify the platform logic by replacing the explicit SoC checks with a single check for the presence of MSTP clocks in DT. Backwards-compatibility with old DTs (mainly for R-Car Gen2) is provided by checking for the presence of a "#power-domain-cells" property in DT. The default PM Domain is still needed for: - backwards-compatibility with old DTs that lack PM Domain properties, - the CONFIG_PM=n case, - legacy (non-DT) ARM/shmobile platforms without genpd support (r8a7778, r8a7779), - legacy SuperH. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulf Hansson Signed-off-by: Simon Horman diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index ad91881..25abd4e 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -35,19 +35,11 @@ static struct pm_clk_notifier_block platform_bus_notifier = { static int __init sh_pm_runtime_init(void) { if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) { - if (!of_machine_is_compatible("renesas,r7s72100") && -#ifndef CONFIG_PM_GENERIC_DOMAINS_OF - !of_machine_is_compatible("renesas,r8a73a4") && - !of_machine_is_compatible("renesas,r8a7740") && - !of_machine_is_compatible("renesas,sh73a0") && -#endif - !of_machine_is_compatible("renesas,r8a7778") && - !of_machine_is_compatible("renesas,r8a7779") && - !of_machine_is_compatible("renesas,r8a7790") && - !of_machine_is_compatible("renesas,r8a7791") && - !of_machine_is_compatible("renesas,r8a7792") && - !of_machine_is_compatible("renesas,r8a7793") && - !of_machine_is_compatible("renesas,r8a7794")) + if (!of_find_compatible_node(NULL, NULL, + "renesas,cpg-mstp-clocks")) + return 0; + if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS_OF) && + of_find_node_with_property(NULL, "#power-domain-cells")) + return 0; } -- cgit v0.10.2 From 216076705d6ac291d42e0f8dd85e6a0da98c0fa3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 8 Sep 2015 08:56:13 -0400 Subject: dm thin: disable discard support for thin devices if pool's is disabled If the pool is configured with 'ignore_discard' its discard support is disabled. The pool's thin devices should also have queue_limits that reflect that discards are disabled. Fixes: 34fbcf62 ("dm thin: range discard support") Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.1+ diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6578b7b..6fcbfb0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4249,6 +4249,10 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; struct pool *pool = tc->pool; + struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md); + + if (!pool_limits->discard_granularity) + return; /* pool's discard support is disabled */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */ -- cgit v0.10.2 From cd33dc9ac2977ebe30cecbf39d2992190fbac5b4 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 8 Sep 2015 14:51:12 +0100 Subject: thermal: Fix thermal_zone_of_sensor_register to match documentation thermal_zone_of_sensor_register is documented as returning a pointer to either a valid thermal_zone_device on success, or a corresponding ERR_PTR() value. In contrast, the function returns NULL when THERMAL_OF is configured off. Fix this.
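The practical consequence for consumers: callers are expected to test the result with IS_ERR(), so a NULL return from the !THERMAL_OF stub would look like success and be dereferenced later. A hedged consumer-side sketch (the wrapper function is illustrative, not kernel code):

	#include <linux/err.h>
	#include <linux/thermal.h>

	static int example_register_sensor(struct device *dev, void *data,
					   const struct thermal_zone_of_device_ops *ops)
	{
		struct thermal_zone_device *tzd;

		tzd = thermal_zone_of_sensor_register(dev, 0, data, ops);
		if (IS_ERR(tzd))		/* now also catches the THERMAL_OF=n stub */
			return PTR_ERR(tzd);	/* -ENODEV after this fix */

		return 0;
	}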
Signed-off-by: Punit Agrawal Acked-by: Guenter Roeck Cc: Eduardo Valentin Cc: Zhang Rui Signed-off-by: Eduardo Valentin diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 17292fe..0c5518e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -360,7 +360,7 @@ static inline struct thermal_zone_device * thermal_zone_of_sensor_register(struct device *dev, int id, void *data, const struct thermal_zone_of_device_ops *ops) { - return NULL; + return ERR_PTR(-ENODEV); } static inline -- cgit v0.10.2 From f9e2b05218e64fc6c614b90a650863981701f974 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:39:44 -0700 Subject: thermal: hisi: allow compile test Adding COMPILE_TEST flag to hisi driver to facilitate maintenance. Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 0390044..ef48f23 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -163,7 +163,7 @@ config THERMAL_EMULATION config HISI_THERMAL tristate "Hisilicon thermal driver" - depends on ARCH_HISI && CPU_THERMAL && OF + depends on (ARCH_HISI && CPU_THERMAL && OF) || COMPILE_TEST help Enable this to plug hisilicon's thermal sensor driver into the Linux thermal framework. cpufreq is used as the cooling device to throttle -- cgit v0.10.2 From aa2937b73a45e49704b24db8d9163d83002332d9 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:42:01 -0700 Subject: thermal: spear: allow compile test Adding COMPILE_TEST flag to spear driver to facilitate maintenance. Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Viresh Kumar Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index ef48f23..e2cfaa4 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -182,7 +182,7 @@ config IMX_THERMAL config SPEAR_THERMAL bool "SPEAr thermal sensor driver" - depends on PLAT_SPEAR + depends on PLAT_SPEAR || COMPILE_TEST depends on OF help Enable this to plug the SPEAr thermal sensor driver into the Linux -- cgit v0.10.2 From 444f9b00073668f028db0c14f609cf08abc9a2b3 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:43:57 -0700 Subject: thermal: rockchip: allow compile test Adding COMPILE_TEST flag to rockchip driver to facilitate maintenance. Cc: Zhang Rui Cc: Heiko Stuebner Cc: linux-pm@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index e2cfaa4..f114dd6 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -190,7 +190,7 @@ config SPEAR_THERMAL config ROCKCHIP_THERMAL tristate "Rockchip thermal driver" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST depends on RESET_CONTROLLER help Rockchip thermal driver provides support for Temperature sensor -- cgit v0.10.2 From 9c8aa959e13f5cb082ad244d2c9b806a67d9a8b6 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:44:46 -0700 Subject: thermal: kirkwood: allow compile test Adding COMPILE_TEST flag to kirkwood driver to facilitate maintenance. 
Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f114dd6..1b74f4e 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -208,7 +208,7 @@ config RCAR_THERMAL config KIRKWOOD_THERMAL tristate "Temperature sensor on Marvell Kirkwood SoCs" - depends on MACH_KIRKWOOD + depends on MACH_KIRKWOOD || COMPILE_TEST depends on OF help Support for the Kirkwood thermal sensor driver into the Linux thermal -- cgit v0.10.2 From 07fffd5ccab70bc00fd69b9ce2297a5e8d05b838 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:45:25 -0700 Subject: thermal: dove: allow compile test Adding COMPILE_TEST flag to dove driver to facilitate maintenance. Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 1b74f4e..09fe548 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -216,7 +216,7 @@ config KIRKWOOD_THERMAL config DOVE_THERMAL tristate "Temperature sensor on Marvell Dove SoCs" - depends on ARCH_DOVE || MACH_DOVE + depends on ARCH_DOVE || MACH_DOVE || COMPILE_TEST depends on OF help Support for the Dove thermal sensor driver in the Linux thermal -- cgit v0.10.2 From 1b1582672121e1c1138e68b64128d94e7f61d10c Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:48:00 -0700 Subject: thermal: armada: allow compile test Adding COMPILE_TEST flag to armada driver to facilitate maintenance. Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 09fe548..f153f58 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -234,7 +234,7 @@ config DB8500_THERMAL config ARMADA_THERMAL tristate "Armada 370/XP thermal management" - depends on ARCH_MVEBU + depends on ARCH_MVEBU || COMPILE_TEST depends on OF help Enable this option if you want to have support for thermal management -- cgit v0.10.2 From 2bf427353eb606481d15d2843c76cdffa29ed418 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:53:35 -0700 Subject: thermal: exynos: allow compile test Adding COMPILE_TEST flag to exynos driver to facilitate maintenance. Cc: Lukasz Majewski Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f153f58..15f7cde 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -353,7 +353,7 @@ source "drivers/thermal/ti-soc-thermal/Kconfig" endmenu menu "Samsung thermal drivers" -depends on ARCH_EXYNOS +depends on ARCH_EXYNOS || COMPILE_TEST source "drivers/thermal/samsung/Kconfig" endmenu -- cgit v0.10.2 From cb7fb4d342023d03070fcc1ed73fcd43682973d7 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:56:26 -0700 Subject: thermal: qcom_spmi: allow compile test Adding COMPILE_TEST flag to qcom_spmi driver to facilitate maintenance. 
Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 15f7cde..7d95587b 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -364,7 +364,7 @@ endmenu config QCOM_SPMI_TEMP_ALARM tristate "Qualcomm SPMI PMIC Temperature Alarm" - depends on OF && SPMI && IIO + depends on OF && (SPMI || COMPILE_TEST) && IIO select REGMAP_SPMI help This enables a thermal sysfs driver for Qualcomm plug-and-play (QPNP) -- cgit v0.10.2 From 41ae3c02743e8f1f21ceebca614342d26b4eb080 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 20:58:36 -0700 Subject: thermal: ti-soc: allow compile test Adding COMPILE_TEST flag to ti-soc driver to facilitate maintenance. Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-omap@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig index bd4c7be..7a24018 100644 --- a/drivers/thermal/ti-soc-thermal/Kconfig +++ b/drivers/thermal/ti-soc-thermal/Kconfig @@ -1,7 +1,7 @@ config TI_SOC_THERMAL tristate "Texas Instruments SoCs temperature sensor driver" depends on THERMAL - depends on ARCH_HAS_BANDGAP + depends on ARCH_HAS_BANDGAP || COMPILE_TEST help If you say yes here you get support for the Texas Instruments OMAP4460+ on die bandgap temperature sensor support. The register @@ -24,7 +24,7 @@ config TI_THERMAL config OMAP4_THERMAL bool "Texas Instruments OMAP4 thermal support" depends on TI_SOC_THERMAL - depends on ARCH_OMAP4 + depends on ARCH_OMAP4 || COMPILE_TEST help If you say yes here you get thermal support for the Texas Instruments OMAP4 SoC family. The current chip supported are: @@ -38,7 +38,7 @@ config OMAP4_THERMAL config OMAP5_THERMAL bool "Texas Instruments OMAP5 thermal support" depends on TI_SOC_THERMAL - depends on SOC_OMAP5 + depends on SOC_OMAP5 || COMPILE_TEST help If you say yes here you get thermal support for the Texas Instruments OMAP5 SoC family. The current chip supported are: @@ -50,7 +50,7 @@ config OMAP5_THERMAL config DRA752_THERMAL bool "Texas Instruments DRA752 thermal support" depends on TI_SOC_THERMAL - depends on SOC_DRA7XX + depends on SOC_DRA7XX || COMPILE_TEST help If you say yes here you get thermal support for the Texas Instruments DRA752 SoC family. The current chip supported are: -- cgit v0.10.2 From ec2feb475f4880246166a47e066e7a601a6103af Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 9 Sep 2015 21:13:41 -0700 Subject: thermal: ti-soc: Kconfig fix to avoid menu showing wrongly Move the dependencies to the menu itself, so the menu is not shown when none of its entries can be selected. Cc: Zhang Rui Cc: linux-pm@vger.kernel.org Cc: linux-omap@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 7d95587b..5aabc4b 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -349,6 +349,7 @@ config INTEL_PCH_THERMAL programmable trip points and other information.
menu "Texas Instruments thermal drivers" +depends on ARCH_HAS_BANDGAP || COMPILE_TEST source "drivers/thermal/ti-soc-thermal/Kconfig" endmenu diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig index 7a24018..cb6686f 100644 --- a/drivers/thermal/ti-soc-thermal/Kconfig +++ b/drivers/thermal/ti-soc-thermal/Kconfig @@ -1,7 +1,5 @@ config TI_SOC_THERMAL tristate "Texas Instruments SoCs temperature sensor driver" - depends on THERMAL - depends on ARCH_HAS_BANDGAP || COMPILE_TEST help If you say yes here you get support for the Texas Instruments OMAP4460+ on die bandgap temperature sensor support. The register -- cgit v0.10.2 From 64e05d8bcc837bc66cb2c6e71796c8883aa45939 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Aug 2015 07:32:11 +0530 Subject: thermal: cpu_cooling: Add MAINTAINERS entry None of the patches are reaching Viresh or Daniel directly as get_maintainers doesn't report us as maintainers. It looks like neither the file header nor the commit history allows it to do that properly. Add a separate entry for the cpu_cooling driver in MAINTAINERS. Acked-by: Eduardo Valentin Acked-by: Amit Daniel Kachhap Acked-by: Amit Daniel Kachhap Signed-off-by: Viresh Kumar Signed-off-by: Eduardo Valentin diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7ab7..6cc919a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10338,6 +10338,16 @@ F: include/uapi/linux/thermal.h F: include/linux/cpu_cooling.h F: Documentation/devicetree/bindings/thermal/ +THERMAL/CPU_COOLING +M: Amit Daniel Kachhap +M: Viresh Kumar +M: Javi Merino +L: linux-pm@vger.kernel.org +S: Supported +F: Documentation/thermal/cpu-cooling-api.txt +F: drivers/thermal/cpu_cooling.c +F: include/linux/cpu_cooling.h + THINGM BLINK(1) USB RGB LED DRIVER M: Vivien Didelot S: Maintained -- cgit v0.10.2 From 0847e26a84faa0fcb4572d7e42d0a44cab69a83d Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 3 Sep 2015 13:10:14 +0200 Subject: thermal: db8500_cpufreq_cooling: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created, so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c index 607b62c..e58bd0b 100644 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ b/drivers/thermal/db8500_cpufreq_cooling.c @@ -72,6 +72,7 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = { { .compatible = "stericsson,db8500-cpufreq-cooling" }, {}, }; +MODULE_DEVICE_TABLE(of, db8500_cpufreq_cooling_match); #endif static struct platform_driver db8500_cpufreq_cooling_driver = { -- cgit v0.10.2 From 459ac37506d195713b5e82271a2ac44a777e47df Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 17 Aug 2015 19:21:42 +0100 Subject: thermal: cpu_cooling: don't call kcalloc() under rcu_read_lock build_dyn_power_table() allocates the power table while holding rcu_read_lock. kcalloc() using GFP_KERNEL may sleep, so it can't be called in an RCU read-side path. Move the rcu protection to the part of the function that really needs it: the part that handles the dev_pm_opp pointer received from dev_pm_opp_find_freq_ceil(). In the unlikely case that there is an OPP added to the cpu while this function is running, return -EAGAIN.
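The rule being enforced here: rcu_read_lock() opens an atomic (non-sleeping) section, while GFP_KERNEL allocations may sleep to reclaim memory. A minimal sketch of the restructuring, with the allocation hoisted out of the RCU section (illustrative helper, not the driver code):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	static u32 *example_build_table(int n)
	{
		u32 *table = kcalloc(n, sizeof(*table), GFP_KERNEL); /* may sleep */

		if (!table)
			return NULL;

		rcu_read_lock();	/* sleeping is forbidden from here... */
		/* ... walk RCU-protected OPP-like entries and fill table[] ... */
		rcu_read_unlock();	/* ...to here */

		return table;
	}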
Fixes: c36cf0717631 ("thermal: cpu_cooling: implement the power cooling device API") Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 620dcd4..b327556 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -262,7 +262,9 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, * efficiently. Power is stored in mW, frequency in KHz. The * resulting table is in ascending order. * - * Return: 0 on success, -E* on error. + * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs, + * -ENOMEM if we run out of memory or -EAGAIN if an OPP was + * added/enabled while the function was executing. */ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, u32 capacitance) @@ -270,11 +272,9 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, struct power_table *power_table; struct dev_pm_opp *opp; struct device *dev = NULL; - int num_opps = 0, cpu, i, ret = 0; + int num_opps = 0, cpu, i; unsigned long freq; - rcu_read_lock(); - for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { dev = get_cpu_device(cpu); if (!dev) { @@ -284,24 +284,20 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, } num_opps = dev_pm_opp_get_opp_count(dev); - if (num_opps > 0) { + if (num_opps > 0) break; - } else if (num_opps < 0) { - ret = num_opps; - goto unlock; - } + else if (num_opps < 0) + return num_opps; } - if (num_opps == 0) { - ret = -EINVAL; - goto unlock; - } + if (num_opps == 0) + return -EINVAL; power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL); - if (!power_table) { - ret = -ENOMEM; - goto unlock; - } + if (!power_table) + return -ENOMEM; + + rcu_read_lock(); for (freq = 0, i = 0; opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp); @@ -309,6 +305,11 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, u32 freq_mhz, voltage_mv; u64 power; + if (i >= num_opps) { + rcu_read_unlock(); + return -EAGAIN; + } + freq_mhz = freq / 1000000; voltage_mv = dev_pm_opp_get_voltage(opp) / 1000; @@ -326,18 +327,16 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, power_table[i].power = power; } - if (i == 0) { - ret = PTR_ERR(opp); - goto unlock; - } + rcu_read_unlock(); + + if (i != num_opps) + return PTR_ERR(opp); cpufreq_device->cpu_dev = dev; cpufreq_device->dyn_power_table = power_table; cpufreq_device->dyn_power_table_entries = i; -unlock: - rcu_read_unlock(); - return ret; + return 0; } static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device, -- cgit v0.10.2 From eba4f88d5af84e0fcaa5d6eb4fe35a75c47203cb Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 17 Aug 2015 19:21:43 +0100 Subject: thermal: cpu_cooling: free power table on error or when unregistering The power table is not being freed on error from cpufreq_cooling register or when unregistering. Free it. 
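The fix applies the usual kernel error-unwinding idiom: each later failure jumps to a label that releases everything acquired so far, and the unregister path mirrors the same frees. A generic sketch (names are illustrative):

	#include <linux/slab.h>

	static int example_setup(void **out_a, void **out_b)
	{
		void *a, *b;

		a = kzalloc(64, GFP_KERNEL);
		if (!a)
			return -ENOMEM;

		b = kzalloc(64, GFP_KERNEL);
		if (!b)
			goto free_a;	/* unwind in reverse order of allocation */

		*out_a = a;
		*out_b = b;
		return 0;

	free_a:
		kfree(a);
		return -ENOMEM;
	}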
Fixes: c36cf0717631 ("thermal: cpu_cooling: implement the power cooling device API") Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index b327556..42c6f71 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -272,7 +272,7 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, struct power_table *power_table; struct dev_pm_opp *opp; struct device *dev = NULL; - int num_opps = 0, cpu, i; + int num_opps = 0, cpu, i, ret = 0; unsigned long freq; for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { @@ -307,7 +307,8 @@ if (i >= num_opps) { rcu_read_unlock(); - return -EAGAIN; + ret = -EAGAIN; + goto free_power_table; } freq_mhz = freq / 1000000; @@ -329,14 +330,21 @@ rcu_read_unlock(); - if (i != num_opps) - return PTR_ERR(opp); + if (i != num_opps) { + ret = PTR_ERR(opp); + goto free_power_table; + } cpufreq_device->cpu_dev = dev; cpufreq_device->dyn_power_table = power_table; cpufreq_device->dyn_power_table_entries = i; return 0; + +free_power_table: + kfree(power_table); + + return ret; } static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device, @@ -846,7 +854,7 @@ __cpufreq_cooling_register(struct device_node *np, ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); if (ret) { cool_dev = ERR_PTR(ret); - goto free_table; + goto free_power_table; } snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", @@ -888,6 +896,8 @@ __cpufreq_cooling_register(struct device_node *np, remove_idr: release_idr(&cpufreq_idr, cpufreq_dev->id); +free_power_table: + kfree(cpufreq_dev->dyn_power_table); free_table: kfree(cpufreq_dev->freq_table); free_time_in_idle_timestamp: @@ -1038,6 +1048,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) thermal_cooling_device_unregister(cpufreq_dev->cool_dev); release_idr(&cpufreq_idr, cpufreq_dev->id); + kfree(cpufreq_dev->dyn_power_table); kfree(cpufreq_dev->time_in_idle_timestamp); kfree(cpufreq_dev->time_in_idle); kfree(cpufreq_dev->freq_table); -- cgit v0.10.2 From 9e26b0b114adb321a9bf41520bac304ef49e77d1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 23 Aug 2015 21:11:52 +0800 Subject: gpio: mxc: need to check return value of irq_alloc_generic_chip Need to check the return value of irq_alloc_generic_chip(), because it may return NULL. 1. Change mxc_gpio_init_gc return type from void to int. 2. Add a new label out_irqdomain_remove to remove the irq domain when mxc_gpio_init_gc fails.
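irq_alloc_generic_chip() allocates its management structure internally, so NULL simply means out of memory; the caller must turn that into -ENOMEM and unwind anything set up earlier. A condensed sketch of the shape of this fix and the gpio-mxs one that follows (unwinding elided; names are illustrative):

	#include <linux/irq.h>

	static int example_gpio_init_gc(void __iomem *reg_base, int irq_base)
	{
		struct irq_chip_generic *gc;

		gc = irq_alloc_generic_chip("gpio-example", 1, irq_base, reg_base,
					    handle_level_irq);
		if (!gc)		/* allocation failure inside the helper */
			return -ENOMEM;

		/* ... fill in gc->chip_types, then irq_setup_generic_chip() ... */
		return 0;
	}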
Signed-off-by: Peng Fan Cc: Alexandre Courbot [Manually rebased] Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index b752b56..8813aba 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -339,13 +339,15 @@ static int gpio_set_wake_irq(struct irq_data *d, u32 enable) return 0; } -static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) +static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxc", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; gc->private = port; ct = gc->chip_types; @@ -360,6 +362,8 @@ static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static void mxc_gpio_get_hw(struct platform_device *pdev) @@ -477,12 +481,16 @@ static int mxc_gpio_probe(struct platform_device *pdev) } /* gpio-mxc can be a generic irq chip */ - mxc_gpio_init_gc(port, irq_base); + err = mxc_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; list_add_tail(&port->node, &mxc_gpio_ports); return 0; +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); out_gpiochip_remove: -- cgit v0.10.2 From 1bbc557d976b4e5ae9a41d619bd79f09ccac9afc Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 23 Aug 2015 21:11:53 +0800 Subject: gpio: mxs: need to check return value of irq_alloc_generic_chip Need to check the return value of irq_alloc_generic_chip(), because it may return NULL. 1. Change mxs_gpio_init_gc return type from void to int. 2. Add a new label out_irqdomain_remove to remove the irq domain when mxs_gpio_init_gc fails.
Signed-off-by: Peng Fan Cc: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index b7f383e..1387385 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -196,13 +196,16 @@ static int mxs_gpio_set_wake_irq(struct irq_data *d, unsigned int enable) return 0; } -static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) +static int __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxs", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; + gc->private = port; ct = gc->chip_types; @@ -216,6 +219,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset) @@ -317,7 +322,9 @@ static int mxs_gpio_probe(struct platform_device *pdev) } /* gpio-mxs can be a generic irq chip */ - mxs_gpio_init_gc(port, irq_base); + err = mxs_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; /* setup one handler for each entry */ irq_set_chained_handler_and_data(port->irq, mxs_gpio_irq_handler, @@ -343,6 +350,8 @@ static int mxs_gpio_probe(struct platform_device *pdev) out_bgpio_remove: bgpio_remove(&port->bgc); +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); return err; -- cgit v0.10.2 From aad38b75fb632200eb64282469458d21ce2cc39a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 25 Aug 2015 09:12:43 +0000 Subject: gpio: rcar: GPIO_RCAR doesn't relate to ARM 8cd1470 ("gpio: rcar: Add r8a7795 (R-Car H3) support") added GPIO support for r8a7795. r8a7795 is based on CONFIG_ARM64. OTOH, the GPIO_RCAR driver can be compiled fine on non-ARM. This patch removes the ARM dependency for it. Signed-off-by: Kuninori Morimoto Acked-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Linus Walleij diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b4fc9e4..8949b3f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -356,7 +356,7 @@ config GPIO_PXA config GPIO_RCAR tristate "Renesas R-Car GPIO" - depends on ARM && (ARCH_SHMOBILE || COMPILE_TEST) + depends on ARCH_SHMOBILE || COMPILE_TEST select GPIOLIB_IRQCHIP help Say yes here to support GPIO on Renesas R-Car SoCs. -- cgit v0.10.2 From e20538b82f1ffcc06e68feb117f24f211cff7a4d Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 28 Aug 2015 09:44:18 -0700 Subject: gpio: Propagate errors from chip->get() It's possible to have gpio chips hanging off unreliable remote buses where the get() operation will fail to acquire a readout of the current gpio state. Propagate these errors to the consumer so that they can act on, retry or ignore these failing reads, instead of treating them as the line being held high. Signed-off-by: Bjorn Andersson Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 980c1f8..5db3445 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1174,15 +1174,16 @@ EXPORT_SYMBOL_GPL(gpiod_is_active_low); * that the GPIO was actually requested.
*/ -static bool _gpiod_get_raw_value(const struct gpio_desc *desc) +static int _gpiod_get_raw_value(const struct gpio_desc *desc) { struct gpio_chip *chip; - bool value; int offset; + int value; chip = desc->chip; offset = gpio_chip_hwgpio(desc); - value = chip->get ? chip->get(chip, offset) : false; + value = chip->get ? chip->get(chip, offset) : -EIO; + value = value < 0 ? value : !!value; trace_gpio_value(desc_to_gpio(desc), 1, value); return value; } @@ -1192,7 +1193,7 @@ static bool _gpiod_get_raw_value(const struct gpio_desc *desc) * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1212,7 +1213,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1226,6 +1227,9 @@ int gpiod_get_value(const struct gpio_desc *desc) WARN_ON(desc->chip->can_sleep); value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; @@ -1548,7 +1552,7 @@ EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1566,7 +1570,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1579,6 +1583,9 @@ int gpiod_get_value_cansleep(const struct gpio_desc *desc) return 0; value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; -- cgit v0.10.2 From 69de52ba321dda8dd7f632d1e480983494325ba0 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 2 Sep 2015 20:07:09 +0200 Subject: Documentation: gpio: board: add flags parameter to gpiod_get*() functions With commit 39b2bbe3d715 ("gpio: add flags argument to gpiod_get*() functions") the gpiod_get*() functions got a 'flags' parameter. Reflect this in the documentation, too. 
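Tying the two documentation updates to the chip->get() error-propagation change earlier in this series, a hedged consumer sketch: request the line with an explicit flags argument, and treat a negative value from the accessor as an error rather than as a high line. The device and the "ready" con_id are illustrative:

	#include <linux/err.h>
	#include <linux/gpio/consumer.h>

	static int example_read_ready_pin(struct device *dev)
	{
		struct gpio_desc *ready;
		int val;

		ready = gpiod_get(dev, "ready", GPIOD_IN);	/* "ready-gpios" in DT */
		if (IS_ERR(ready))
			return PTR_ERR(ready);

		val = gpiod_get_value(ready);	/* may now return a negative errno */
		gpiod_put(ready);

		return val;	/* >= 0: line level, < 0: propagated error */
	}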
Signed-off-by: Dirk Behme Signed-off-by: Linus Walleij diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index b80606d..9edd5af 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -39,11 +39,11 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). @@ -142,13 +142,14 @@ The driver controlling "foo.0" will then be able to obtain its GPIOs as follows: struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); - gpiod_direction_output(power, 1); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); -Since the "power" GPIO is mapped as active-low, its actual signal will be 0 -after this code. Contrary to the legacy integer GPIO interface, the active-low -property is handled during mapping and is thus transparent to GPIO consumers. +Since the "led" GPIOs are mapped as active-high, this example will switch their +signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped +as active-low, its actual signal will be 0 after this code. Contrary to the legacy +integer GPIO interface, the active-low property is handled during mapping and is +thus transparent to GPIO consumers. -- cgit v0.10.2 From 87e77e46c61a9333227ab41cdefb1875758a4f13 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 2 Sep 2015 20:07:10 +0200 Subject: Documentation: gpio: board: describe the con_id parameter The con_id parameter has to match the GPIO description and is automatically extended by the GPIO suffix if not NULL. I had to look into the code to understand this and properly find the GPIO I've been looking for, so document this. Signed-off-by: Dirk Behme Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 9edd5af..5fa069a 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -48,6 +48,15 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). +The second parameter of the gpiod_get() functions, the con_id string, has to be +the <function>-prefix of the GPIO suffixes ("gpios" or "gpio", automatically +looked up by the gpiod functions internally) used in the device tree. With above +"led-gpios" example, use the prefix without the "-" as con_id parameter: "led". + +Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio") +with the string passed in con_id to get the resulting string +(snprintf(... "%s-%s", con_id, gpio_suffixes[]). + ACPI ---- ACPI also supports function names for GPIOs in a similar fashion to DT.
diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index a206639..e000502 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -39,6 +39,9 @@ device that displays digits), an additional index argument can be specified: const char *con_id, unsigned int idx, enum gpiod_flags flags) +For a more detailed description of the con_id parameter in the DeviceTree case +see Documentation/gpio/board.txt + The flags parameter is used to optionally specify a direction and initial value for the GPIO. Values can be: -- cgit v0.10.2 From e799f35c32ea940222b568abf38d6abcab7fa8c1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 30 Aug 2015 23:58:44 +0200 Subject: gpio: sx150x: Remove unnecessary MODULE_ALIAS() The driver has an I2C device id table that is used to create the module aliases, and also "sx150x" isn't a supported I2C id, so it's never used. Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-sx150x.c b/drivers/gpio/gpio-sx150x.c index 458d9d7..9c6b967 100644 --- a/drivers/gpio/gpio-sx150x.c +++ b/drivers/gpio/gpio-sx150x.c @@ -706,4 +706,3 @@ module_exit(sx150x_exit); MODULE_AUTHOR("Gregory Bean "); MODULE_DESCRIPTION("Driver for Semtech SX150X I2C GPIO Expanders"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("i2c:sx150x"); -- cgit v0.10.2 From 5e606abef57a89b3ca25f5d97a953c6cdad7cbac Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Aug 2015 11:44:49 -0700 Subject: gpio: omap: Fix gpiochip_add() handling for deferred probe Currently gpio-omap breaks if gpiochip_add() returns -EPROBE_DEFER: [ 0.570000] gpiochip_add: GPIOs 0..31 (gpio) failed to register [ 0.570000] omap_gpio 48310000.gpio: Could not register gpio chip -517 ... [ 3.670000] omap_gpio 48310000.gpio: Unbalanced pm_runtime_enable! Let's fix the issue by adding the missing pm_runtime_put() on error. Cc: Grygorii Strashko Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Santosh Shilimkar Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 2ae0d47..892a9d1 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1253,8 +1253,11 @@ static int omap_gpio_probe(struct platform_device *pdev) omap_gpio_mod_init(bank); ret = omap_gpio_chip_init(bank, irqc); - if (ret) + if (ret) { + pm_runtime_put_sync(bank->dev); + pm_runtime_disable(bank->dev); return ret; + } omap_gpio_show_rev(bank); -- cgit v0.10.2 From ae80d64ee8c88b77c58254bcdc5c0981faab672d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 1 Sep 2015 10:46:15 +0200 Subject: Documentation: gpio: Explain that <function>-gpio is also supported The GPIO documentation mentions that GPIOs are mapped by defining a <function>-gpios property in the consumer device's node, but a <function>-gpio suffix is also supported after commit: dd34c37aa3e8 ("gpio: of: Allow -gpio suffix for property names") Update the documentation to match the implementation. Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 5fa069a..f59c43b 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -21,8 +21,8 @@ exact way to do it depends on the GPIO controller providing the GPIOs, see the device tree bindings for your controller.
GPIOs mappings are defined in the consumer device's node, in a property named -<function>-gpios, where <function> is the function the driver will request -through gpiod_get(). For example: +either <function>-gpios or <function>-gpio, where <function> is the function +the driver will request through gpiod_get(). For example: foo_device { compatible = "acme,foo"; @@ -31,7 +31,7 @@ through gpiod_get(). For example: <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ - power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + power-gpio = <&gpio 1 GPIO_ACTIVE_LOW>; }; This property will make GPIOs 15, 16 and 17 available to the driver under the -- cgit v0.10.2 From 46d4f7c25e1bb59b1663878b843a7ec06eaf5806 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Sep 2015 10:31:27 -0700 Subject: gpio: omap: Fix GPIO numbering for deferred probe If gpio-omap probe fails with -EPROBE_DEFER, the GPIO numbering keeps increasing. Only increase the gpio count if gpiochip_add() was successful as otherwise the numbers will increase for each probe attempt. Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Santosh Shilimkar Reviewed-by: Grygorii Strashko Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 892a9d1..072af52 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1098,7 +1098,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) } else { bank->chip.label = "gpio"; bank->chip.base = gpio; - gpio += bank->width; } bank->chip.ngpio = bank->width; @@ -1108,6 +1107,9 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) return ret; } + if (!bank->is_mpuio) + gpio += bank->width; + #ifdef CONFIG_ARCH_OMAP1 /* * REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop -- cgit v0.10.2 From d259ec26a6c541a5437e9ed0a1e1891342af3cff Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Aug 2015 23:12:26 +0200 Subject: pinctrl: qcom: ssbi: convert null test to IS_ERR test Since commit 323de9efdf3e ("pinctrl: make pinctrl_register() return proper error code"), pinctrl_register returns an error code rather than NULL on failure. Update some drivers that were introduced more recently. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2; @@ e = pinctrl_register(...) ... when != e = e1 if ( - e == NULL + IS_ERR(e) ) { ...
return - e2 + PTR_ERR(e) ; } // Signed-off-by: Julia Lawall Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index c978b31..e1a3721 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -723,9 +723,9 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx gpio driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_gpio_template; diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 2d1b69f..6652b8d 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -814,9 +814,9 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx mpp driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_mpp_template; -- cgit v0.10.2 From 5a99233e9bcacc7ea23e173a75bbb7301abd3e6f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Aug 2015 23:12:27 +0200 Subject: pinctrl: digicolor: convert null test to IS_ERR test Since commit 323de9efdf3e ("pinctrl: make pinctrl_register() return proper error code"), pinctrl_register returns an error code rather than NULL on failure. Update a driver that was introduced more recently. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2; @@ e = pinctrl_register(...) ... when != e = e1 if ( - e == NULL + IS_ERR(e) ) { ... return - e2 + PTR_ERR(e) ; } // Signed-off-by: Julia Lawall Acked-by: Baruch Siach Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index 461fffc..11f8b83 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -337,9 +337,9 @@ static int dc_pinctrl_probe(struct platform_device *pdev) pmap->dev = &pdev->dev; pmap->pctl = pinctrl_register(pctl_desc, &pdev->dev, pmap); - if (!pmap->pctl) { + if (IS_ERR(pmap->pctl)) { dev_err(&pdev->dev, "pinctrl driver registration failed\n"); - return -EINVAL; + return PTR_ERR(pmap->pctl); } ret = dc_gpiochip_add(pmap, pdev->dev.of_node); -- cgit v0.10.2 From 163dc9f39a26b41fc49319fce4145b35f9705789 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 1 Aug 2015 13:22:38 +0900 Subject: pinctrl: join lines that can be a single line within 80 columns There is no reason to break a line shorter than 80 columns. 
Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 67e08cb..29984b3 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -313,8 +313,7 @@ static int pinmux_func_name_to_selector(struct pinctrl_dev *pctldev, /* See if this pctldev has this function */ while (selector < nfuncs) { - const char *fname = ops->get_function_name(pctldev, - selector); + const char *fname = ops->get_function_name(pctldev, selector); if (!strcmp(function, fname)) return selector; -- cgit v0.10.2 From 942cde724075f840ded89390b10dce1a47a4d712 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Sep 2015 10:34:30 -0700 Subject: pinctrl: core: Warn about NULL gpio_chip in pinctrl_ready_for_gpio_range() If the gpio driver is confused about the numbers for gpio-ranges, pinctrl_ready_for_gpio_range() may get called with invalid GPIO causing a NULL pointer exception. Let's instead provide a warning that allows fixing the problem and return with error. Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 69723e0..9638a00 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -349,6 +349,9 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) struct pinctrl_gpio_range *range = NULL; struct gpio_chip *chip = gpio_to_chip(gpio); + if (WARN(!chip, "no gpio_chip for gpio%i?", gpio)) + return false; + mutex_lock(&pinctrldev_list_mutex); /* Loop over the pin controllers */ -- cgit v0.10.2 From fa84b52cb681b27e6b5e003457562e25a239b9c4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 8 Sep 2015 16:46:40 +0200 Subject: pinctrl: samsung: s3c24xx: fix syntax error ?SYNTAX ERROR irq_desc_get_irq_chip() does not exist. It should be irq_desc_get_chip(). Tested by compiling s3c2410_defconfig. Cc: Thomas Gleixner Reported-by: Paul Gortmaker Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 019844d..d168b39 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -361,7 +361,7 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc, u32 offset, u32 range) { struct s3c24xx_eint_data *data = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_irq_chip(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); struct samsung_pinctrl_drv_data *d = data->drvdata; unsigned int pend, mask; -- cgit v0.10.2 From 54c12bc374408faddbff75dbf1a6167c19af39c4 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 14 Sep 2015 01:13:11 -0700 Subject: drm/vmwgfx: Fix up user_dmabuf refcounting If user space calls unreference on a user_dmabuf it will typically kill the struct ttm_base_object member which is responsible for the user-space visibility. However the dmabuf part may still be alive and refcounted. In some situations, like for shared guest-backed surface referencing/opening, the driver may try to reference the struct ttm_base_object member again, causing an immediate kernel warning and a later kernel NULL pointer dereference. Fix this by always maintaining a reference on the struct ttm_base_object member, in situations where it might subsequently be referenced. 
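The bug class is generic to refcounted objects: if structure A stores a pointer to object B beyond the current call, A must hold its own reference so that a user-space unreference of B cannot free it underneath A. A tiny kref-based sketch of the rule (structures are illustrative, not the vmwgfx ones):

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct obj {
		struct kref refcount;
	};

	static void obj_release(struct kref *kref)
	{
		kfree(container_of(kref, struct obj, refcount));
	}

	static void store_ref(struct obj **slot, struct obj *b)
	{
		kref_get(&b->refcount);	/* the stored pointer owns a reference */
		*slot = b;
	}

	static void drop_ref(struct obj **slot)
	{
		kref_put(&(*slot)->refcount, obj_release);
		*slot = NULL;
	}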
Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8f40692..b60b41f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -631,7 +631,8 @@ extern int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, uint32_t size, bool shareable, uint32_t *handle, - struct vmw_dma_buffer **p_dma_buf); + struct vmw_dma_buffer **p_dma_buf, + struct ttm_base_object **p_base); extern int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, struct vmw_dma_buffer *dma_buf, uint32_t *handle); @@ -645,7 +646,8 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo, uint32_t cur_validate_node); extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo); extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, - uint32_t id, struct vmw_dma_buffer **out); + uint32_t id, struct vmw_dma_buffer **out, + struct ttm_base_object **base); extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index b565654..5da5de0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1236,7 +1236,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo, + NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); ret = -EINVAL; @@ -1296,7 +1297,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo, + NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); ret = -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index 76069f0..222c9c2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -484,7 +484,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, goto out_unlock; } - ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf); + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf, NULL); if (ret) goto out_unlock; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index c1912f8..e57667c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -354,7 +354,7 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv, } *out_surf = NULL; - ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf); + ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf, NULL); return ret; } @@ -481,7 +481,8 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, uint32_t size, bool shareable, uint32_t *handle, - struct vmw_dma_buffer **p_dma_buf) + struct vmw_dma_buffer **p_dma_buf, + struct ttm_base_object **p_base) { struct vmw_user_dma_buffer *user_bo; struct ttm_buffer_object *tmp; @@ -515,6 +516,10 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, } *p_dma_buf = &user_bo->dma; + if (p_base) { + *p_base = &user_bo->prime.base; + kref_get(&(*p_base)->refcount); 
+ } *handle = user_bo->prime.base.hash.key; out_no_base_object: @@ -631,6 +636,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, struct vmw_dma_buffer *dma_buf; struct vmw_user_dma_buffer *user_bo; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct ttm_base_object *buffer_base; int ret; if ((arg->flags & (drm_vmw_synccpu_read | drm_vmw_synccpu_write)) == 0 @@ -643,7 +649,8 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, switch (arg->op) { case drm_vmw_synccpu_grab: - ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf); + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf, + &buffer_base); if (unlikely(ret != 0)) return ret; @@ -651,6 +658,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, dma); ret = vmw_user_dmabuf_synccpu_grab(user_bo, tfile, arg->flags); vmw_dmabuf_unreference(&dma_buf); + ttm_base_object_unref(&buffer_base); if (unlikely(ret != 0 && ret != -ERESTARTSYS && ret != -EBUSY)) { DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n", @@ -692,7 +700,8 @@ int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data, return ret; ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, - req->size, false, &handle, &dma_buf); + req->size, false, &handle, &dma_buf, + NULL); if (unlikely(ret != 0)) goto out_no_dmabuf; @@ -721,7 +730,8 @@ int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data, } int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, - uint32_t handle, struct vmw_dma_buffer **out) + uint32_t handle, struct vmw_dma_buffer **out, + struct ttm_base_object **p_base) { struct vmw_user_dma_buffer *vmw_user_bo; struct ttm_base_object *base; @@ -743,7 +753,10 @@ int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, prime.base); (void)ttm_bo_reference(&vmw_user_bo->dma.base); - ttm_base_object_unref(&base); + if (p_base) + *p_base = base; + else + ttm_base_object_unref(&base); *out = &vmw_user_bo->dma; return 0; @@ -1004,7 +1017,7 @@ int vmw_dumb_create(struct drm_file *file_priv, ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, args->size, false, &args->handle, - &dma_buf); + &dma_buf, NULL); if (unlikely(ret != 0)) goto out_no_dmabuf; @@ -1032,7 +1045,7 @@ int vmw_dumb_map_offset(struct drm_file *file_priv, struct vmw_dma_buffer *out_buf; int ret; - ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf); + ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf, NULL); if (ret != 0) return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index bba1ee3..fd47547 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -855,7 +855,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv, if (buffer_handle != SVGA3D_INVALID_ID) { ret = vmw_user_dmabuf_lookup(tfile, buffer_handle, - &buffer); + &buffer, NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find buffer for shader " "creation.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 3361769..64b5040 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -46,6 +46,7 @@ struct vmw_user_surface { struct vmw_surface srf; uint32_t size; struct drm_master *master; + struct ttm_base_object *backup_base; }; /** @@ -656,6 +657,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) 
struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + ttm_base_object_unref(&user_srf->backup_base); vmw_resource_unreference(&res); } @@ -851,7 +853,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, res->backup_size, true, &backup_handle, - &res->backup, + &res->backup, + &user_srf->backup_base); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); goto out_unlock; } @@ -1321,7 +1324,8 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, if (req->buffer_handle != SVGA3D_INVALID_ID) { ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); if (ret == 0 && res->backup->base.num_pages * PAGE_SIZE < res->backup_size) { DRM_ERROR("Surface backup buffer is too small.\n"); @@ -1335,7 +1339,8 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, req->drm_surface_flags & drm_vmw_surface_flag_shareable, &backup_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); -- cgit v0.10.2 From 2e586a7e017a502410ba1c1a7411179e1d3fbb2b Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 14 Sep 2015 01:17:43 -0700 Subject: drm/vmwgfx: Map the fifo as cached On the guest kernel side, the FIFO has previously been mapped write-combined. This has worked because VMs have, up to now, not honored the mapping type and mapped the FIFO cached anyway. Since the FIFO is accessed cached by the CPU on the virtual device side, this leads to inconsistent mappings once the guest starts to honor the mapping types. So ask for cached mappings when we map the FIFO. We do this by using ioremap_cache() instead of ioremap_wc(), and remove the MTRR setup. On the TTM side, MOBs, GMRs and VRAM buffers are already requesting cached mappings for kernel- and user-space.
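For reference, the ioremap_*() family differs only in the cacheability attribute requested for the mapping; the patch swaps write-combined for cached and drops the now-pointless MTRR bookkeeping. A hedged sketch of the call (the wrapper and its arguments are illustrative):

	#include <linux/io.h>

	static void __iomem *example_map_fifo(resource_size_t start,
					      resource_size_t len)
	{
		return ioremap_cache(start, len);	/* was: ioremap_wc(start, len) */
	}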
Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e13b20b..2c7a25c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -752,12 +752,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); dev_priv->active_master = &dev_priv->fbdev_master; - - dev_priv->mmio_mtrr = arch_phys_wc_add(dev_priv->mmio_start, - dev_priv->mmio_size); - - dev_priv->mmio_virt = ioremap_wc(dev_priv->mmio_start, - dev_priv->mmio_size); + dev_priv->mmio_virt = ioremap_cache(dev_priv->mmio_start, + dev_priv->mmio_size); if (unlikely(dev_priv->mmio_virt == NULL)) { ret = -ENOMEM; @@ -913,7 +909,6 @@ out_no_device: out_err4: iounmap(dev_priv->mmio_virt); out_err3: - arch_phys_wc_del(dev_priv->mmio_mtrr); vmw_ttm_global_release(dev_priv); out_err0: for (i = vmw_res_context; i < vmw_res_max; ++i) @@ -964,7 +959,6 @@ static int vmw_driver_unload(struct drm_device *dev) ttm_object_device_release(&dev_priv->tdev); iounmap(dev_priv->mmio_virt); - arch_phys_wc_del(dev_priv->mmio_mtrr); if (dev_priv->ctx.staged_bindings) vmw_binding_state_free(dev_priv->ctx.staged_bindings); vmw_ttm_global_release(dev_priv); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b60b41f..a20f482 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -376,7 +376,6 @@ struct vmw_private { uint32_t initial_width; uint32_t initial_height; u32 __iomem *mmio_virt; - int mmio_mtrr; uint32_t capabilities; uint32_t max_gmr_ids; uint32_t max_gmr_pages; -- cgit v0.10.2 From 5aac2d3368210f15c2fc73f158e2110a0bb611ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:11:26 +0200 Subject: powerpc/mpc5121_ads_cpld: Prepare irq handler for irq argument removal The irq argument of most interrupt flow handlers is unused or merily used instead of a local variable. The handlers which need the irq argument can retrieve the irq number from the irq descriptor. Search and update was done with coccinelle and the invaluable help of Julia Lawall. Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu Cc: Anatolij Gustschin Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index 11090ab..cf82874 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -105,8 +105,10 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, } static void -cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) +cpld_pic_cascade(unsigned int __irq, struct irq_desc *desc) { + unsigned int irq; + irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); if (irq != NO_IRQ) { -- cgit v0.10.2 From 0a0dbd92584061549d9775030269365dc16ad289 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:13:42 +0200 Subject: powerpc/85xx: Prepare irq handlers for irq argument removal The irq argument of most interrupt flow handlers is unused or merily used instead of a local variable. The handlers which need the irq argument can retrieve the irq number from the irq descriptor. Search and update was done with coccinelle and the invaluable help of Julia Lawall. 
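The coccinelle conversion behind this series can be sketched as follows
(illustrative only; the my_* names are hypothetical and stand in for
the per-board cascade handlers):

#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>

static unsigned int my_lookup_child_irq(unsigned int parent_irq);

/* The flow handler stops trusting its irq argument (renamed __irq
 * ahead of its removal) and derives the number, chip and handler
 * data from the descriptor instead. */
static void my_cascade(unsigned int __irq, struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	unsigned int irq = irq_desc_get_irq(desc);
	unsigned int child = my_lookup_child_irq(irq);

	chained_irq_enter(chip, desc);
	if (child != NO_IRQ)
		generic_handle_irq(child);
	chained_irq_exit(chip, desc);
}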
Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu Cc: Scott Wood Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index b0753e2..13a8d1a 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -192,9 +192,10 @@ void mpc85xx_cds_fixup_bus(struct pci_bus *bus) } #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade_handler(unsigned int irq, +static void mpc85xx_8259_cascade_handler(unsigned int __irq, struct irq_desc *desc) { + unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq = i8259_irq(); if (cascade_irq != NO_IRQ) diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 55a9682..d78fda8 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -91,9 +91,10 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) (irq_hw_number_t)i); } -void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc) +void socrates_fpga_pic_cascade(unsigned int __irq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq; /* -- cgit v0.10.2 From 391de7f9ef9e6a500343d977ccd037b70e62aa45 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:17:00 +0200 Subject: powerpc/cell: Prepare irq handler for irq argument removal The irq argument of most interrupt flow handlers is unused or merily used instead of a local variable. The handlers which need the irq argument can retrieve the irq number from the irq descriptor. Search and update was done with coccinelle and the invaluable help of Julia Lawall. 
Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu Cc: Arnd Bergmann Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index a15f1ef..6558e7e 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -99,11 +99,12 @@ static void iic_ioexc_eoi(struct irq_data *d) { } -static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) +static void iic_ioexc_cascade(unsigned int __irq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct cbe_iic_regs __iomem *node_iic = (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int irq = irq_desc_get_irq(desc); unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; unsigned long bits, ack; int cascade; -- cgit v0.10.2 From 5230347ea70a811f0a526e8e4f0f529ac31b7d18 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:27:13 +0200 Subject: soc: dove: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc Search and replace done with coccinelle Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c index 6792aae..4decb26 100644 --- a/drivers/soc/dove/pmu.c +++ b/drivers/soc/dove/pmu.c @@ -224,7 +224,7 @@ static void __pmu_domain_register(struct pmu_domain *domain, /* PMU IRQ controller */ static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) { - struct pmu_data *pmu = irq_get_handler_data(irq); + struct pmu_data *pmu = irq_desc_get_handler_data(desc); struct irq_chip_generic *gc = pmu->irq_gc; struct irq_domain *domain = pmu->irq_domain; void __iomem *base = gc->reg_base; -- cgit v0.10.2 From b838c950b397ebed94eb11b473f1f17428a1443d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:20:25 +0200 Subject: soc: dove: Prepare irq handler for irq argument removal The irq argument of most interrupt flow handlers is unused or merily used instead of a local variable. The handlers which need the irq argument can retrieve the irq number from the irq descriptor. Search and update was done with coccinelle and the invaluable help of Julia Lawall. Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu Cc: Gregory CLEMENT diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c index 4decb26..6bc13f9 100644 --- a/drivers/soc/dove/pmu.c +++ b/drivers/soc/dove/pmu.c @@ -232,7 +232,7 @@ static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) u32 done = ~0; if (stat == 0) { - handle_bad_irq(irq, desc); + handle_bad_irq(irq_desc_get_irq(desc), desc); return; } -- cgit v0.10.2 From 1e6428124fe22906be0de1622c8fed8e50e5de05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 5 Sep 2015 08:58:10 +0200 Subject: x86/vm86: Fix the misleading CONFIG_VM86 Kconfig help text The CONFIG_VM86 Kconfig help text is actively misleading, so fix it: - Don't mark it 'obsolete' in the text as we'll support the ABI as long as CPUs support it. - Qualify the part about software emulation and mention that for some apps you want a real vm86 mode. - Don't scare users away from the option, instead explain what it does. 
Reported-by: Stas Sergeev Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Austin S Hemmelgarn Cc: Borislav Petkov Cc: Brian Gerst Cc: Josh Boyer Cc: Kees Cook Cc: Linus Torvalds Cc: Matthew Garrett Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 48f7433..d288153 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1004,7 +1004,7 @@ config X86_THERMAL_VECTOR depends on X86_MCE_INTEL config X86_LEGACY_VM86 - bool "Legacy VM86 support (obsolete)" + bool "Legacy VM86 support" default n depends on X86_32 ---help--- @@ -1016,19 +1016,20 @@ config X86_LEGACY_VM86 available to accelerate real mode DOS programs. However, any recent version of DOSEMU, X, or vbetool should be fully functional even without kernel VM86 support, as they will all - fall back to (pretty well performing) software emulation. + fall back to software emulation. Nevertheless, if you are using + a 16-bit DOS program where 16-bit performance matters, vm86 + mode might be faster than emulation and you might want to + enable this option. - Anything that works on a 64-bit kernel is unlikely to need - this option, as 64-bit kernels don't, and can't, support V8086 - mode. This option is also unrelated to 16-bit protected mode - and is not needed to run most 16-bit programs under Wine. + Note that any app that works on a 64-bit kernel is unlikely to + need this option, as 64-bit kernels don't, and can't, support + V8086 mode. This option is also unrelated to 16-bit protected + mode and is not needed to run most 16-bit programs under Wine. - Enabling this option adds considerable attack surface to the - kernel and slows down system calls and exception handling. + Enabling this option increases the complexity of the kernel + and slows down exception handling a tiny bit. - Unless you use very old userspace or need the last drop of - performance in your real mode DOS games and can't use KVM, - say N here. + If unsure, say N here. config VM86 bool -- cgit v0.10.2 From eef7635a22f6b144206b5ca2f1398f637acffc4d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Sep 2015 09:34:26 +0530 Subject: clockevents: Remove unused set_mode() callback All users are migrated to the per-state callbacks, get rid of the unused interface and the core support code. Signed-off-by: Viresh Kumar Signed-off-by: Thomas Gleixner Cc: linaro-kernel@lists.linaro.org Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/fd60de14cf6d125489c031207567bb255ad946f6.1441943991.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 31ce435..bdcf358 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -18,15 +18,6 @@ struct clock_event_device; struct module; -/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ -enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED, - CLOCK_EVT_MODE_SHUTDOWN, - CLOCK_EVT_MODE_PERIODIC, - CLOCK_EVT_MODE_ONESHOT, - CLOCK_EVT_MODE_RESUME, -}; - /* * Possible states of a clock event device. 
* @@ -86,16 +77,14 @@ enum clock_event_state { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries - * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. - * @set_state_periodic: switch state to periodic, if !set_mode - * @set_state_oneshot: switch state to oneshot, if !set_mode - * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode - * @set_state_shutdown: switch state to shutdown, if !set_mode - * @tick_resume: resume clkevt device, if !set_mode + * @set_state_periodic: switch state to periodic + * @set_state_oneshot: switch state to oneshot + * @set_state_oneshot_stopped: switch state to oneshot_stopped + * @set_state_shutdown: switch state to shutdown + * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -116,18 +105,10 @@ struct clock_event_device { u64 min_delta_ns; u32 mult; u32 shift; - enum clock_event_mode mode; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; - /* - * State transition callback(s): Only one of the two groups should be - * defined: - * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). - */ - void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 50eb107..a9b76a4 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns); static int __clockevents_switch_state(struct clock_event_device *dev, enum clock_event_state state) { - /* Transition with legacy set_mode() callback */ - if (dev->set_mode) { - /* Legacy callback doesn't support new modes */ - if (state > CLOCK_EVT_STATE_ONESHOT) - return -ENOSYS; - /* - * 'clock_event_state' and 'clock_event_mode' have 1-to-1 - * mapping until *_ONESHOT, and so a simple cast will work. 
- */ - dev->set_mode((enum clock_event_mode)state, dev); - dev->mode = (enum clock_event_mode)state; - return 0; - } - if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; @@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) { - dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - dev->mode = CLOCK_EVT_MODE_RESUME; - } else if (dev->tick_resume) { + if (dev->tick_resume) ret = dev->tick_resume(dev); - } return ret; } @@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind_device); -/* Sanity check of state transition callbacks */ -static int clockevents_sanity_check(struct clock_event_device *dev) -{ - /* Legacy set_mode() callback */ - if (dev->set_mode) { - /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || - dev->set_state_shutdown || dev->tick_resume || - dev->set_state_oneshot_stopped); - - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - return 0; - } - - if (dev->features & CLOCK_EVT_FEAT_DUMMY) - return 0; - - return 0; -} - /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(clockevents_sanity_check(dev)); - /* Initialize state to DETACHED */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d11c55b..4fcd99e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu) * the set mode function! */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); - dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 129c960..f75e35b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) (unsigned long long) dev->min_delta_ns); SEQ_printf(m, " mult: %u\n", dev->mult); SEQ_printf(m, " shift: %u\n", dev->shift); - SEQ_printf(m, " mode: %d\n", dev->mode); + SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev)); SEQ_printf(m, " next_event: %Ld nsecs\n", (unsigned long long) ktime_to_ns(dev->next_event)); @@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - if (dev->set_mode) { - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); + if (dev->set_state_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); - } else { - if (dev->set_state_shutdown) { - SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_state_shutdown); - SEQ_printf(m, "\n"); - } + } - if (dev->set_state_periodic) { - SEQ_printf(m, " periodic: "); - print_name_offset(m, dev->set_state_periodic); - SEQ_printf(m, "\n"); - } + if (dev->set_state_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_state_periodic); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot) { - SEQ_printf(m, " oneshot: "); - print_name_offset(m, dev->set_state_oneshot); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_state_oneshot); + 
SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot_stopped) { - SEQ_printf(m, " oneshot stopped: "); - print_name_offset(m, dev->set_state_oneshot_stopped); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot_stopped) { + SEQ_printf(m, " oneshot stopped: "); + print_name_offset(m, dev->set_state_oneshot_stopped); + SEQ_printf(m, "\n"); + } - if (dev->tick_resume) { - SEQ_printf(m, " resume: "); - print_name_offset(m, dev->tick_resume); - SEQ_printf(m, "\n"); - } + if (dev->tick_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, dev->tick_resume); + SEQ_printf(m, "\n"); } SEQ_printf(m, " event_handler: "); -- cgit v0.10.2 From f454b478861325f067fd58ba7ee9f1b5c4a9d6a0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 Sep 2015 09:45:58 -0600 Subject: x86/ldt: Fix small LDT allocation for Xen While the following commit: 37868fe113 ("x86/ldt: Make modify_ldt synchronous") added a nice comment explaining that Xen needs page-aligned whole page chunks for guest descriptor tables, it then nevertheless used kzalloc() on the small size path. As I'm unaware of guarantees for kmalloc(PAGE_SIZE, ) to return page-aligned memory blocks, I believe this needs to be switched back to __get_free_page() (or better get_zeroed_page()). Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/55E735D6020000780009F1E6@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 2bcc052..6acc9dd 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) if (alloc_size > PAGE_SIZE) new_ldt->entries = vzalloc(alloc_size); else - new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); if (!new_ldt->entries) { kfree(new_ldt); @@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(ldt->entries); else - kfree(ldt->entries); + free_page((unsigned long)ldt->entries); kfree(ldt); } -- cgit v0.10.2 From ba9cc453c400049f632d4eb2f2835e2f96654ddc Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 11 Sep 2015 08:49:47 +0100 Subject: arm64: dma-mapping: check whether cma area is initialized or not If CMA is turned on and CMA size is set to zero, kernel should behave as if CMA was not enabled at compile time. Every dma allocation should check existence of cma area before requesting memory. Arm has done this by commit e464ef16c4f0 ("arm: dma-mapping: add checking cma area initialized"), also do this for arm64. 
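The guard can be sketched in isolation like this (an illustrative
skeleton of the allocation path, not the full __dma_alloc_coherent();
error handling is trimmed and the helper name is hypothetical):

static void *coherent_alloc_sketch(struct device *dev, size_t size,
				   dma_addr_t *handle, gfp_t flags)
{
	/* Only take the CMA path when the device actually has a
	 * (non-empty) CMA area; a zero-sized area must behave as if
	 * CMA had been compiled out. */
	if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
		struct page *page;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*handle = phys_to_dma(dev, page_to_phys(page));
		return page_address(page);
	}

	/* Fall back to the normal (swiotlb-backed) allocator. */
	return swiotlb_alloc_coherent(dev, size, handle, flags);
}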
Acked-by: Catalin Marinas Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0bcc4bc..99224dc 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { + if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { struct page *page; void *addr; -- cgit v0.10.2 From b847415ce96efef819534b230d84695b1bc6d36b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 Sep 2015 18:22:00 +0100 Subject: arm64: Fix the pte_hw_dirty() check when AF/DBM is enabled Commit 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") introduced support for handling hardware updates of the access flag and dirty status. The PTE is automatically dirtied in hardware (if supported) by clearing the PTE_RDONLY bit when the PTE_DBM/PTE_WRITE bit is set. The pte_hw_dirty() macro was added to detect a hardware dirtied pte. The pte_dirty() macro checks for both software PTE_DIRTY and pte_hw_dirty(). Functions like pte_modify() clear the PTE_RDONLY bit since it is meant to be set in set_pte_at() when written to memory. In such cases, pte_hw_dirty() would return true even though such pte is clean. This patch changes pte_hw_dirty() to test the PTE_DBM/PTE_WRITE bit together with PTE_RDONLY. Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Reported-by: Julien Grall Tested-by: Julien Grall Tested-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6900b2d9..69207f0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -146,7 +146,7 @@ extern struct page *empty_zero_page; #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #ifdef CONFIG_ARM64_HW_AFDBM -#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else #define pte_hw_dirty(pte) (0) #endif @@ -238,7 +238,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * - * PTE_DIRTY || !PTE_RDONLY + * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) -- cgit v0.10.2 From 62d96c71d248834af2891293dc23cc344ae2ec36 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 Sep 2015 18:22:01 +0100 Subject: arm64: Fix pte_modify() to preserve the hardware dirty information The pte_modify() function with hardware AF/DBM enabled must transfer the hardware dirty information to the software PTE_DIRTY bit. However, it was setting this bit in newprot and the mask does not cover such bit. This patch sets PTE_DIRTY on the original pte which will be preserved in the returned value. 
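Taken together with the pte_hw_dirty() fix above, the intended flow can
be sketched like this (illustrative helper with a hypothetical name;
the mask handling mirrors pte_modify()):

/*
 * Dirty decision with hardware AF/DBM (PTE_WRITE aliases PTE_DBM):
 *
 *   PTE_WRITE  PTE_RDONLY   meaning
 *       0          1        clean, read-only
 *       0          0        still clean (e.g. RDONLY cleared by
 *                           pte_modify()) - must NOT read as dirty
 *       1          1        writable, not written yet
 *       1          0        hardware-dirtied
 */
static inline pte_t pte_modify_sketch(pte_t pte, pgprot_t newprot,
				      pteval_t mask)
{
	if (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
		pte = pte_mkdirty(pte);	/* carry hw dirt into PTE_DIRTY */

	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
	return pte;
}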
Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Cc: Julien Grall Tested-by: Julien Grall Tested-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 69207f0..31df98a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -503,7 +503,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - newprot |= PTE_DIRTY; + pte = pte_mkdirty(pte); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } -- cgit v0.10.2 From bf950040a53da35522e38066d9eb6ab7a1c9d136 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Sep 2015 18:22:02 +0100 Subject: arm64: pgtable: use a single bit for PTE_WRITE regardless of DBM Depending on CONFIG_ARM64_HW_AFDBM, we use either bit 57 or 51 of the pte to represent PTE_WRITE. Given that bit 51 is reserved prior to ARMv8.1, we can just use that bit regardless of the config option. That also matches what happens if a kernel configured with ARM64_HW_AFDBM=y is run on a CPU without the DBM functionality. Cc: Julien Grall Tested-by: Julien Grall Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 31df98a..b0329be 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -26,13 +26,9 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#ifdef CONFIG_ARM64_HW_AFDBM -#define PTE_WRITE (PTE_DBM) /* same as DBM */ -#else -#define PTE_WRITE (_AT(pteval_t, 1) << 57) -#endif #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* -- cgit v0.10.2 From cda34fc774d114afe98515a21c2063a803f922bc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Sep 2015 12:42:23 +0200 Subject: x86/paravirt: Remove the unused pv_time_ops::get_tsc_khz method It's not used anywhere. Signed-off-by: Juergen Gross Acked-by: Rusty Russell Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Cc: chrisw@sous-sol.org Cc: jeremy@goop.org Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1442227343-403-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4..31247b5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -97,7 +97,6 @@ struct pv_lazy_ops { struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); - unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { -- cgit v0.10.2 From 205ee117d4dc4a11ac3bd9638bb9b2e839f4de9a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Sep 2015 02:57:21 +0200 Subject: netfilter: nf_log: don't zap all loggers on unregister like nf_log_unset, nf_log_unregister must not reset the list of loggers. 
Otherwise, a call to nf_log_unregister() will render loggers of other nf protocols unusable: iptables -A INPUT -j LOG modprobe nf_log_arp ; rmmod nf_log_arp iptables -A INPUT -j LOG iptables: No chain/target/match by that name Fixes: 30e0c6a6be ("netfilter: nf_log: prepare net namespace support for loggers") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 675d12c..a5ebd7d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -107,11 +107,15 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { + const struct nf_logger *log; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < NFPROTO_NUMPROTO; i++) - RCU_INIT_POINTER(loggers[i][logger->type], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = nft_log_dereference(loggers[i][logger->type]); + if (log == logger) + RCU_INIT_POINTER(loggers[i][logger->type], NULL); + } mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unregister); -- cgit v0.10.2 From c973c3bcec3752455c4d7545edd42935cd7942d9 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:50 +0100 Subject: thermal: Add a function to get the minimum power The thermal core already has a function to get the maximum power of a cooling device: power_actor_get_max_power(). Add a function to get the minimum power of a cooling device. Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 5e5fc70..d9e525c 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1013,6 +1013,34 @@ int power_actor_get_max_power(struct thermal_cooling_device *cdev, } /** + * power_actor_get_min_power() - get the mainimum power that a cdev can consume + * @cdev: pointer to &thermal_cooling_device + * @tz: a valid thermal zone device pointer + * @min_power: pointer in which to store the minimum power + * + * Calculate the minimum power consumption in milliwatts that the + * cooling device can currently consume and store it in @min_power. + * + * Return: 0 on success, -EINVAL if @cdev doesn't support the + * power_actor API or -E* on other error. 
+ */ +int power_actor_get_min_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *min_power) +{ + unsigned long max_state; + int ret; + + if (!cdev_is_power_actor(cdev)) + return -EINVAL; + + ret = cdev->ops->get_max_state(cdev, &max_state); + if (ret) + return ret; + + return cdev->ops->state2power(cdev, tz, max_state, min_power); +} + +/** * power_actor_set_power() - limit the maximum power that a cooling device can consume * @cdev: pointer to &thermal_cooling_device * @instance: thermal instance to update diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 0c5518e..157d366 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -380,6 +380,8 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) int power_actor_get_max_power(struct thermal_cooling_device *, struct thermal_zone_device *tz, u32 *max_power); +int power_actor_get_min_power(struct thermal_cooling_device *, + struct thermal_zone_device *tz, u32 *min_power); int power_actor_set_power(struct thermal_cooling_device *, struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, @@ -415,6 +417,10 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 *max_power) { return 0; } +static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 *min_power) +{ return -ENODEV; } static inline int power_actor_set_power(struct thermal_cooling_device *cdev, struct thermal_instance *tz, u32 power) { return 0; } -- cgit v0.10.2 From e055bb0f9a6e5c09bedf41c2a5b881edbd7f2ed0 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:51 +0100 Subject: thermal: power_allocator: relax the requirement of a sustainable_power in tzp The power allocator governor currently requires that a sustainable power is passed as part of the thermal zone's thermal zone parameters. If that parameter is not provided, it doesn't register with the thermal zone. While this parameter is strongly recommended for optimal performance, it doesn't need to be mandatory. Relax the requirement and allow the governor to bind to thermal zones that don't provide it by estimating it from the cooling devices' power model. Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 9c8a7aa..5147347 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -73,6 +73,88 @@ struct power_allocator_params { }; /** + * estimate_sustainable_power() - Estimate the sustainable power of a thermal zone + * @tz: thermal zone we are operating in + * + * For thermal zones that don't provide a sustainable_power in their + * thermal_zone_params, estimate one. Calculate it using the minimum + * power of all the cooling devices as that gives a valid value that + * can give some degree of functionality. For optimal performance of + * this governor, provide a sustainable_power in the thermal zone's + * thermal_zone_params. 
+ */ +static u32 estimate_sustainable_power(struct thermal_zone_device *tz) +{ + u32 sustainable_power = 0; + struct thermal_instance *instance; + struct power_allocator_params *params = tz->governor_data; + + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + struct thermal_cooling_device *cdev = instance->cdev; + u32 min_power; + + if (instance->trip != params->trip_max_desired_temperature) + continue; + + if (power_actor_get_min_power(cdev, tz, &min_power)) + continue; + + sustainable_power += min_power; + } + + return sustainable_power; +} + +/** + * estimate_pid_constants() - Estimate the constants for the PID controller + * @tz: thermal zone for which to estimate the constants + * @sustainable_power: sustainable power for the thermal zone + * @trip_switch_on: trip point number for the switch on temperature + * @control_temp: target temperature for the power allocator governor + * @force: whether to force the update of the constants + * + * This function is used to update the estimation of the PID + * controller constants in struct thermal_zone_parameters. + * Sustainable power is provided in case it was estimated. The + * estimated sustainable_power should not be stored in the + * thermal_zone_parameters so it has to be passed explicitly to this + * function. + * + * If @force is not set, the values in the thermal zone's parameters + * are preserved if they are not zero. If @force is set, the values + * in thermal zone's parameters are overwritten. + */ +static void estimate_pid_constants(struct thermal_zone_device *tz, + u32 sustainable_power, int trip_switch_on, + int control_temp, bool force) +{ + int ret; + int switch_on_temp; + u32 temperature_threshold; + + ret = tz->ops->get_trip_temp(tz, trip_switch_on, &switch_on_temp); + if (ret) + switch_on_temp = 0; + + temperature_threshold = control_temp - switch_on_temp; + + if (!tz->tzp->k_po || force) + tz->tzp->k_po = int_to_frac(sustainable_power) / + temperature_threshold; + + if (!tz->tzp->k_pu || force) + tz->tzp->k_pu = int_to_frac(2 * sustainable_power) / + temperature_threshold; + + if (!tz->tzp->k_i || force) + tz->tzp->k_i = int_to_frac(10) / 1000; + /* + * The default for k_d and integral_cutoff is 0, so we can + * leave them as they are. 
+ */ +} + +/** * pid_controller() - PID controller * @tz: thermal zone we are operating in * @current_temp: the current temperature in millicelsius @@ -98,10 +180,20 @@ static u32 pid_controller(struct thermal_zone_device *tz, { s64 p, i, d, power_range; s32 err, max_power_frac; + u32 sustainable_power; struct power_allocator_params *params = tz->governor_data; max_power_frac = int_to_frac(max_allocatable_power); + if (tz->tzp->sustainable_power) { + sustainable_power = tz->tzp->sustainable_power; + } else { + sustainable_power = estimate_sustainable_power(tz); + estimate_pid_constants(tz, sustainable_power, + params->trip_switch_on, control_temp, + true); + } + err = control_temp - current_temp; err = int_to_frac(err); @@ -139,7 +231,7 @@ static u32 pid_controller(struct thermal_zone_device *tz, power_range = p + i + d; /* feed-forward the known sustainable dissipatable power */ - power_range = tz->tzp->sustainable_power + frac_to_int(power_range); + power_range = sustainable_power + frac_to_int(power_range); power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power); @@ -416,19 +508,18 @@ static int power_allocator_bind(struct thermal_zone_device *tz) { int ret; struct power_allocator_params *params; - int switch_on_temp, control_temp; - u32 temperature_threshold; + int control_temp; - if (!tz->tzp || !tz->tzp->sustainable_power) { - dev_err(&tz->device, - "power_allocator: missing sustainable_power\n"); + if (!tz->tzp) return -EINVAL; - } params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; + if (!tz->tzp->sustainable_power) + dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); + ret = get_governor_trips(tz, params); if (ret) { dev_err(&tz->device, @@ -437,29 +528,13 @@ static int power_allocator_bind(struct thermal_zone_device *tz) goto free; } - ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, - &switch_on_temp); - if (ret) - goto free; - ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, &control_temp); if (ret) goto free; - temperature_threshold = control_temp - switch_on_temp; - - tz->tzp->k_po = tz->tzp->k_po ?: - int_to_frac(tz->tzp->sustainable_power) / temperature_threshold; - tz->tzp->k_pu = tz->tzp->k_pu ?: - int_to_frac(2 * tz->tzp->sustainable_power) / - temperature_threshold; - tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000; - /* - * The default for k_d and integral_cutoff is 0, so we can - * leave them as they are. - */ - + estimate_pid_constants(tz, tz->tzp->sustainable_power, + params->trip_switch_on, control_temp, false); reset_pid_controller(params); tz->governor_data = params; -- cgit v0.10.2 From 8b7b390f805f09ff252351468a79ebabde1ab55a Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:52 +0100 Subject: thermal: power_allocator: relax the requirement of two passive trip points The power allocator governor currently requires that the thermal zone has at least two passive trip points. If there aren't, the governor refuses to bind to the thermal zone. This commit relaxes that requirement. Now the governor will bind to all thermal zones regardless of how many trip points they have. 
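For a feel of the constants this estimation produces, here is a worked
sketch with hypothetical values (sustainable_power = 3500 mW, switch-on
trip at 75000 mC, control trip at 90000 mC), mirroring the
estimate_pid_constants() arithmetic introduced earlier in this series:

#include <linux/types.h>

#define FRAC_BITS 10
#define int_to_frac(x) ((x) << FRAC_BITS)

static void pid_constants_example(void)
{
	u32 sustainable_power = 3500;				/* mW */
	int switch_on_temp = 75000, control_temp = 90000;	/* mC */
	u32 temperature_threshold = control_temp - switch_on_temp;

	s32 k_po = int_to_frac(sustainable_power) / temperature_threshold;
	/* (3500 << 10) / 15000 = 238, i.e. ~0.23 mW per mC of error */
	s32 k_pu = int_to_frac(2 * sustainable_power) / temperature_threshold;
	/* 477, i.e. ~0.47 mW per mC while still below the target */
	s32 k_i = int_to_frac(10) / 1000;
	/* 10, i.e. ~0.01: a deliberately slow integral term */
}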
Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/Documentation/thermal/power_allocator.txt b/Documentation/thermal/power_allocator.txt index c3797b5..a1ce223 100644 --- a/Documentation/thermal/power_allocator.txt +++ b/Documentation/thermal/power_allocator.txt @@ -4,7 +4,7 @@ Power allocator governor tunables Trip points ----------- -The governor requires the following two passive trip points: +The governor works optimally with the following two passive trip points: 1. "switch on" trip point: temperature above which the governor control loop starts operating. This is the first passive trip diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 5147347..ea57759 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -24,6 +24,8 @@ #include "thermal_core.h" +#define INVALID_TRIP -1 + #define FRAC_BITS 10 #define int_to_frac(x) ((x) << FRAC_BITS) #define frac_to_int(x) ((x) >> FRAC_BITS) @@ -61,6 +63,8 @@ static inline s64 div_frac(s64 x, s64 y) * Used to calculate the derivative term. * @trip_switch_on: first passive trip point of the thermal zone. The * governor switches on when this trip point is crossed. + * If the thermal zone only has one passive trip point, + * @trip_switch_on should be INVALID_TRIP. * @trip_max_desired_temperature: last passive trip point of the thermal * zone. The temperature we are * controlling for. @@ -432,43 +436,66 @@ unlock: return ret; } -static int get_governor_trips(struct thermal_zone_device *tz, - struct power_allocator_params *params) +/** + * get_governor_trips() - get the number of the two trip points that are key for this governor + * @tz: thermal zone to operate on + * @params: pointer to private data for this governor + * + * The power allocator governor works optimally with two trips points: + * a "switch on" trip point and a "maximum desired temperature". These + * are defined as the first and last passive trip points. + * + * If there is only one trip point, then that's considered to be the + * "maximum desired temperature" trip point and the governor is always + * on. If there are no passive or active trip points, then the + * governor won't do anything. In fact, its throttle function + * won't be called at all. 
+ */ +static void get_governor_trips(struct thermal_zone_device *tz, + struct power_allocator_params *params) { - int i, ret, last_passive; + int i, last_active, last_passive; bool found_first_passive; found_first_passive = false; - last_passive = -1; - ret = -EINVAL; + last_active = INVALID_TRIP; + last_passive = INVALID_TRIP; for (i = 0; i < tz->trips; i++) { enum thermal_trip_type type; + int ret; ret = tz->ops->get_trip_type(tz, i, &type); - if (ret) - return ret; + if (ret) { + dev_warn(&tz->device, + "Failed to get trip point %d type: %d\n", i, + ret); + continue; + } - if (!found_first_passive) { - if (type == THERMAL_TRIP_PASSIVE) { + if (type == THERMAL_TRIP_PASSIVE) { + if (!found_first_passive) { params->trip_switch_on = i; found_first_passive = true; + } else { + last_passive = i; } - } else if (type == THERMAL_TRIP_PASSIVE) { - last_passive = i; + } else if (type == THERMAL_TRIP_ACTIVE) { + last_active = i; } else { break; } } - if (last_passive != -1) { + if (last_passive != INVALID_TRIP) { params->trip_max_desired_temperature = last_passive; - ret = 0; + } else if (found_first_passive) { + params->trip_max_desired_temperature = params->trip_switch_on; + params->trip_switch_on = INVALID_TRIP; } else { - ret = -EINVAL; + params->trip_switch_on = INVALID_TRIP; + params->trip_max_desired_temperature = last_active; } - - return ret; } static void reset_pid_controller(struct power_allocator_params *params) @@ -497,11 +524,10 @@ static void allow_maximum_power(struct thermal_zone_device *tz) * power_allocator_bind() - bind the power_allocator governor to a thermal zone * @tz: thermal zone to bind it to * - * Check that the thermal zone is valid for this governor, that is, it - * has two thermal trips. If so, initialize the PID controller - * parameters and bind it to the thermal zone. + * Initialize the PID controller parameters and bind it to the thermal + * zone. * - * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM + * Return: 0 on success, -EINVAL if the thermal zone doesn't have tzp or -ENOMEM * if we ran out of memory. 
*/ static int power_allocator_bind(struct thermal_zone_device *tz) @@ -520,30 +546,23 @@ static int power_allocator_bind(struct thermal_zone_device *tz) if (!tz->tzp->sustainable_power) dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); - ret = get_governor_trips(tz, params); - if (ret) { - dev_err(&tz->device, - "thermal zone %s has wrong trip setup for power allocator\n", - tz->type); - goto free; - } + get_governor_trips(tz, params); - ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, - &control_temp); - if (ret) - goto free; + if (tz->trips > 0) { + ret = tz->ops->get_trip_temp(tz, + params->trip_max_desired_temperature, + &control_temp); + if (!ret) + estimate_pid_constants(tz, tz->tzp->sustainable_power, + params->trip_switch_on, + control_temp, false); + } - estimate_pid_constants(tz, tz->tzp->sustainable_power, - params->trip_switch_on, control_temp, false); reset_pid_controller(params); tz->governor_data = params; return 0; - -free: - kfree(params); - return ret; } static void power_allocator_unbind(struct thermal_zone_device *tz) @@ -574,13 +593,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, &switch_on_temp); - if (ret) { - dev_warn(&tz->device, - "Failed to get switch on temperature: %d\n", ret); - return ret; - } - - if (current_temp < switch_on_temp) { + if (!ret && (current_temp < switch_on_temp)) { tz->passive = 0; reset_pid_controller(params); allow_maximum_power(tz); -- cgit v0.10.2 From f5cbb182586ea36c6ad0e90e3534d18a5e9af094 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:53 +0100 Subject: thermal: power_allocator: don't require tzp to be present for the thermal zone Thermal zones created using thermal_zone_device_create() may not have tzp. As the governor gets its parameters from there, allocate it while the governor is bound to the thermal zone so that it can operate in it. In this case, tzp is freed when the thermal zone switches to another governor. Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index ea57759..718363f 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -58,6 +58,8 @@ static inline s64 div_frac(s64 x, s64 y) /** * struct power_allocator_params - parameters for the power allocator governor + * @allocated_tzp: whether we have allocated tzp for this thermal zone and + * it needs to be freed on unbind * @err_integral: accumulated error in the PID controller. * @prev_err: error in the previous iteration of the PID controller. * Used to calculate the derivative term. @@ -70,6 +72,7 @@ static inline s64 div_frac(s64 x, s64 y) * controlling for. */ struct power_allocator_params { + bool allocated_tzp; s64 err_integral; s32 prev_err; int trip_switch_on; @@ -527,8 +530,7 @@ static void allow_maximum_power(struct thermal_zone_device *tz) * Initialize the PID controller parameters and bind it to the thermal * zone. * - * Return: 0 on success, -EINVAL if the thermal zone doesn't have tzp or -ENOMEM - * if we ran out of memory. + * Return: 0 on success, or -ENOMEM if we ran out of memory. 
*/ static int power_allocator_bind(struct thermal_zone_device *tz) { @@ -536,13 +538,20 @@ static int power_allocator_bind(struct thermal_zone_device *tz) struct power_allocator_params *params; int control_temp; - if (!tz->tzp) - return -EINVAL; - params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; + if (!tz->tzp) { + tz->tzp = kzalloc(sizeof(*tz->tzp), GFP_KERNEL); + if (!tz->tzp) { + ret = -ENOMEM; + goto free_params; + } + + params->allocated_tzp = true; + } + if (!tz->tzp->sustainable_power) dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); @@ -563,11 +572,24 @@ static int power_allocator_bind(struct thermal_zone_device *tz) tz->governor_data = params; return 0; + +free_params: + kfree(params); + + return ret; } static void power_allocator_unbind(struct thermal_zone_device *tz) { + struct power_allocator_params *params = tz->governor_data; + dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); + + if (params->allocated_tzp) { + kfree(tz->tzp); + tz->tzp = NULL; + } + kfree(tz->governor_data); tz->governor_data = NULL; } -- cgit v0.10.2 From edb9272f35d8bc97c86101a13f67d0ba16f3eecc Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 14 Sep 2015 17:38:51 +0800 Subject: KVM: fix polling for guest halt continued even if disable it If there is already some polling ongoing, it's impossible to disable the polling, since as soon as somebody sets halt_poll_ns to 0, polling will never stop, as grow and shrink are only handled if halt_poll_ns is != 0. This patch fix it by reset vcpu->halt_poll_ns in order to stop polling when polling is disabled. Reported-by: Christian Borntraeger Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a25a731..eb4c9d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2043,7 +2043,8 @@ out: else if (vcpu->halt_poll_ns < halt_poll_ns && block_ns < halt_poll_ns) grow_halt_poll_ns(vcpu); - } + } else + vcpu->halt_poll_ns = 0; trace_kvm_vcpu_wakeup(block_ns, waited); } -- cgit v0.10.2 From 43297dda0a51e4ffed0888ce727c218cfb7474b6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Sep 2015 16:06:03 +0100 Subject: KVM: arm64: add workaround for Cortex-A57 erratum #852523 When restoring the system register state for an AArch32 guest at EL2, writes to DACR32_EL2 may not be correctly synchronised by Cortex-A57, which can lead to the guest effectively running with junk in the DACR and running into unexpected domain faults. This patch works around the issue by re-ordering our restoration of the AArch32 register aliases so that they happen before the AArch64 system registers. Ensuring that the registers are restored in this order guarantees that they will be correctly synchronised by the core. Cc: Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 8188f6a..39aa322 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -745,6 +745,9 @@ ENTRY(__kvm_vcpu_run) // Guest context add x2, x0, #VCPU_CONTEXT + // We must restore the 32-bit state before the sysregs, thanks + // to Cortex-A57 erratum #852523. + restore_guest_32bit_state bl __restore_sysregs skip_debug_state x3, 1f @@ -752,7 +755,6 @@ ENTRY(__kvm_vcpu_run) kern_hyp_va x3 bl __restore_debug 1: - restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. 
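Restated as a sketch (the grow/shrink helpers are the existing ones in
kvm_main.c; the exact shrink condition is abbreviated here), the
polling-window update after the KVM halt-polling fix above becomes:

static void halt_poll_update_sketch(struct kvm_vcpu *vcpu, u64 block_ns)
{
	if (halt_poll_ns) {
		if (block_ns <= vcpu->halt_poll_ns)
			return;				/* window is fine */
		if (block_ns > halt_poll_ns)
			shrink_halt_poll_ns(vcpu);	/* blocked too long */
		else if (vcpu->halt_poll_ns < halt_poll_ns &&
			 block_ns < halt_poll_ns)
			grow_halt_poll_ns(vcpu);
	} else {
		/* Polling disabled: also reset any previously grown
		 * per-vcpu window, otherwise polling never stops. */
		vcpu->halt_poll_ns = 0;
	}
}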
-- cgit v0.10.2 From 586b286b110e94eb31840ac5afc0c24e0881fe34 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 9 Sep 2015 21:34:51 -0400 Subject: dm crypt: constrain crypt device's max_segment_size to PAGE_SIZE Setting the dm-crypt device's max_segment_size to PAGE_SIZE is an unfortunate constraint that is required to avoid the potential for exceeding dm-crypt's underlying device's max_segments limits -- due to crypt_alloc_buffer() possibly allocating pages for the encryption bio that are not as physically contiguous as the original bio. It is interesting to note that this problem was already fixed back in 2007 via commit 91e106259 ("dm crypt: use bio_add_page"). But Linux 4.0 commit cf2f1abfb ("dm crypt: don't allocate pages for a partial request") regressed dm-crypt back to _not_ using bio_add_page(). But given dm-crypt's cpu parallelization changes all depend on commit cf2f1abfb's abandoning of the more complex io fragments processing that dm-crypt previously had we cannot easily go back to using bio_add_page(). So all said the cleanest way to resolve this issue is to fix dm-crypt to properly constrain the original bios entering dm-crypt so the encryption bios that dm-crypt generates from the original bios are always compatible with the underlying device's max_segments queue limits. It should be noted that technically Linux 4.3 does _not_ need this fix because of the block core's new late bio-splitting capability. But, it is reasoned, there is little to be gained by having the block core split the encrypted bio that is composed of PAGE_SIZE segments. That said, in the future we may revert this change. Fixes: cf2f1abfb ("dm crypt: don't allocate pages for a partial request") Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=104421 Suggested-by: Jeff Moyer Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.0+ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d60c88d..4b3b6f8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -968,7 +968,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); /* * Generate a new unfragmented bio with the given size - * This should never violate the device limitations + * This should never violate the device limitations (but only because + * max_segment_size is being constrained to PAGE_SIZE). * * This function may be called concurrently. If we allocate from the mempool * concurrently, there is a possibility of deadlock. For example, if we have @@ -2045,9 +2046,20 @@ static int crypt_iterate_devices(struct dm_target *ti, return fn(ti, cc->dev, cc->start, ti->len, data); } +static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + /* + * Unfortunate constraint that is required to avoid the potential + * for exceeding underlying device's max_segments limits -- due to + * crypt_alloc_buffer() possibly allocating pages for the encryption + * bio that are not as physically contiguous as the original bio. 
+ */ + limits->max_segment_size = PAGE_SIZE; +} + static struct target_type crypt_target = { .name = "crypt", - .version = {1, 14, 0}, + .version = {1, 14, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -2058,6 +2070,7 @@ static struct target_type crypt_target = { .resume = crypt_resume, .message = crypt_message, .iterate_devices = crypt_iterate_devices, + .io_hints = crypt_io_hints, }; static int __init dm_crypt_init(void) -- cgit v0.10.2 From 63cdbc06b357dcb3a7104a421ee4a4550d7fadfd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Sep 2015 17:06:27 +0200 Subject: netfilter: bridge: fix routing of bridge frames with call-iptables=1 We can't re-use the physoutdev storage area. 1. When using NFQUEUE in PREROUTING, we attempt to bump a bogus refcnt since nf_bridge->physoutdev is garbage (ipv4/ipv6 address) 2. for same reason, we crash in physdev match in FORWARD or later if skb is routed instead of bridged. This increases nf_bridge_info to 40 bytes, but we have no other choice. Fixes: 72b1e5e4cac7 ("netfilter: bridge: reduce nf_bridge_info to 32 bytes again") Reported-by: Sander Eikelenboom Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2738d35..9987af0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -179,6 +179,9 @@ struct nf_bridge_info { u8 bridged_dnat:1; __u16 frag_max_size; struct net_device *physindev; + + /* always valid & non-NULL from FORWARD on, for physdev match */ + struct net_device *physoutdev; union { /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; @@ -189,9 +192,6 @@ struct nf_bridge_info { * skb is out in neigh layer. */ char neigh_header[8]; - - /* always valid & non-NULL from FORWARD on, for physdev match */ - struct net_device *physoutdev; }; }; #endif -- cgit v0.10.2 From ba378ca9c04a5fc1b2cf0f0274a9d02eb3d1bad9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Sep 2015 18:04:09 +0200 Subject: netfilter: nft_compat: skip family comparison in case of NFPROTO_UNSPEC Fix lookup of existing match/target structures in the corresponding list by skipping the family check if NFPROTO_UNSPEC is used. This is resulting in the allocation and insertion of one match/target structure for each use of them. So this not only bloats memory consumption but also severely affects the time to reload the ruleset from the iptables-compat utility. After this patch, iptables-compat-restore and iptables-compat take almost the same time to reload large rulesets. 
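The lookup path, condensed to a sketch (match side shown; the target
side is symmetric), makes the failure mode visible: with a strict
family test, an NFPROTO_UNSPEC match never compares equal for ipv4 or
ipv6 callers, so the loop falls through and a duplicate entry is
allocated on every use:

	list_for_each_entry(nft_match, &nft_match_list, head) {
		struct xt_match *match = nft_match->ops.data;

		/* nft_match_cmp() below treats NFPROTO_UNSPEC as a
		 * wildcard, so cached entries are found and reused. */
		if (nft_match_cmp(match, mt_name, rev, family))
			return &nft_match->ops;
	}
	/* only reached when no compatible entry exists: allocate */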
Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 66def31..9c8fab0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -619,6 +619,13 @@ struct nft_xt { static struct nft_expr_type nft_match_type; +static bool nft_match_cmp(const struct xt_match *match, + const char *name, u32 rev, u32 family) +{ + return strcmp(match->name, name) == 0 && match->revision == rev && + (match->family == NFPROTO_UNSPEC || match->family == family); +} + static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -626,7 +633,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct nft_xt *nft_match; struct xt_match *match; char *mt_name; - __u32 rev, family; + u32 rev, family; if (tb[NFTA_MATCH_NAME] == NULL || tb[NFTA_MATCH_REV] == NULL || @@ -641,8 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) { + if (nft_match_cmp(match, mt_name, rev, family)) { if (!try_module_get(match->me)) return ERR_PTR(-ENOENT); @@ -693,6 +699,13 @@ static LIST_HEAD(nft_target_list); static struct nft_expr_type nft_target_type; +static bool nft_target_cmp(const struct xt_target *tg, + const char *name, u32 rev, u32 family) +{ + return strcmp(tg->name, name) == 0 && tg->revision == rev && + (tg->family == NFPROTO_UNSPEC || tg->family == family); +} + static const struct nft_expr_ops * nft_target_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -700,7 +713,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct nft_xt *nft_target; struct xt_target *target; char *tg_name; - __u32 rev, family; + u32 rev, family; if (tb[NFTA_TARGET_NAME] == NULL || tb[NFTA_TARGET_REV] == NULL || @@ -715,8 +728,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) { + if (nft_target_cmp(target, tg_name, rev, family)) { if (!try_module_get(target->me)) return ERR_PTR(-ENOENT); -- cgit v0.10.2 From 4857c91f0d195f05908fff296ba1ec5fca87066c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 12:00:55 +0200 Subject: x86/ioapic: Force affinity setting in setup_ioapic_dest() The recent ioapic cleanups changed the affinity setting in setup_ioapic_dest() from a direct write to the hardware to the delayed affinity setup via irq_set_affinity(). That results in a warning from chained_irq_exit(): WARNING: CPU: 0 PID: 5 at kernel/irq/migration.c:32 irq_move_masked_irq [] irq_move_masked_irq+0xb8/0xc0 [] ioapic_ack_level+0x111/0x130 [] intel_gpio_irq_handler+0x148/0x1c0 The reason is that irq_set_affinity() does not write directly to the hardware. It marks the affinity setting as pending and executes it from the next interrupt. The chained handler infrastructure does not take the irq descriptor lock for performance reasons because such a chained interrupt is not visible to any interfaces. So the delayed affinity setting triggers the warning in irq_move_masked_irq(). Restore the old behaviour by calling the set_affinity function of the ioapic chip in setup_ioapic_dest(). 
This is safe as none of the interrupts can be in flight at this point. Fixes: aa5cb97f14a2 'x86/irq: Remove x86_io_apic_ops.set_affinity and related interfaces' Reported-and-tested-by: Mika Westerberg Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: jarkko.nikula@linux.intel.com diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38a76f8..5c60bb1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; const struct cpumask *mask; struct irq_data *idata; + struct irq_chip *chip; if (skip_ioapic_setup == 1) return; @@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - irq_set_affinity(irq, mask); + chip = irq_data_get_irq_chip(idata); + chip->irq_set_affinity(idata, mask, false); } - } #endif -- cgit v0.10.2 From 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 30 Jul 2015 16:24:43 -0700 Subject: x86/apic: Serialize LVTT and TSC_DEADLINE writes The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an MSR. The write to the TSC_DEADLINE MSR is not serializing, so it's not guaranteed that the write to LVTT has reached the APIC before the TSC_DEADLINE MSR is written. In such a case the write to the MSR is ignored and as a consequence the local timer interrupt never fires. The SDM describes this issue for xAPIC and x2APIC modes. The serialization methods recommended by the SDM differ. xAPIC: "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b. 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter. 3. If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2. 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline." x2APIC: "To allow for efficient access to the APIC registers in x2APIC mode, the serializing semantics of WRMSR are relaxed when writing to the APIC registers. Thus, system software should not use 'WRMSR to APIC registers in x2APIC mode' as a serializing instruction. Read and write accesses to the APIC registers will occur in program order.
A WRMSR to an APIC register may complete before all preceding stores are globally visible; software can prevent this by inserting a serializing instruction, an SFENCE, or an MFENCE before the WRMSR." The xAPIC method is to just wait for the memory mapped write to hit the LVTT by checking whether the MSR write has reached the hardware. There is no reason why a proper MFENCE after the memory mapped write would not do the same. Andi Kleen confirmed that MFENCE is sufficient for the xAPIC case as well. Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE support. [ tglx: Massaged the changelog ] Signed-off-by: Shaohua Li Reviewed-by: Ingo Molnar Cc: Andi Kleen Cc: H. Peter Anvin Cc: stable@vger.kernel.org #v3.7+ Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ca3e46..24e94ce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } -- cgit v0.10.2 From 81523aac01a7f68af2576800c8623edfb6323017 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 11 Sep 2015 14:12:53 +0800 Subject: KVM: make the declaration of functions within 80 characters After 'commit 0b8ba4a2b658 ("KVM: fix checkpatch.pl errors in kvm/coalesced_mmio.h")', the declarations of the two functions will exceed 80 characters. This patch reduces the tabs to keep each line within 80 characters. Signed-off-by: Wei Yang Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 5cbf190..6bca74c 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev { int kvm_coalesced_mmio_init(struct kvm *kvm); void kvm_coalesced_mmio_free(struct kvm *kvm); int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); #else -- cgit v0.10.2 From e4fba9b5be12d577d2e2c19fdca6b0744c3f271e Mon Sep 17 00:00:00 2001 From: Koro Chen Date: Mon, 14 Sep 2015 14:51:17 +0800 Subject: ASoC: mediatek: Increase periods_min in capture In capture, there is a chance that the hw_ptr reported at IRQ time is a little smaller than period_size due to the internal AFE buffer. In the case of a ping-pong buffer: |xxxxxxxxxxxxxxxxxxxxxxxxxxxx--|-----------------------------| hw_ptr < period_size This available buffer will not be read since its size is smaller than avail_min (which is period_size by default), and the read thread continues to sleep. If the next hw_ptr is just a little larger than buffer_size, an overrun occurs. One more period can hold the possible unread buffer.
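A minimal sketch of the arithmetic with made-up numbers (1024-frame periods; the driver's real values differ). The reader only wakes once the available data reaches avail_min, which defaults to period_size:

#include <stdio.h>

int main(void)
{
        unsigned int period_size = 1024;            /* frames per period */
        unsigned int buffer_size = 2 * period_size; /* ping-pong buffer */
        unsigned int avail_min = period_size;
        unsigned int hw_ptr = 1000;  /* IRQ reports slightly < period_size */

        printf("avail=%u < avail_min=%u -> reader keeps sleeping\n",
               hw_ptr, avail_min);

        /* Next IRQ: unread captured data now exceeds the whole buffer. */
        hw_ptr = buffer_size + 24;
        printf("%u unread frames > %u frame buffer -> overrun\n",
               hw_ptr, buffer_size);

        /* With periods_min = 3, the extra period absorbs the backlog. */
        printf("3 periods = %u frames can hold %u -> no overrun\n",
               3 * period_size, hw_ptr);
        return 0;
}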
Signed-off-by: Koro Chen Signed-off-by: Mark Brown diff --git a/sound/soc/mediatek/mtk-afe-pcm.c b/sound/soc/mediatek/mtk-afe-pcm.c index d190fe0..f5baf3c 100644 --- a/sound/soc/mediatek/mtk-afe-pcm.c +++ b/sound/soc/mediatek/mtk-afe-pcm.c @@ -549,6 +549,23 @@ static int mtk_afe_dais_startup(struct snd_pcm_substream *substream, memif->substream = substream; snd_soc_set_runtime_hwparams(substream, &mtk_afe_hardware); + + /* + * Capture cannot use ping-pong buffer since hw_ptr at IRQ may be + * smaller than period_size due to AFE's internal buffer. + * This easily leads to overrun when avail_min is period_size. + * One more period can hold the possible unread buffer. + */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { + ret = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIODS, + 3, + mtk_afe_hardware.periods_max); + if (ret < 0) { + dev_err(afe->dev, "hw_constraint_minmax failed\n"); + return ret; + } + } ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); if (ret < 0) -- cgit v0.10.2 From 42617869bf095c650e67aad4001cab4224e7fa98 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 1 Sep 2015 12:31:24 +0800 Subject: ASoC: SPEAr: Make SND_SPEAR_SOC select SND_SOC_GENERIC_DMAENGINE_PCM devm_snd_dmaengine_pcm_register() is guarded by CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM. Signed-off-by: Axel Lin Signed-off-by: Mark Brown diff --git a/sound/soc/spear/Kconfig b/sound/soc/spear/Kconfig index 0a53053..4fb9141 100644 --- a/sound/soc/spear/Kconfig +++ b/sound/soc/spear/Kconfig @@ -1,6 +1,6 @@ config SND_SPEAR_SOC tristate - select SND_DMAENGINE_PCM + select SND_SOC_GENERIC_DMAENGINE_PCM config SND_SPEAR_SPDIF_OUT tristate -- cgit v0.10.2 From 921e54680aefe52f28d9ce9485edb1bfef4b92a8 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 1 Sep 2015 14:50:15 +0800 Subject: ASoC: au1x: psc-i2s: Fix unused variable 'ret' warning Fix below build warning: sound/soc/au1x/psc-i2s.c: In function 'au1xpsc_i2s_drvprobe': sound/soc/au1x/psc-i2s.c:299:6: warning: unused variable 'ret' [-Wunused-variable] Reported-by: kbuild test robot Signed-off-by: Axel Lin Acked-by: Manuel Lauss Signed-off-by: Mark Brown diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c index 38e853a..0bf9d62 100644 --- a/sound/soc/au1x/psc-i2s.c +++ b/sound/soc/au1x/psc-i2s.c @@ -296,7 +296,6 @@ static int au1xpsc_i2s_drvprobe(struct platform_device *pdev) { struct resource *iores, *dmares; unsigned long sel; - int ret; struct au1xpsc_audio_data *wd; wd = devm_kzalloc(&pdev->dev, sizeof(struct au1xpsc_audio_data), -- cgit v0.10.2 From bd315aab8a3ab1bc7074774b89a5d8ec7c1ff7ab Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 14 Sep 2015 10:23:55 +0000 Subject: perf top: Fix segfault pressing -> with no hist entries 'perf top' segfaults with the following operation: # perf top -e page-faults -p 11400 # 11400 never generates page-fault Then on the resulting empty interface, press the right arrow key: # ./perf top -e page-faults -p 11400 perf: Segmentation fault -------- backtrace -------- ./perf[0x535428] /lib64/libc.so.6(+0x3545f)[0x7f0dd360745f] ./perf[0x531d46] ./perf(perf_evlist__tui_browse_hists+0x96)[0x5340d6] ./perf[0x44ba2f] /lib64/libpthread.so.0(+0x81d0)[0x7f0dd49dc1d0] /lib64/libc.so.6(clone+0x6c)[0x7f0dd36b90dc] The bug resides in perf_evsel__hists_browse(): in the above circumstance, browser->selection can be NULL, but the code after skip_annotation doesn't consider it. This patch fixes it by checking browser->selection before fetching browser->selection->map.
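The pattern of the fix, as a standalone sketch with hypothetical stand-in types (the real hist browser structs are more involved):

#include <stdio.h>
#include <stddef.h>

struct map { const char *name; };
struct selection { struct map *map; };
struct browser { struct selection *selection; };

/* Add a menu option for a map; tolerates map == NULL. */
static void add_map_opt(struct map *map)
{
        printf("map option: %s\n", map ? map->name : "(none)");
}

int main(void)
{
        struct browser empty = { .selection = NULL };

        /*
         * The buggy code passed empty.selection->map, dereferencing NULL
         * when the browser shows no hist entries; the guarded form below
         * degrades to a NULL map instead.
         */
        add_map_opt(empty.selection ? empty.selection->map : NULL);
        return 0;
}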
Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1442226235-117265-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cf86f2d..c04c60d 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1968,7 +1968,8 @@ skip_annotation: &options[nr_options], dso); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection->map); + browser->selection ? + browser->selection->map : NULL); /* perf script support */ if (browser->he_selection) { @@ -1976,6 +1977,15 @@ skip_annotation: &actions[nr_options], &options[nr_options], thread, NULL); + /* + * Note that browser->selection != NULL + * when browser->he_selection is not NULL, + * so we don't need to check browser->selection + * before fetching browser->selection->sym like what + * we do before fetching browser->selection->map. + * + * See hist_browser__show_entry. + */ nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], -- cgit v0.10.2 From 295c3405a8bbd69ee8c8eb6580f30b0b8739b33a Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 9 Sep 2015 21:27:42 +0300 Subject: ASoC: davinci-mcasp: Set .symmetric_rates = 1 in snd_soc_dai_driver The TX and RX directions share the same bit clock and frame sync, so the sample rate must be the same in both directions. Signed-off-by: Jyri Sarha Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 1260b31..2c5a2c5 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1298,6 +1298,7 @@ static struct snd_soc_dai_driver davinci_mcasp_dai[] = { .ops = &davinci_mcasp_dai_ops, .symmetric_samplebits = 1, + .symmetric_rates = 1, }, { .name = "davinci-mcasp.1", -- cgit v0.10.2 From 380528f742796e1244e931c51e77c9ed664f566c Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Sun, 13 Sep 2015 13:20:36 +0200 Subject: clk: rockchip: add critical clock for rk3368 Again a result of the gpio-clock liberation: the rk3368 needs pclk_pd_pmu marked as critical in order to boot successfully.
Reported-by: Mark Rutland Signed-off-by: Heiko Stuebner Tested-by: Mark Rutland Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c index 9c5d61e..7e6b783 100644 --- a/drivers/clk/rockchip/clk-rk3368.c +++ b/drivers/clk/rockchip/clk-rk3368.c @@ -818,6 +818,10 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { GATE(0, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS), }; +static const char *const rk3368_critical_clocks[] __initconst = { + "pclk_pd_pmu", +}; + static void __init rk3368_clk_init(struct device_node *np) { void __iomem *reg_base; @@ -862,6 +866,8 @@ static void __init rk3368_clk_init(struct device_node *np) RK3368_GRF_SOC_STATUS0); rockchip_clk_register_branches(rk3368_clk_branches, ARRAY_SIZE(rk3368_clk_branches)); + rockchip_clk_protect_critical(rk3368_critical_clocks, + ARRAY_SIZE(rk3368_critical_clocks)); rockchip_clk_register_armclk(ARMCLKB, "armclkb", mux_armclkb_p, ARRAY_SIZE(mux_armclkb_p), -- cgit v0.10.2 From 7ef7cc9fdf9853b3027c55b9481991695ad5e5b2 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Mon, 14 Sep 2015 12:13:27 +0800 Subject: seltests/zram: fix syntax error Not all shells define a variable UID. This is a bash and zsh feature only. In other shells, the UID variable is not defined, so here test command expands to [ != 0 ] which is a syntax error. Without this patch: root@HGH1000007090:/opt/work/linux/tools/testing/selftests/zram# sh zram.sh zram.sh: 8: [: !=: unexpected operator zram.sh : No zram.ko module or /dev/zram0 device file not found zram.sh : CONFIG_ZRAM is not set With this patch: root@HGH1000007090:/opt/work/linux/tools/testing/selftests/zram# sh ./zram.sh zram.sh : No zram.ko module or /dev/zram0 device file not found zram.sh : CONFIG_ZRAM is not set Signed-off-by: Zhang Zhen Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 20de9a7..683a292 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -1,15 +1,7 @@ #!/bin/bash TCID="zram.sh" -check_prereqs() -{ - local msg="skip all tests:" - - if [ $UID != 0 ]; then - echo $msg must be run as root >&2 - exit 0 - fi -} +. ./zram_lib.sh run_zram () { echo "--------------------" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 424e68e..f6a9c73 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -23,8 +23,9 @@ trap INT check_prereqs() { local msg="skip all tests:" + local uid=$(id -u) - if [ $UID != 0 ]; then + if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 exit 0 fi -- cgit v0.10.2 From b623c4daadb5a4bfaef62783085b95bd9ba5a77c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 21 Aug 2015 11:22:35 -0700 Subject: selftests/seccomp: add support for s390 This adds support for s390 to the seccomp selftests. Some improvements were made to enhance the accuracy of failure reporting, and additional tests were added to validate assumptions about the currently traced syscall. Also adds early asserts for running on older kernels to avoid noise when the seccomp syscall is not implemented. 
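The availability probe the new asserts rely on boils down to the following standalone check (a sketch; the fallback __NR_seccomp value is the x86_64 number, and other arches differ, as the hunks below show):

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_seccomp
# define __NR_seccomp 317  /* x86_64; see the per-arch values in the patch */
#endif

int main(void)
{
        /*
         * Probe with an invalid operation: kernels without the syscall
         * fail with ENOSYS, kernels with it reject the bogus op with
         * EINVAL -- the same distinction the ASSERT_NE(ENOSYS, errno)
         * checks rely on.
         */
        long ret = syscall(__NR_seccomp, -1, 0, NULL);

        if (ret == -1 && errno == ENOSYS)
                printf("kernel does not support the seccomp syscall\n");
        else
                printf("seccomp syscall present (ret=%ld, errno=%d)\n",
                       ret, errno);
        return 0;
}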
Signed-off-by: Kees Cook Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index a004b4c..770f47a 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1210,6 +1210,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM gpr[0] # define SYSCALL_RET gpr[3] +#elif defined(__s390__) +# define ARCH_REGS s390_regs +# define SYSCALL_NUM gprs[2] +# define SYSCALL_RET gprs[2] #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1243,7 +1247,8 @@ void change_syscall(struct __test_metadata *_metadata, ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); EXPECT_EQ(0, ret); -#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__powerpc__) +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(__powerpc__) || defined(__s390__) { regs.SYSCALL_NUM = syscall; } @@ -1281,17 +1286,21 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); EXPECT_EQ(0, ret); + /* Validate and take action on expected syscalls. */ switch (msg) { case 0x1002: /* change getpid to getppid. */ + EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); change_syscall(_metadata, tracee, __NR_getppid); break; case 0x1003: /* skip gettid. */ + EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); change_syscall(_metadata, tracee, -1); break; case 0x1004: /* do nothing (allow getppid) */ + EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); break; default: EXPECT_EQ(0, msg) { @@ -1409,6 +1418,8 @@ TEST_F(TRACE_syscall, syscall_dropped) # define __NR_seccomp 277 # elif defined(__powerpc__) # define __NR_seccomp 358 +# elif defined(__s390__) +# define __NR_seccomp 348 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff @@ -1453,6 +1464,9 @@ TEST(seccomp_syscall) /* Reject insane operation. */ ret = seccomp(-1, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } EXPECT_EQ(EINVAL, errno) { TH_LOG("Did not reject crazy op value!"); } @@ -1501,6 +1515,9 @@ TEST(seccomp_syscall_mode_lock) } ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } EXPECT_EQ(0, ret) { TH_LOG("Could not install filter!"); } @@ -1535,6 +1552,9 @@ TEST(TSYNC_first) ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } EXPECT_EQ(0, ret) { TH_LOG("Could not install initial filter with TSYNC!"); } @@ -1694,6 +1714,9 @@ TEST_F(TSYNC, siblings_fail_prctl) /* Check prctl failure detection by requesting sib 0 diverge. 
*/ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } ASSERT_EQ(0, ret) { TH_LOG("setting filter failed"); } @@ -1731,6 +1754,9 @@ TEST_F(TSYNC, two_siblings_with_ancestor) } ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); } @@ -1805,6 +1831,9 @@ TEST_F(TSYNC, two_siblings_with_no_filter) ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, &self->apply_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } ASSERT_EQ(0, ret) { TH_LOG("Could install filter on all threads!"); } @@ -1833,6 +1862,9 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) } ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); } @@ -1890,6 +1922,9 @@ TEST_F(TSYNC, two_siblings_not_under_filter) } ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); } diff --git a/tools/testing/selftests/seccomp/test_harness.h b/tools/testing/selftests/seccomp/test_harness.h index 977a6af..fb28416 100644 --- a/tools/testing/selftests/seccomp/test_harness.h +++ b/tools/testing/selftests/seccomp/test_harness.h @@ -370,11 +370,8 @@ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ if (!(__exp _t __seen)) { \ - unsigned long long __exp_print = 0; \ - unsigned long long __seen_print = 0; \ - /* Avoid casting complaints the scariest way we can. */ \ - memcpy(&__exp_print, &__exp, sizeof(__exp)); \ - memcpy(&__seen_print, &__seen, sizeof(__seen)); \ + unsigned long long __exp_print = (unsigned long long)__exp; \ + unsigned long long __seen_print = (unsigned long long)__seen; \ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ #_expected, __exp_print, #_t, \ #_seen, __seen_print); \ -- cgit v0.10.2 From 1087d019176df7e406387de3fe76129c74d24081 Mon Sep 17 00:00:00 2001 From: Bamvor Jian Zhang Date: Wed, 9 Sep 2015 21:06:25 +0800 Subject: selftests: rename jump label to static_keys Commit 2bf9e0ab08c6 ("locking/static_keys: Provide a selftest") renamed jump_label directory to static_keys and failed to update the Makefile, causing the selftests build to fail. This commit fixes it by updating the Makefile with the new name and also moves the entry into the correct position to keep the list alphabetically sorted. 
Fixes: 2bf9e0ab08c6 ("locking/static_keys: Provide a selftest") Signed-off-by: Bamvor Jian Zhang Acked-by: Shuah Khan Acked-by: Michael Ellerman Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 89b05e2..cfe1213 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -16,12 +16,12 @@ TARGETS += powerpc TARGETS += ptrace TARGETS += seccomp TARGETS += size +TARGETS += static_keys TARGETS += sysctl ifneq (1, $(quicktest)) TARGETS += timers endif TARGETS += user -TARGETS += jumplabel TARGETS += vm TARGETS += x86 TARGETS += zram -- cgit v0.10.2 From cc19ada7340b5ed87a94ea381ff7ade6a053b2f3 Mon Sep 17 00:00:00 2001 From: Bamvor Jian Zhang Date: Wed, 9 Sep 2015 21:06:26 +0800 Subject: selftests: mqueue: allow extra cflags Change from = to += in order to allow the user to pass whatever CFLAGS they wish (e.g. to pass the proper headers and libraries (popt.h and libpopt.so) when cross-compiling). Suggested-by: Michael Ellerman Signed-off-by: Bamvor Jian Zhang Acked-by: Michael Ellerman Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 0e3b41e..ca8327f 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,4 +1,4 @@ -CFLAGS = -O2 +CFLAGS += -O2 all: $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt -- cgit v0.10.2 From b11054b959cf921a646b64983bc35193c1597739 Mon Sep 17 00:00:00 2001 From: Bamvor Jian Zhang Date: Wed, 9 Sep 2015 21:06:27 +0800 Subject: selftests: mqueue: simplify the Makefile Use make's implicit rule for building simple C programs. Suggested-by: Michael Ellerman Signed-off-by: Bamvor Jian Zhang Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index ca8327f..eebac29 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,8 +1,8 @@ CFLAGS += -O2 +LDLIBS = -lrt -lpthread -lpopt +TEST_PROGS := mq_open_tests mq_perf_tests -all: - $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt - $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt +all: $(TEST_PROGS) include ../lib.mk @@ -11,8 +11,6 @@ override define RUN_TESTS @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" endef -TEST_PROGS := mq_open_tests mq_perf_tests - override define EMIT_TESTS echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" -- cgit v0.10.2 From 900d65ee11aae3a23cb963f484b65eb3e269dd9c Mon Sep 17 00:00:00 2001 From: Bamvor Jian Zhang Date: Wed, 9 Sep 2015 21:06:28 +0800 Subject: selftests: change install command to rsync The install command could not handle the special files in the exec test cases; change the default rule to rsync to fix this. The installation is unchanged after this commit.
Suggested-by: Michael Ellerman Signed-off-by: Bamvor Jian Zhang Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 0acbeca..4e6ed13 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -1,7 +1,7 @@ all: TEST_PROGS := ftracetest -TEST_DIRS := test.d/ +TEST_DIRS := test.d include ../lib.mk diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 97f1c67..50a93f5 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -12,13 +12,10 @@ run_tests: all $(RUN_TESTS) define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p $(INSTALL_PATH); \ - for TEST_DIR in $(TEST_DIRS); do \ - cp -r $$TEST_DIR $(INSTALL_PATH); \ - done; \ - echo "install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)"; \ - install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES); \ + @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ + mkdir -p ${INSTALL_PATH}; \ + echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ + rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ fi endef -- cgit v0.10.2 From ae7858180510d6fd0dfaf66d209bc6103db1678f Mon Sep 17 00:00:00 2001 From: Bamvor Jian Zhang Date: Wed, 9 Sep 2015 21:06:29 +0800 Subject: selftests: exec: revert to default emit rule With the previous patch, the installation method changed from install to rsync. There is no need to create the subdir during the test; the default EMIT_TESTS is enough. This patch essentially reverts commit 84cbd9e4 ("selftests/exec: do not install subdir as it is already created"). Suggested-by: Michael Ellerman Signed-off-by: Bamvor Jian Zhang Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 6b76bfd..4e400eb 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,6 +1,6 @@ CFLAGS = -Wall BINARIES = execveat -DEPS = execveat.symlink execveat.denatured script +DEPS = execveat.symlink execveat.denatured script subdir all: $(BINARIES) $(DEPS) subdir: @@ -22,7 +22,5 @@ TEST_FILES := $(DEPS) include ../lib.mk -override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\"" - clean: rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* -- cgit v0.10.2 From 005efedf2c7d0a270ffbe28d8997b03844f3e3e7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Sep 2015 09:09:31 +0100 Subject: Btrfs: fix read corruption of compressed and shared extents If a file has a range pointing to a compressed extent, followed by another range that points to the same compressed extent and a read operation attempts to read both ranges (either completely or part of them), the pages that correspond to the second range are incorrectly filled with zeroes.
Consider the following example: File layout [0 - 8K] [8K - 24K] | | | | points to extent X, points to extent X, offset 4K, length of 8K offset 0, length 16K [extent X, compressed length = 4K uncompressed length = 16K] If a readpages() call spans the 2 ranges, a single bio to read the extent is submitted - extent_io.c:submit_extent_page() would only create a new bio to cover the second range pointing to the extent if the extent it points to had a different logical address than the extent associated with the first range. This has a consequence of the compressed read end io handler (compression.c:end_compressed_bio_read()) finish once the extent is decompressed into the pages covering the first range, leaving the remaining pages (belonging to the second range) filled with zeroes (done by compression.c:btrfs_clear_biovec_end()). So fix this by submitting the current bio whenever we find a range pointing to a compressed extent that was preceded by a range with a different extent map. This is the simplest solution for this corner case. Making the end io callback populate both ranges (or more, if we have multiple pointing to the same extent) is a much more complex solution since each bio is tightly coupled with a single extent map and the extent maps associated to the ranges pointing to the shared extent can have different offsets and lengths. The following test case for fstests triggers the issue: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner rm -f $seqres.full test_clone_and_read_compressed_extent() { local mount_opts=$1 _scratch_mkfs >>$seqres.full 2>&1 _scratch_mount $mount_opts # Create a test file with a single extent that is compressed (the # data we write into it is highly compressible no matter which # compression algorithm is used, zlib or lzo). $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 4K" \ -c "pwrite -S 0xbb 4K 8K" \ -c "pwrite -S 0xcc 12K 4K" \ $SCRATCH_MNT/foo | _filter_xfs_io # Now clone our extent into an adjacent offset. $CLONER_PROG -s $((4 * 1024)) -d $((16 * 1024)) -l $((8 * 1024)) \ $SCRATCH_MNT/foo $SCRATCH_MNT/foo # Same as before but for this file we clone the extent into a lower # file offset. $XFS_IO_PROG -f -c "pwrite -S 0xaa 8K 4K" \ -c "pwrite -S 0xbb 12K 8K" \ -c "pwrite -S 0xcc 20K 4K" \ $SCRATCH_MNT/bar | _filter_xfs_io $CLONER_PROG -s $((12 * 1024)) -d 0 -l $((8 * 1024)) \ $SCRATCH_MNT/bar $SCRATCH_MNT/bar echo "File digests before unmounting filesystem:" md5sum $SCRATCH_MNT/foo | _filter_scratch md5sum $SCRATCH_MNT/bar | _filter_scratch # Evicting the inode or clearing the page cache before reading # again the file would also trigger the bug - reads were returning # all bytes in the range corresponding to the second reference to # the extent with a value of 0, but the correct data was persisted # (it was a bug exclusively in the read path). The issue happened # only if the same readpages() call targeted pages belonging to the # first and second ranges that point to the same compressed extent. _scratch_remount echo "File digests after mounting filesystem again:" # Must match the same digests we got before. 
md5sum $SCRATCH_MNT/foo | _filter_scratch md5sum $SCRATCH_MNT/bar | _filter_scratch } echo -e "\nTesting with zlib compression..." test_clone_and_read_compressed_extent "-o compress=zlib" _scratch_unmount echo -e "\nTesting with lzo compression..." test_clone_and_read_compressed_extent "-o compress=lzo" status=0 exit Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: Qu Wenruo Reviewed-by: Liu Bo diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fa19f2f..11aa8f7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2805,7 +2805,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, - unsigned long bio_flags) + unsigned long bio_flags, + bool force_bio_submit) { int ret = 0; struct bio *bio; @@ -2823,6 +2824,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || + force_bio_submit || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, @@ -2922,7 +2924,8 @@ static int __do_readpage(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2970,6 +2973,7 @@ static int __do_readpage(struct extent_io_tree *tree, } while (cur <= end) { unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + bool force_bio_submit = false; if (cur >= last_byte) { char *userpage; @@ -3020,6 +3024,49 @@ static int __do_readpage(struct extent_io_tree *tree, block_start = em->block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) block_start = EXTENT_MAP_HOLE; + + /* + * If we have a file range that points to a compressed extent + * and it's followed by a consecutive file range that points to + * to the same compressed extent (possibly with a different + * offset and/or length, so it either points to the whole extent + * or only part of it), we must make sure we do not submit a + * single bio to populate the pages for the 2 ranges because + * this makes the compressed extent read zero out the pages + * belonging to the 2nd range. Imagine the following scenario: + * + * File layout + * [0 - 8K] [8K - 24K] + * | | + * | | + * points to extent X, points to extent X, + * offset 4K, length of 8K offset 0, length 16K + * + * [extent X, compressed length = 4K uncompressed length = 16K] + * + * If the bio to read the compressed extent covers both ranges, + * it will decompress extent X into the pages belonging to the + * first range and then it will stop, zeroing out the remaining + * pages that belong to the other range that points to extent X. + * So here we make sure we submit 2 bios, one for the first + * range and another one for the third range. Both will target + * the same physical extent from disk, but we can't currently + * make the compressed bio endio callback populate the pages + * for both ranges because each compressed bio is tightly + * coupled with a single extent map, and each range can have + * an extent map with a different offset value relative to the + * uncompressed data of our extent and different lengths. 
This + * is a corner case so we prioritize correctness over + * non-optimal behavior (submitting 2 bios for the same extent). + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && + prev_em_start && *prev_em_start != (u64)-1 && + *prev_em_start != em->orig_start) + force_bio_submit = true; + + if (prev_em_start) + *prev_em_start = em->orig_start; + free_extent_map(em); em = NULL; @@ -3069,7 +3116,8 @@ static int __do_readpage(struct extent_io_tree *tree, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, - this_bio_flag); + this_bio_flag, + force_bio_submit); if (!ret) { nr++; *bio_flags = this_bio_flag; @@ -3101,6 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, struct inode *inode; struct btrfs_ordered_extent *ordered; int index; + u64 prev_em_start = (u64)-1; inode = pages[0]->mapping->host; while (1) { @@ -3116,7 +3165,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, &prev_em_start); page_cache_release(pages[index]); } } @@ -3184,7 +3233,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw); + bio_flags, rw, NULL); return ret; } @@ -3210,7 +3259,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, int ret; ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, READ, NULL); if (bio) ret = submit_one_bio(READ, bio, mirror_num, bio_flags); return ret; @@ -3463,7 +3512,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, - 0, 0, 0); + 0, 0, 0, false); if (ret) SetPageError(page); } @@ -3765,7 +3814,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, PAGE_CACHE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, - 0, epd->bio_flags, bio_flags); + 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { set_btree_ioerr(p); -- cgit v0.10.2 From cf3a51059efd653aa38667db799e85d77231af9d Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 14 Sep 2015 16:01:54 +0100 Subject: ACPI: int340x_thermal: add missing CONFIG_ prefix This patch adds the missing CONFIG_ prefix to INTEL_SOC_DTS_THERMAL macros. Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c index 9dcf836..6c7c797 100644 --- a/drivers/acpi/int340x_thermal.c +++ b/drivers/acpi/int340x_thermal.c @@ -35,7 +35,7 @@ static int int340x_thermal_handler_attach(struct acpi_device *adev, { #if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE) acpi_create_platform_device(adev); -#elif defined(INTEL_SOC_DTS_THERMAL) || defined(INTEL_SOC_DTS_THERMAL_MODULE) +#elif defined(CONFIG_INTEL_SOC_DTS_THERMAL) || defined(CONFIG_INTEL_SOC_DTS_THERMAL_MODULE) /* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */ if (id->driver_data == INT3401_DEVICE) acpi_create_platform_device(adev); -- cgit v0.10.2 From bcb2b0b2bae2de744223c68521cd51c57feb486c Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 14 Sep 2015 16:01:55 +0100 Subject: ACPI: Eliminate CONFIG_.*{, _MODULE} #ifdef in favor of IS_ENABLED() This commit removes all CONFIG_.*{,_MODULE} in ACPI code, replacing it with IS_ENABLED(). Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 46506e7..a212cef 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -315,14 +315,10 @@ static void acpi_bus_osc_support(void) capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE; capbuf[OSC_SUPPORT_DWORD] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */ -#if defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) ||\ - defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) - capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PAD_SUPPORT; -#endif - -#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) - capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PPC_OST_SUPPORT; -#endif + if (IS_ENABLED(CONFIG_ACPI_PROCESSOR_AGGREGATOR)) + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PAD_SUPPORT; + if (IS_ENABLED(CONFIG_ACPI_PROCESSOR)) + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PPC_OST_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT; diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c index 6c7c797..33505c6 100644 --- a/drivers/acpi/int340x_thermal.c +++ b/drivers/acpi/int340x_thermal.c @@ -33,13 +33,12 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = { static int int340x_thermal_handler_attach(struct acpi_device *adev, const struct acpi_device_id *id) { -#if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE) - acpi_create_platform_device(adev); -#elif defined(CONFIG_INTEL_SOC_DTS_THERMAL) || defined(CONFIG_INTEL_SOC_DTS_THERMAL_MODULE) + if (IS_ENABLED(CONFIG_INT340X_THERMAL)) + acpi_create_platform_device(adev); /* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */ - if (id->driver_data == INT3401_DEVICE) + else if (IS_ENABLED(CONFIG_INTEL_SOC_DTS_THERMAL) && + id->driver_data == INT3401_DEVICE) acpi_create_platform_device(adev); -#endif return 1; } diff --git a/include/acpi/button.h b/include/acpi/button.h index 97eea0e..1cad8b2 100644 --- a/include/acpi/button.h +++ b/include/acpi/button.h @@ -3,7 +3,7 @@ #include -#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) +#if IS_ENABLED(CONFIG_ACPI_BUTTON) extern int acpi_lid_notifier_register(struct notifier_block *nb); extern int acpi_lid_notifier_unregister(struct notifier_block *nb); extern int acpi_lid_open(void); @@ -20,6 +20,6 @@ static inline int acpi_lid_open(void) { return 1; } -#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */ +#endif /* IS_ENABLED(CONFIG_ACPI_BUTTON) */ #endif /* ACPI_BUTTON_H */ diff --git 
a/include/acpi/video.h b/include/acpi/video.h index e840b29..c62392d 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -24,7 +24,7 @@ enum acpi_backlight_type { acpi_backlight_native, }; -#if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE) +#if IS_ENABLED(CONFIG_ACPI_VIDEO) extern int acpi_video_register(void); extern void acpi_video_unregister(void); extern int acpi_video_get_edid(struct acpi_device *device, int type, -- cgit v0.10.2 From a30e577c96f59b1e1678ea5462432b09bf7d5cbc Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 11 Sep 2015 21:44:17 -0400 Subject: btrfs: skip waiting on ordered range for special files In btrfs_evict_inode, we properly truncate the page cache for evicted inodes but then we call btrfs_wait_ordered_range for every inode as well. It's the right thing to do for regular files but results in incorrect behavior for the device inodes of block devices. filemap_fdatawrite_range gets called with inode->i_mapping which gets resolved to the block device inode before getting passed to wbc_attach_fdatawrite_inode and ultimately to inode_to_bdi. What happens next depends on whether there's an open file handle associated with the inode. If there is, we write to the block device, which is unexpected behavior. If there isn't, we fall through normally and inode->i_data is used. We can also end up racing against open/close which can result in crashes when i_mapping points to a block device inode that has been closed. Since there can't be any page cache associated with special file inodes, it's safe to skip the btrfs_wait_ordered_range call entirely and avoid the problem. Cc: Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100911 Tested-by: Christoph Biedl Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 37dd8d0..3d983de 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5080,7 +5080,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_free_io_failure_record(inode, 0, (u64)-1); -- cgit v0.10.2 From 1975dbc276c6ab62230cf4f9df5ddc9ff0e0e473 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 14 Sep 2015 17:11:05 -0600 Subject: locking/static_keys: Fix up the static keys documentation Fix a few small mistakes in the static key documentation and delete an unneeded sentence.
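For reference, the corrected names are used like this (an illustrative kernel-style fragment, not buildable outside a kernel tree; do_expensive_tracing() is a made-up callee, while the key and branch macros are the ones named in the hunks below):

#include <linux/jump_label.h>

extern void do_expensive_tracing(void); /* made-up callee */

/* Off by default: the branch compiles to a straight-line NOP. */
static DEFINE_STATIC_KEY_FALSE(tracing_key);

void hot_path(void)
{
        if (static_branch_unlikely(&tracing_key)) {
                /* Out of line; only reached after the key is flipped
                 * and the branch site is patched. */
                do_expensive_tracing();
        }
        /* fast path continues */
}

void enable_tracing(void)
{
        /* Patches every branch site: a slow, machine-wide operation. */
        static_branch_enable(&tracing_key);
}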
Suggested-by: Jason Baron Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150914171105.511e1e21@lwn.net Signed-off-by: Ingo Molnar diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index ec91158..477927b 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,8 +15,8 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); -static_key_likely() -static_key_unlikely() +static_branch_likely() +static_branch_unlikely() 0) Abstract diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0684bd3..f109423 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,8 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); - * static_key_likely() - * static_key_unlikely() + * static_branch_likely() + * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we @@ -45,12 +45,10 @@ * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * - * In addtion to static_branch_{enable,disable}, we can also reference count + * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and - * static_branch_dec() as a 'make more false'. The inc()/dec() - * interface is meant to be used exclusively from the inc()/dec() for a given - * key. + * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). -- cgit v0.10.2 From e30f3963f2c3c03119d71f1804bdb94ccd33580b Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 14 Sep 2015 01:24:41 -0700 Subject: drm/ttm: Fix memory space allocation v2 In the event that TTM doesn't find a compatible memory type for the driver's first placement choice (placement without eviction), TTM returns -EINVAL without trying the driver's second choice. This causes problems on vmwgfx when VRAM is disabled before first modeset and during VT switches when fbdev is not enabled. Fix this by also trying the driver's second choice before returning -EINVAL. v2: Also check that man->use_type is true for the driver's second choice. Fixes a bug where disallowed memory types could be used. 
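The resulting control flow, condensed into a self-contained model (hypothetical types and flags; the real code walks two separate placement lists and allocates memory nodes):

#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

/* A manager is usable only if has_type && use_type; pass 1 must not
 * evict, pass 2 may. */
struct man { bool has_type, use_type, compatible, has_space, can_evict; };

static int mem_space_sketch(struct man *places, int n)
{
        bool type_found = false;
        int i;

        for (i = 0; i < n; i++) {               /* pass 1: no eviction */
                struct man *m = &places[i];
                if (!m->has_type || !m->use_type || !m->compatible)
                        continue;
                type_found = true;
                if (m->has_space)
                        return 0;
        }
        for (i = 0; i < n; i++) {               /* pass 2: allow eviction */
                struct man *m = &places[i];
                if (!m->has_type || !m->use_type || !m->compatible)
                        continue;
                type_found = true;
                if (m->can_evict)
                        return 0;
        }
        /* -EINVAL only when no enabled, compatible type exists at all. */
        return type_found ? -ENOMEM : -EINVAL;
}

int main(void)
{
        /* First choice disabled, second choice compatible and evictable:
         * the old code returned -EINVAL here; the fixed flow succeeds. */
        struct man places[] = {
                { .has_type = false },
                { .has_type = true, .use_type = true, .compatible = true,
                  .has_space = false, .can_evict = true },
        };
        printf("result: %d\n", mem_space_sketch(places, 2));
        return 0;
}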
Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 8d9b7de..745e996 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -882,6 +882,8 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (ret) return ret; man = &bdev->man[mem_type]; + if (!man->has_type || !man->use_type) + continue; type_ok = ttm_bo_mt_compatible(man, mem_type, place, &cur_flags); @@ -889,6 +891,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (!type_ok) continue; + type_found = true; cur_flags = ttm_bo_select_caching(man, bo->mem.placement, cur_flags); /* @@ -901,12 +904,10 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (mem_type == TTM_PL_SYSTEM) break; - if (man->has_type && man->use_type) { - type_found = true; - ret = (*man->func->get_node)(man, bo, place, mem); - if (unlikely(ret)) - return ret; - } + ret = (*man->func->get_node)(man, bo, place, mem); + if (unlikely(ret)) + return ret; + if (mem->mm_node) break; } @@ -917,9 +918,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, return 0; } - if (!type_found) - return -EINVAL; - for (i = 0; i < placement->num_busy_placement; ++i) { const struct ttm_place *place = &placement->busy_placement[i]; @@ -927,11 +925,12 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (ret) return ret; man = &bdev->man[mem_type]; - if (!man->has_type) + if (!man->has_type || !man->use_type) continue; if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags)) continue; + type_found = true; cur_flags = ttm_bo_select_caching(man, bo->mem.placement, cur_flags); /* @@ -957,8 +956,13 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (ret == -ERESTARTSYS) has_erestartsys = true; } - ret = (has_erestartsys) ? -ERESTARTSYS : -ENOMEM; - return ret; + + if (!type_found) { + printk(KERN_ERR TTM_PFX "No compatible memory type found.\n"); + return -EINVAL; + } + + return (has_erestartsys) ? -ERESTARTSYS : -ENOMEM; } EXPORT_SYMBOL(ttm_bo_mem_space); -- cgit v0.10.2 From 2925c2fdf1e0eb642482f5b30577e9435aaa8edb Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 15 Sep 2015 15:04:07 +1000 Subject: cxl: Fix unbalanced pci_dev_get in cxl_probe Currently the first thing we do in cxl_probe is to grab a reference on the pci device. Later on, we call device_register on our adapter. In our remove path, we call device_unregister, but we never call pci_dev_put. We therefore leak the device every time we do a reflash. device_register/unregister is sufficient to hold the reference. Therefore, drop the call to pci_dev_get. Here's why this is safe. The proposed cxl_probe(pdev) calls cxl_adapter_init: a) init calls cxl_adapter_alloc, which creates a struct cxl, conventionally called adapter. This struct contains a device entry, adapter->dev. b) init calls cxl_configure_adapter, where we set adapter->dev.parent = &dev->dev (here dev is the pci dev) So at this point, the cxl adapter's device's parent is the PCI device that I want to be refcounted properly. c) init calls cxl_register_adapter *) cxl_register_adapter calls device_register(&adapter->dev) So now we're in device_register, where dev is the adapter device, and we want to know if the PCI device is safe after we return. device_register(&adapter->dev) calls device_initialize() and then device_add(). device_add() does a get_device(). 
device_add() also explicitly grabs the device's parent, and calls get_device() on it: parent = get_device(dev->parent); Therefore, device_register() takes a reference on the parent PCI dev, which is what pci_dev_get() was guarding. pci_dev_get() can therefore be safely removed. Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access") Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 02c8516..a5e9771 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1249,8 +1249,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; - pci_dev_get(dev); - if (cxl_verbose) dump_cxl_config_space(dev); -- cgit v0.10.2 From 2cd55c68c0a49a75433b15c7dbd1991fef81e662 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 15 Sep 2015 15:48:34 +1000 Subject: cxl: Fix build failure due to -Wunused-variable behaviour change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A recent change in gcc caused this build failure: /var/lib/jenkins/workspace/gcc_kernel_build/linux/drivers/misc/cxl/cxl.h:72:27: error: ‘CXL_PSL_DLCNTL’ defined but not used [-Werror=unused-const-variable] static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060}; Because of this gcc commit, 1bca8cbd0c68366f07277f98ce6963e10c2aa617 by mark ("PR28901 -Wunused-variable ignores unused const initialised variables in C"): "12 years ago it was decided that -Wunused-variable shouldn't warn about static const variables because some code used const static char rcsid[] strings which were never used but wanted in the code anyway. But as the bug points out this hides some real bugs. These days the usage of rcsids is not very popular anymore. So this patch changes the default to warn about unused static const variables in C with -Wunused-variable. And it adds a new option -Wno-unused-const-variable to turn this warning off. For C++ this new warning is off by default, since const variables can be used as #defines in C++. New testcases for the new defaults in C and C++ are included testing the new warning and suppressing it with an unused attribute or using -Wno-unused-const-variable." (gcc/ChangeLog) The cxl driver uses static consts in place of #defines in some cases for type safety, so this change causes the driver to fail to build on new compilers as these constants are not all used in every file that imports the header. Suppress the warning for this driver to return to the old behaviour of -Wunused-variable. Reported-by: Anton Blanchard Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 6f484df..6982f60 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Werror +ccflags-y := -Werror -Wno-unused-const-variable cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o -- cgit v0.10.2 From a69b09e2342a9c144b0291b9aeb849ab7d5843bf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:49 +0300 Subject: perf evlist: Simplify propagate_maps() logic If evsel->cpus is to be reassigned then the current value must be "put", which works even if it is NULL. Simplify the current logic by moving the "put" next to the assignment.
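The resulting idiom, as a standalone sketch (a hypothetical obj type standing in for struct cpu_map):

#include <stdio.h>
#include <stdlib.h>

struct obj { int refcnt; };

static struct obj *obj_get(struct obj *o)
{
        if (o)
                o->refcnt++;
        return o;
}

static void obj_put(struct obj *o)      /* NULL-safe, like cpu_map__put() */
{
        if (o && --o->refcnt == 0)
                free(o);
}

/* The idiom from the patch: put the old value right where the field is
 * reassigned, instead of guarding the put with a separate condition. */
static void reassign(struct obj **slot, struct obj *new_val)
{
        obj_put(*slot);
        *slot = obj_get(new_val);
}

int main(void)
{
        struct obj *shared = calloc(1, sizeof(*shared));
        struct obj *field = NULL;

        shared->refcnt = 1;
        reassign(&field, shared);       /* works even though field was NULL */
        printf("refcnt=%d\n", shared->refcnt);
        obj_put(field);
        obj_put(shared);
        return 0;
}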
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d51a520..95e07ea 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1113,11 +1113,10 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (evsel->cpus && has_user_cpus) + if (!evsel->cpus || has_user_cpus) { cpu_map__put(evsel->cpus); - - if (!evsel->cpus || has_user_cpus) evsel->cpus = cpu_map__get(evlist->cpus); + } evsel->threads = thread_map__get(evlist->threads); -- cgit v0.10.2 From 725e06b2e2754fbff61521fa76fee51cee5bcb5f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:50 +0300 Subject: perf evlist: Simplify set_maps() logic Don't need to check for NULL when "putting" evlist->maps and evlist->threads because the "put" functions already do that. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 95e07ea..9cb9296 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1156,14 +1156,10 @@ int perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) { - if (evlist->cpus) - cpu_map__put(evlist->cpus); - + cpu_map__put(evlist->cpus); evlist->cpus = cpus; - if (evlist->threads) - thread_map__put(evlist->threads); - + thread_map__put(evlist->threads); evlist->threads = threads; return perf_evlist__propagate_maps(evlist, false); -- cgit v0.10.2 From d5bc056e73841d4bc941474a342ef9b6a207ac84 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:51 +0300 Subject: perf evlist: Remove redundant validation from propagate_maps() The validation checks that the values that were just assigned, got assigned i.e. the error can't ever happen. Subsequent patches will call this code in places where errors are not being returned. Changing those code paths to return this non-existent error is counter-productive, so just remove it. That in turn results in perf_evlist__set_maps not needing to return an error, but callers aren't checking it either, so remove that too. 
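Why the removed check could never fire, as a standalone sketch (get() models cpu_map__get(), which hands back its argument after bumping the count, so a non-NULL source always yields a non-NULL result):

#include <assert.h>
#include <stddef.h>

struct map { int refcnt; };

static struct map *get(struct map *m)
{
        if (m)
                m->refcnt++;
        return m;
}

int main(void)
{
        struct map src = { .refcnt = 1 };
        struct map *evlist_cpus, *evsel_cpus;

        /* Case 1: the evlist has a map. */
        evlist_cpus = &src;
        evsel_cpus = get(evlist_cpus);
        assert(!(evlist_cpus && !evsel_cpus)); /* removed check: never true */

        /* Case 2: the evlist has none. */
        evlist_cpus = NULL;
        evsel_cpus = get(evlist_cpus);
        assert(!(evlist_cpus && !evsel_cpus)); /* still never true */
        return 0;
}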
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9cb9296..785bfd3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1103,8 +1103,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - bool has_user_cpus) +static void perf_evlist__propagate_maps(struct perf_evlist *evlist, + bool has_user_cpus) { struct perf_evsel *evsel; @@ -1119,13 +1119,7 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, } evsel->threads = thread_map__get(evlist->threads); - - if ((evlist->cpus && !evsel->cpus) || - (evlist->threads && !evsel->threads)) - return -ENOMEM; } - - return 0; } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1144,7 +1138,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, !!target->cpu_list); + perf_evlist__propagate_maps(evlist, !!target->cpu_list); + + return 0; out_delete_threads: thread_map__put(evlist->threads); @@ -1152,9 +1148,8 @@ out_delete_threads: return -1; } -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads) { cpu_map__put(evlist->cpus); evlist->cpus = cpus; @@ -1162,7 +1157,7 @@ int perf_evlist__set_maps(struct perf_evlist *evlist, thread_map__put(evlist->threads); evlist->threads = threads; - return perf_evlist__propagate_maps(evlist, false); + perf_evlist__propagate_maps(evlist, false); } int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b39a619..da2fa91 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -155,9 +155,8 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads); +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); -- cgit v0.10.2 From ec9a77a7e3346a05b1287597982d0dd09dd1c3bd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:52 +0300 Subject: perf evlist: Add has_user_cpus member Subsequent patches will need to call perf_evlist__propagate_maps without reference to a "target". Add evlist->has_user_cpus to record whether the user has specified which cpus to target (and therefore whether that list of cpus should override the default settings for a selected event i.e. 
the cpu maps should be propagated) Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 785bfd3..3a4445f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1103,8 +1103,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static void perf_evlist__propagate_maps(struct perf_evlist *evlist, - bool has_user_cpus) +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { struct perf_evsel *evsel; @@ -1113,7 +1112,7 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->cpus || has_user_cpus) { + if (!evsel->cpus || evlist->has_user_cpus) { cpu_map__put(evsel->cpus); evsel->cpus = cpu_map__get(evlist->cpus); } @@ -1138,7 +1137,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - perf_evlist__propagate_maps(evlist, !!target->cpu_list); + evlist->has_user_cpus = !!target->cpu_list; + + perf_evlist__propagate_maps(evlist); return 0; @@ -1157,7 +1158,7 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, thread_map__put(evlist->threads); evlist->threads = threads; - perf_evlist__propagate_maps(evlist, false); + perf_evlist__propagate_maps(evlist); } int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index da2fa91..cfc4df6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -42,6 +42,7 @@ struct perf_evlist { int nr_mmaps; bool overwrite; bool enabled; + bool has_user_cpus; size_t mmap_len; int id_pos; int is_pos; -- cgit v0.10.2 From f114d6eff76d20b521d8716e969e71b1f56f82b5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:53 +0300 Subject: perf evlist: Fix splice_list_tail() not setting evlist Commit d49e46950772 ("perf evsel: Add a backpointer to the evlist a evsel is in") updated perf_evlist__add() but not perf_evlist__splice_list_tail(). This illustrates that it is better if perf_evlist__splice_list_tail() calls perf_evlist__add() instead of duplicating the logic, so do that. This will also simplify a subsequent fix for propagating maps. 
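Sketched as standalone C, the pattern being adopted is: splice by detaching each node from the source list and pushing it through the one add routine, so per-element bookkeeping (here an owner backpointer and a counter) is never skipped. All types and names are invented for illustration:

#include <stdio.h>

struct list;

struct node {
        struct node *next;
        struct list *owner;     /* backpointer, kept in sync by add() */
};

struct list {
        struct node *head, *tail;
        int nr_entries;
};

static void list_add_node(struct list *l, struct node *n)
{
        n->next = NULL;
        n->owner = l;           /* bookkeeping a raw pointer splice would miss */
        if (l->tail)
                l->tail->next = n;
        else
                l->head = n;
        l->tail = n;
        l->nr_entries++;
}

/* Splice by re-adding each node instead of stitching pointers. */
static void list_splice_tail_via_add(struct list *dst, struct list *src)
{
        struct node *n, *next;

        for (n = src->head; n; n = next) {
                next = n->next;
                list_add_node(dst, n);
        }
        src->head = src->tail = NULL;
        src->nr_entries = 0;
}

int main(void)
{
        struct list a = { 0 }, b = { 0 };
        struct node n1 = { 0 }, n2 = { 0 };

        list_add_node(&b, &n1);
        list_add_node(&b, &n2);
        list_splice_tail_via_add(&a, &b);
        printf("%d %d\n", a.nr_entries, n2.owner == &a);        /* 2 1 */
        return 0;
}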
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3a4445f..961560b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -136,15 +136,14 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries) + struct list_head *list) { - bool set_id_pos = !evlist->nr_entries; + struct perf_evsel *evsel, *temp; - list_splice_tail(list, &evlist->entries); - evlist->nr_entries += nr_entries; - if (set_id_pos) - perf_evlist__set_id_pos(evlist); + __evlist__for_each_safe(list, temp, evsel) { + list_del_init(&evsel->node); + perf_evlist__add(evlist, evsel); + } } void __perf_evlist__set_leader(struct list_head *list) @@ -210,7 +209,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, list_add_tail(&evsel->node, &head); } - perf_evlist__splice_list_tail(evlist, &head, nr_attrs); + perf_evlist__splice_list_tail(evlist, &head); return 0; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index cfc4df6..115d8b5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -179,8 +179,7 @@ bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries); + struct list_head *list); static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d826e6f..7e8ae21 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1140,10 +1140,9 @@ int parse_events(struct perf_evlist *evlist, const char *str, ret = parse_events__scanner(str, &data, PE_START_EVENTS); perf_pmu__parse_cleanup(); if (!ret) { - int entries = data.idx - evlist->nr_entries; struct perf_evsel *last; - perf_evlist__splice_list_tail(evlist, &data.list, entries); + perf_evlist__splice_list_tail(evlist, &data.list); evlist->nr_groups += data.nr_groups; last = perf_evlist__last(evlist); last->cmdline_group_boundary = true; -- cgit v0.10.2 From b278c364b35ae940b05f6a9edf8061fc886cd09e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:54 +0300 Subject: perf evlist: Fix missing thread_map__put in propagate_maps() perf_evlist__propagate_maps() incorrectly assumes evsel->threads is NULL before reassigning it, but it won't be NULL when perf_evlist__set_maps() is used to set different (or NULL) maps. Thus thread_map__put must be used, which works even if evsel->threads is NULL. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 961560b..79056c6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1116,6 +1116,7 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) evsel->cpus = cpu_map__get(evlist->cpus); } + thread_map__put(evsel->threads); evsel->threads = thread_map__get(evlist->threads); } } -- cgit v0.10.2 From fce4d296b405b03fba033a55017348bf55b10db6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:55 +0300 Subject: perf evsel: Add own_cpus member perf_evlist__propagate_maps() cannot easily tell if an evsel has its own cpu map. To make that simpler, keep a copy of the PMU cpu map and adjust the propagation logic accordingly. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 79056c6..5bd3b49 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1111,9 +1111,12 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->cpus || evlist->has_user_cpus) { + if (!evsel->own_cpus || evlist->has_user_cpus) { cpu_map__put(evsel->cpus); evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); } thread_map__put(evsel->threads); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c53f791..5410483 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1033,6 +1033,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); + cpu_map__put(evsel->own_cpus); thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 298e6bb..ef8925f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -98,6 +98,7 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct cpu_map *own_cpus; struct thread_map *threads; unsigned int sample_size; int id_pos; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7e8ae21..21ed6ee 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -287,8 +287,8 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - if (cpus) - evsel->cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); if (name) evsel->name = strdup(name); -- cgit v0.10.2 From 934e0f2053ce299893ca48a411bf7fdc8ac6254f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:56 +0300 Subject: perf evlist: Make set_maps() more resilient Make perf_evlist__set_maps() more resilient by allowing for the possibility that one or another of the maps isn't being changed and therefore should not be "put". 
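The guard added here is a general refcounting idiom: if the caller re-installs the pointer that is already in the slot, putting the old value first could drop the last reference to the very object being handed in. A hedged sketch, reusing the map/map_put model from the earlier example (install_map is an invented name):

/* Install a caller-donated reference into *slot. If the same map is
 * already installed, do nothing: putting it first could free the
 * object the caller is passing us. */
static void install_map(struct map **slot, struct map *new)
{
        if (new != *slot) {
                map_put(*slot);
                *slot = new;    /* slot takes over the caller's reference */
        }
}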
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5bd3b49..78ff52e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1155,11 +1155,22 @@ out_delete_threads: void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) { - cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + /* + * Allow for the possibility that one or another of the maps isn't being + * changed i.e. don't put it. Note we are assuming the maps that are + * being applied are brand new and evlist is taking ownership of the + * original reference count of 1. If that is not the case it is up to + * the caller to increase the reference count. + */ + if (cpus != evlist->cpus) { + cpu_map__put(evlist->cpus); + evlist->cpus = cpus; + } - thread_map__put(evlist->threads); - evlist->threads = threads; + if (threads != evlist->threads) { + thread_map__put(evlist->threads); + evlist->threads = threads; + } perf_evlist__propagate_maps(evlist); } -- cgit v0.10.2 From 74bfd2b25de354feb4484c553dce4fe8d9c3b60b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:57 +0300 Subject: perf evlist: Make create_maps() use set_maps() Since there is a function to set maps, perf_evlist__create_maps() should use it. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 78ff52e..c17f355 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1126,29 +1126,30 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { - evlist->threads = thread_map__new_str(target->pid, target->tid, - target->uid); + struct cpu_map *cpus; + struct thread_map *threads; - if (evlist->threads == NULL) + threads = thread_map__new_str(target->pid, target->tid, target->uid); + + if (!threads) return -1; if (target__uses_dummy_map(target)) - evlist->cpus = cpu_map__dummy_new(); + cpus = cpu_map__dummy_new(); else - evlist->cpus = cpu_map__new(target->cpu_list); + cpus = cpu_map__new(target->cpu_list); - if (evlist->cpus == NULL) + if (!cpus) goto out_delete_threads; evlist->has_user_cpus = !!target->cpu_list; - perf_evlist__propagate_maps(evlist); + perf_evlist__set_maps(evlist, cpus, threads); return 0; out_delete_threads: - thread_map__put(evlist->threads); - evlist->threads = NULL; + thread_map__put(threads); return -1; } -- cgit v0.10.2 From adc0c3e87b0e1baeccabe09b6dba738f17d0e91d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:58 +0300 Subject: perf evlist: Factor out a function to propagate maps for a single evsel Subsequent fixes will need a function that just propagates maps for a single evsel so factor it out. 
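Schematically, the refactoring extracts the loop body into a helper that takes the container plus a single element, so a later caller (such as add()) can propagate for one element only. A toy version, extending the list/node sketch above with invented names:

/* Per-element worker, callable on its own. The owner assignment
 * stands in for propagating the cpu/thread maps to one evsel. */
static void propagate_one(struct list *l, struct node *n)
{
        n->owner = l;
}

static void propagate_all(struct list *l)
{
        struct node *n;

        for (n = l->head; n; n = n->next)
                propagate_one(l, n);
}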
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-11-git-send-email-adrian.hunter@intel.com [ Moved them to before perf_evlist__add() to avoid having to move it in the next patch ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c17f355..4e840bf 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -124,6 +124,33 @@ void perf_evlist__delete(struct perf_evlist *evlist) free(evlist); } +static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. + */ + if (!evsel->own_cpus || evlist->has_user_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); + } + + thread_map__put(evsel->threads); + evsel->threads = thread_map__get(evlist->threads); +} + +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evlist__propagate_maps(evlist, evsel); +} + void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { entry->evlist = evlist; @@ -1102,28 +1129,6 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static void perf_evlist__propagate_maps(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. - */ - if (!evsel->own_cpus || evlist->has_user_cpus) { - cpu_map__put(evsel->cpus); - evsel->cpus = cpu_map__get(evlist->cpus); - } else if (evsel->cpus != evsel->own_cpus) { - cpu_map__put(evsel->cpus); - evsel->cpus = cpu_map__get(evsel->own_cpus); - } - - thread_map__put(evsel->threads); - evsel->threads = thread_map__get(evlist->threads); - } -} - int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { struct cpu_map *cpus; -- cgit v0.10.2 From 44c42d71c659527c81bf169808959c9339116d85 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:59 +0300 Subject: perf evlist: Fix add() not propagating maps If evsels are added after maps are created, then they won't have any maps propagated to them. Fix that. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-12-git-send-email-adrian.hunter@intel.com [ Moved the moving of propagate_maps() to the patch before, so that this one does _just_ the one lile fix calling in add()] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4e840bf..99267ab 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -160,6 +160,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) if (!evlist->nr_entries++) perf_evlist__set_id_pos(evlist); + + __perf_evlist__propagate_maps(evlist, entry); } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, -- cgit v0.10.2 From 8c0498b6891d7ca5c379c6283de7fc7fe8eebe5c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:59:00 +0300 Subject: perf evlist: Fix create_syswide_maps() not propagating maps Fix it by making it call perf_evlist__set_maps() instead of setting the maps itself. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 99267ab..c8fc8a2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1400,6 +1400,8 @@ void perf_evlist__close(struct perf_evlist *evlist) static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) { + struct cpu_map *cpus; + struct thread_map *threads; int err = -ENOMEM; /* @@ -1411,20 +1413,19 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - evlist->cpus = cpu_map__new(NULL); - if (evlist->cpus == NULL) + cpus = cpu_map__new(NULL); + if (!cpus) goto out; - evlist->threads = thread_map__new_dummy(); - if (evlist->threads == NULL) - goto out_free_cpus; + threads = thread_map__new_dummy(); + if (!threads) + goto out_put; - err = 0; + perf_evlist__set_maps(evlist, cpus, threads); out: return err; -out_free_cpus: - cpu_map__put(evlist->cpus); - evlist->cpus = NULL; +out_put: + cpu_map__put(cpus); goto out; } -- cgit v0.10.2 From 2998272275fc31fc3d478ef9c95e7eaef67dafa3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:59:01 +0300 Subject: perf tests: Fix task exit test setting maps The test titled "Test number of exit event of a simple workload" was setting cpu/thread maps directly. Make it use the proper function perf_evlist__set_maps() especially now that it also propagates the maps. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-14-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 3a8fedef..add1638 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -43,6 +43,8 @@ int test__task_exit(void) }; const char *argv[] = { "true", NULL }; char sbuf[STRERR_BUFSIZE]; + struct cpu_map *cpus; + struct thread_map *threads; signal(SIGCHLD, sig_handler); @@ -58,14 +60,19 @@ int test__task_exit(void) * perf_evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. 
*/ - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(-1); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(-1); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + err = perf_evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { @@ -114,6 +121,9 @@ retry: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; -- cgit v0.10.2 From c5e6bd2ed3e81df443e4ae11e95ed71ff77bf9e5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:59:02 +0300 Subject: perf tests: Fix software clock events test setting maps The test titled "Test software clock events have valid period values" was setting cpu/thread maps directly. Make it use the proper function perf_evlist__set_maps() especially now that it also propagates the maps. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aa21c9..5b83f56 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -34,6 +34,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; + struct cpu_map *cpus; + struct thread_map *threads; attr.sample_freq = 500; @@ -50,14 +52,19 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } perf_evlist__add(evlist, evsel); - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(getpid()); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(getpid()); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + if (perf_evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -107,6 +114,9 @@ next_event: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; -- cgit v0.10.2 From 2314ee4d444e28d4670ff80c84df68c25887decb Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 21 Aug 2015 04:40:22 +0100 Subject: arm64: enable generic idle loop Enable generic idle loop for ARM64, so can support for hlt/nohlt command line options to override default idle loop behavior. 
Acked-by: Catalin Marinas Signed-off-by: Leo Yan Signed-off-by: Will Deacon diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7d95663..8b6e378 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL -- cgit v0.10.2 From d10bcd473301888f957ec4b6b12aa3621be78d59 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 2 Sep 2015 18:49:28 +0100 Subject: arm64: head.S: initialise mdcr_el2 in el2_setup When entering the kernel at EL2, we fail to initialise the MDCR_EL2 register which controls debug access and PMU capabilities at EL1. This patch ensures that the register is initialised so that all traps are disabled and all the PMU counters are available to the host. When a guest is scheduled, KVM takes care to configure trapping appropriately. Cc: Acked-by: Marc Zyngier Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a055be6..90d09ed 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -523,6 +523,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr -- cgit v0.10.2 From 8453fecbecae26edb3f278627376caab05d9a88d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:54 +0800 Subject: kvm: don't try to register to KVM_FAST_MMIO_BUS for non mmio eventfd We only want zero length mmio eventfd to be registered on KVM_FAST_MMIO_BUS. So check this explicitly when arg->len is zero to make sure this. Cc: stable@vger.kernel.org Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193..e404806 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -846,7 +846,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) /* When length is ignored, MMIO is also put on a separate bus, for * faster lookups. */ - if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { + if (!args->len && bus_idx == KVM_MMIO_BUS) { ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, p->addr, 0, &p->dev); if (ret < 0) @@ -901,7 +901,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length) { + if (!p->length && p->bus_idx == KVM_MMIO_BUS) { kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, &p->dev); } -- cgit v0.10.2 From 85da11ca587c8eb73993a1b503052391a73586f9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:55 +0800 Subject: kvm: factor out core eventfd assign/deassign logic This patch factors out core eventfd assign/deassign logic and leaves the argument checking and bus index selection to callers. 
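The shape of this refactoring is the common validate-then-delegate split: a thin wrapper owns argument checking and bus selection, while the core worker trusts its inputs and can be reused by other call sites with a bus index of their own choosing. A compact, self-contained illustration (all names invented, nothing kvm-specific):

#include <errno.h>
#include <stdio.h>

struct args {
        unsigned int len;
        unsigned int flags;
};

enum bus { BUS_MMIO, BUS_PIO };

static enum bus bus_from_flags(unsigned int flags)
{
        return (flags & 1) ? BUS_PIO : BUS_MMIO;
}

/* Core worker: trusts that its arguments were already validated. */
static int assign_idx(enum bus bus_idx, const struct args *a)
{
        printf("assign len=%u on bus %d\n", a->len, bus_idx);
        return 0;
}

/* Thin wrapper: owns argument checking and bus selection, then
 * delegates to the core worker. */
static int assign(const struct args *a)
{
        switch (a->len) {
        case 0: case 1: case 2: case 4: case 8:
                break;
        default:
                return -EINVAL;
        }
        return assign_idx(bus_from_flags(a->flags), a);
}

int main(void)
{
        struct args a = { .len = 4, .flags = 0 };

        return assign(&a) ? 1 : 0;
}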
Cc: stable@vger.kernel.org Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index e404806..0829c7f 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -873,14 +847,13 @@ fail: } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -918,6 +891,48 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + + return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && + args->flags & (KVM_IOEVENTFD_FLAG_PIO | + KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + return kvm_assign_ioeventfd_idx(kvm, bus_idx, args); +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { -- cgit v0.10.2 From eefd6b06b17c5478e7c24bea6f64beaa2c431ca6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:56 +0800 Subject: kvm: fix double free for fast mmio eventfd We register wildcard mmio eventfd on two buses, once for KVM_MMIO_BUS and once on KVM_FAST_MMIO_BUS but with a single iodev instance. This will lead to an issue: kvm_io_bus_destroy() knows nothing about the devices on two buses pointing to a single dev. Which will lead to double free[1] during exit. 
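In miniature, the hazard is two containers that each believe they own the same allocation; the safe shape gives each container its own instance. A plain C sketch, nothing kvm-specific:

#include <stdlib.h>

int main(void)
{
        /* Broken shape: both "buses" store the same pointer, so a
         * teardown loop that frees every entry frees it twice:
         *
         *      void *dev = malloc(16);
         *      void *bus_a[1] = { dev }, *bus_b[1] = { dev };
         *
         * Safe shape: one allocation per bus, unconditional free. */
        void *bus_a[1] = { malloc(16) };
        void *bus_b[1] = { malloc(16) };

        free(bus_a[0]);
        free(bus_b[0]);         /* distinct pointer: no double free */
        return 0;
}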
Fix this by allocating two instances of iodevs then registering one on KVM_MMIO_BUS and another on KVM_FAST_MMIO_BUS. CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic #28-Ubuntu Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 09/12/2013 task: ffff88009ae0c4b0 ti: ffff88020e7f0000 task.ti: ffff88020e7f0000 RIP: 0010:[] [] ioeventfd_release+0x28/0x60 [kvm] RSP: 0018:ffff88020e7f3bc8 EFLAGS: 00010292 RAX: dead000000200200 RBX: ffff8801ec19c900 RCX: 000000018200016d RDX: ffff8801ec19cf80 RSI: ffffea0008bf1d40 RDI: ffff8801ec19c900 RBP: ffff88020e7f3bd8 R08: 000000002fc75a01 R09: 000000018200016d R10: ffffffffc07df6ae R11: ffff88022fc75a98 R12: ffff88021e7cc000 R13: ffff88021e7cca48 R14: ffff88021e7cca50 R15: ffff8801ec19c880 FS: 00007fc1ee3e6700(0000) GS:ffff88023e240000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f389d8000 CR3: 000000023dc13000 CR4: 00000000001427e0 Stack: ffff88021e7cc000 0000000000000000 ffff88020e7f3be8 ffffffffc07e2622 ffff88020e7f3c38 ffffffffc07df69a ffff880232524160 ffff88020e792d80 0000000000000000 ffff880219b78c00 0000000000000008 ffff8802321686a8 Call Trace: [] ioeventfd_destructor+0x12/0x20 [kvm] [] kvm_put_kvm+0xca/0x210 [kvm] [] kvm_vcpu_release+0x18/0x20 [kvm] [] __fput+0xe7/0x250 [] ____fput+0xe/0x10 [] task_work_run+0xd4/0xf0 [] do_exit+0x368/0xa50 [] ? recalc_sigpending+0x1f/0x60 [] do_group_exit+0x45/0xb0 [] get_signal+0x291/0x750 [] do_signal+0x28/0xab0 [] ? do_futex+0xdb/0x5d0 [] ? __wake_up_locked_key+0x18/0x20 [] ? SyS_futex+0x76/0x170 [] do_notify_resume+0x69/0xb0 [] int_signal+0x12/0x17 Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 7f 20 e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 <48> 89 10 48 b8 00 01 10 00 00 RIP [] ioeventfd_release+0x28/0x60 [kvm] RSP Cc: stable@vger.kernel.org Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0829c7f..79db453 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -817,16 +817,6 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, if (ret < 0) goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. 
- */ - if (!args->len && bus_idx == KVM_MMIO_BUS) { - ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, - p->addr, 0, &p->dev); - if (ret < 0) - goto register_fail; - } - kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); @@ -834,8 +824,6 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, return 0; -register_fail: - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); unlock_fail: mutex_unlock(&kvm->slots_lock); @@ -874,10 +862,6 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length && p->bus_idx == KVM_MMIO_BUS) { - kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, - &p->dev); - } kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; @@ -894,14 +878,19 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); - return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + return ret; } static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { enum kvm_bus bus_idx; + int ret; bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized, or 0 to ignore length */ @@ -930,7 +919,25 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) KVM_IOEVENTFD_FLAG_DATAMATCH)) return -EINVAL; - return kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; } int -- cgit v0.10.2 From 8f4216c7d28976f7ec1b2bcbfa0a9f787133c45e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:57 +0800 Subject: kvm: fix zero length mmio searching Currently, if we had a zero length mmio eventfd assigned on KVM_MMIO_BUS. It will never be found by kvm_io_bus_cmp() since it always compares the kvm_io_range() with the length that guest wrote. This will cause e.g for vhost, kick will be trapped by qemu userspace instead of vhost. Fixing this by using zero length if an iodevice is zero length. Cc: stable@vger.kernel.org Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eb4c9d2..9af68db 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3157,10 +3157,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, const struct kvm_io_range *r2) { - if (r1->addr < r2->addr) + gpa_t addr1 = r1->addr; + gpa_t addr2 = r2->addr; + + if (addr1 < addr2) return -1; - if (r1->addr + r1->len > r2->addr + r2->len) + + /* If r2->len == 0, match the exact address. If r2->len != 0, + * accept any overlapping write. Any order is acceptable for + * overlapping ranges, because kvm_io_bus_get_first_dev ensures + * we process all of them. 
+ */ + if (r2->len) { + addr1 += r1->len; + addr2 += r2->len; + } + + if (addr1 > addr2) return 1; + return 0; } -- cgit v0.10.2 From da92b4eb7e2ec9866f14a0c8f453788e8052dee7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:33 +0800 Subject: powerpc, irq: Use access helper irq_data_get_affinity_mask() Use access helper irq_data_get_affinity_mask() so we can move the affinity mask to irq_common_data. Signed-off-by: Jiang Liu Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1433145945-789-25-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4509603..290559d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -441,7 +441,7 @@ void migrate_irqs(void) chip = irq_data_get_irq_chip(data); - cpumask_and(mask, data->affinity, map); + cpumask_and(mask, irq_data_get_affinity_mask(data), map); if (cpumask_any(mask) >= nr_cpu_ids) { pr_warn("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 11ac964..27c936c 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -54,7 +54,7 @@ static void ics_opal_unmask_irq(struct irq_data *d) if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return; - server = xics_get_irq_server(d->irq, d->affinity, 0); + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); server = ics_opal_mangle_server(server); rc = opal_set_xive(hw_irq, server, DEFAULT_PRIORITY); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index d1c625c..3854dd4 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -47,7 +47,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d) if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return; - server = xics_get_irq_server(d->irq, d->affinity, 0); + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, DEFAULT_PRIORITY); -- cgit v0.10.2 From dc2ec62f75fd9be78d07b3b453aeb0d201ead419 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 13:34:05 +0200 Subject: net/mlx4_en: Use access helper irq_data_get_affinity_mask() This is a preparatory patch for moving irq_data struct members. Search and replace was done with coccinelle Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu Cc: Amir Vadai diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4402a1e..4c7de8c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1047,13 +1047,15 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... 
*/ if (done == budget) { - int cpu_curr; const struct cpumask *aff; + struct irq_data *idata; + int cpu_curr; INC_PERF_COUNTER(priv->pstats.napi_quota); cpu_curr = smp_processor_id(); - aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; + idata = irq_desc_get_irq_data(cq->irq_desc); + aff = irq_data_get_affinity_mask(idata); if (likely(cpumask_test_cpu(cpu_curr, aff))) return budget; -- cgit v0.10.2 From e4084a16bbe07957811c75dfb7c9bf25c5862ba0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Sep 2015 13:37:03 +0100 Subject: platform-msi: Do not cache msi_desc in handler_data The current implementation of platform MSI caches the msi_desc pointer in irq_data::handler_data. This is a bit silly, as we also have irq_data::msi_desc, which is perfectly valid. Remove the useless assignment and simplify the whole flow. Reported-by: Ma Jun Signed-off-by: Marc Zyngier Reviewed-by: Jiang Liu Link: http://lkml.kernel.org/r/1442147824-20971-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 1857a5d..134483d 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -63,20 +63,8 @@ static int platform_msi_init(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, msi_alloc_info_t *arg) { - struct irq_data *data; - - irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - info->chip, info->chip_data); - - /* - * Save the MSI descriptor in handler_data so that the - * irq_write_msi_msg callback can retrieve it (and the - * associated device). - */ - data = irq_domain_get_irq_data(domain, virq); - data->handler_data = arg->desc; - - return 0; + return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + info->chip, info->chip_data); } #else #define platform_msi_set_desc NULL @@ -97,7 +85,7 @@ static void platform_msi_update_dom_ops(struct msi_domain_info *info) static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { - struct msi_desc *desc = irq_data_get_irq_handler_data(data); + struct msi_desc *desc = irq_data_get_msi_desc(data); struct platform_msi_priv_data *priv_data; priv_data = desc->platform.msi_priv_data; -- cgit v0.10.2 From 12e14066f4835f5ee1ca795f0309415b54c067a9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Sep 2015 12:14:31 +0100 Subject: irqchip/GIC: Add workaround for aliased GIC400 The GICv2 architecture mandates that the two 4kB GIC regions are contiguous, and on two separate physical pages (so that access to the second page can be trapped by a hypervisor). This doesn't work very well when PAGE_SIZE is 64kB. A relatively common hack^Wway to work around this is to alias each 4kB region over its own 64kB page. Of course in this case, the base address you want to use is not really the begining of the region, but base + 60kB (so that you get a contiguous 8kB region over two distinct pages). Normally, this would be described in DT with a new property, but some HW is already out there, and the firmware makes sure that it will override whatever you put in the GIC node. Duh. And of course, said firmware source code is not available, despite being based on u-boot. The workaround is to detect the case where the CPU interface size is set to 128kB, and verify the aliasing by checking that the ID register for GIC400 (which is the only GIC wired this way so far) is the same at base and base + 0xF000. In this case, we update the GIC base address and let it roll. 
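To see why base + 60kB works: with each 4kB region aliased across its own 64kB page, the last 4kB of page 0 still shows region 0, and the first 4kB of page 1 shows region 1, yielding the required contiguous 8kB. A toy model of that address decode (this is not the driver's code):

#include <stdio.h>

/* Each 4kB GIC region is aliased across its own 64kB page, so the
 * region an access hits depends only on which page it lands in. */
static int region(unsigned int off)
{
        return off >> 16;       /* page 0 -> region 0, page 1 -> region 1 */
}

int main(void)
{
        unsigned int base = 0xf000;     /* original base + 60kB */

        /* first 4kB after the new base: region 0,
         * next 4kB crosses the page boundary: region 1 */
        printf("%d %d\n", region(base), region(base + 0x1000));
        return 0;
}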
And if you feel slightly sick by looking at this, rest assured that I do too... Reported-by: Julien Grall Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Stuart Yoder Cc: Pavel Fedin Cc: Jason Cooper Link: http://lkml.kernel.org/r/1442142873-20213-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index e6b7ed5..a947057 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1119,12 +1119,49 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, #ifdef CONFIG_OF static int gic_cnt __initdata; +static bool gic_check_eoimode(struct device_node *node, void __iomem **base) +{ + struct resource cpuif_res; + + of_address_to_resource(node, 1, &cpuif_res); + + if (!is_hyp_mode_available()) + return false; + if (resource_size(&cpuif_res) < SZ_8K) + return false; + if (resource_size(&cpuif_res) == SZ_128K) { + u32 val_low, val_high; + + /* + * Verify that we have the first 4kB of a GIC400 + * aliased over the first 64kB by checking the + * GICC_IIDR register on both ends. + */ + val_low = readl_relaxed(*base + GIC_CPU_IDENT); + val_high = readl_relaxed(*base + GIC_CPU_IDENT + 0xf000); + if ((val_low & 0xffff0fff) != 0x0202043B || + val_low != val_high) + return false; + + /* + * Move the base up by 60kB, so that we have a 8kB + * contiguous region, which allows us to use GICC_DIR + * at its normal offset. Please pass me that bucket. + */ + *base += 0xf000; + cpuif_res.start += 0xf000; + pr_warn("GIC: Adjusting CPU interface base to %pa", + &cpuif_res.start); + } + + return true; +} + static int __init gic_of_init(struct device_node *node, struct device_node *parent) { void __iomem *cpu_base; void __iomem *dist_base; - struct resource cpu_res; u32 percpu_offset; int irq; @@ -1137,14 +1174,11 @@ gic_of_init(struct device_node *node, struct device_node *parent) cpu_base = of_iomap(node, 1); WARN(!cpu_base, "unable to map gic cpu registers\n"); - of_address_to_resource(node, 1, &cpu_res); - /* * Disable split EOI/Deactivate if either HYP is not available * or the CPU interface is too small. */ - if (gic_cnt == 0 && (!is_hyp_mode_available() || - resource_size(&cpu_res) < SZ_8K)) + if (gic_cnt == 0 && !gic_check_eoimode(node, &cpu_base)) static_key_slow_dec(&supports_deactivate); if (of_property_read_u32(node, "cpu-offset", &percpu_offset)) -- cgit v0.10.2 From 5a9a8915c8888b615521b17d70a4342187eae60b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Sep 2015 12:14:32 +0100 Subject: irqchip/gic-v3-its: Add missing cache flushes When the ITS is configured for non-cacheable transactions, make sure that the allocated, zeroed memory is flushed to the Point of Coherency, allowing the ITS to observe the zeros instead of random garbage (or even get its own data overwritten by zeros being evicted from the cache...). Fixes: 241a386c7dbb "irqchip: gicv3-its: Use non-cacheable accesses when no shareability" Reported-and-tested-by: Stuart Yoder Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Pavel Fedin Cc: Jason Cooper Link: http://lkml.kernel.org/r/1442142873-20213-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 26b55c5..ac7ae2b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -898,8 +898,10 @@ retry_baser: * non-cacheable as well. 
*/ shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) + if (!shr) { cache = GITS_BASER_nC; + __flush_dcache_area(base, alloc_size); + } goto retry_baser; } @@ -1140,6 +1142,8 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, return NULL; } + __flush_dcache_area(itt, sz); + dev->its = its; dev->itt = itt; dev->nr_ites = nr_ites; -- cgit v0.10.2 From 157add60cb35913b8a848a3d7e6456b8ed134796 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Sun, 13 Sep 2015 12:14:33 +0100 Subject: irqchip/GICv2m: Fix GICv2m build warning on 32 bits After GICv2m was enabled for 32-bit ARM kernel, a warning popped up: drivers/irqchip/irq-gic-v2m.c: In function gicv2m_compose_msi_msg: drivers/irqchip/irq-gic-v2m.c:100:2: warning: right shift count >= width of type [enabled by default] msg->address_hi = (u32) (addr >> 32); ^ This patch fixes it by using proper macros for splitting up the value. Signed-off-by: Pavel Fedin Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Stuart Yoder Cc: Jason Cooper Link: http://lkml.kernel.org/r/1442142873-20213-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index db04fc1..12985da 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -95,8 +95,8 @@ static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct v2m_data *v2m = irq_data_get_irq_chip_data(data); phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS; - msg->address_hi = (u32) (addr >> 32); - msg->address_lo = (u32) (addr); + msg->address_hi = upper_32_bits(addr); + msg->address_lo = lower_32_bits(addr); msg->data = data->hwirq; } -- cgit v0.10.2 From b1370658804510f6a1c0517a8ff1c9534d371edd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Sep 2015 13:42:53 +0200 Subject: irqchip/renesas-irqc: Use a separate lockdep class The renesas-irqc interrupt controller is cascaded to the GIC. 
Hence when propagating wake-up settings to its parent interrupt controller, the following lockdep warning is printed: ============================================= [ INFO: possible recursive locking detected ] 4.2.0-ape6evm-10725-g50fcd7643c034198 #280 Not tainted --------------------------------------------- s2ram/1072 is trying to acquire lock: (&irq_desc_lock_class){-.-...}, at: [] __irq_get_desc_lock+0x58/0x98 but task is already holding lock: (&irq_desc_lock_class){-.-...}, at: [] __irq_get_desc_lock+0x58/0x98 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&irq_desc_lock_class); lock(&irq_desc_lock_class); *** DEADLOCK *** May be due to missing lock nesting notation 6 locks held by s2ram/1072: #0: (sb_writers#7){.+.+.+}, at: [] __sb_start_write+0xa0/0xa8 #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x4c/0x1bc #2: (s_active#24){.+.+.+}, at: [] kernfs_fop_write+0x54/0x1bc #3: (pm_mutex){+.+.+.}, at: [] pm_suspend+0x10c/0x510 #4: (&dev->mutex){......}, at: [] __device_suspend+0xdc/0x2cc #5: (&irq_desc_lock_class){-.-...}, at: [] __irq_get_desc_lock+0x58/0x98 stack backtrace: CPU: 0 PID: 1072 Comm: s2ram Not tainted 4.2.0-ape6evm-10725-g50fcd7643c034198 #280 Hardware name: Generic R8A73A4 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x88/0x98) [] (dump_stack) from [] (__lock_acquire+0x15cc/0x20e4) [] (__lock_acquire) from [] (lock_acquire+0xac/0x12c) [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x40/0x54) [] (_raw_spin_lock_irqsave) from [] (__irq_get_desc_lock+0x58/0x98) [] (__irq_get_desc_lock) from [] (irq_set_irq_wake+0x20/0xf8) [] (irq_set_irq_wake) from [] (irqc_irq_set_wake+0x20/0x4c) [] (irqc_irq_set_wake) from [] (irq_set_irq_wake+0x8c/0xf8) [] (irq_set_irq_wake) from [] (gpio_keys_suspend+0x74/0xc0) [] (gpio_keys_suspend) from [] (dpm_run_callback+0x54/0x124) Avoid this false positive by using a separate lockdep class for IRQC interrupts. Signed-off-by: Geert Uytterhoeven Cc: Grygorii Strashko Cc: Magnus Damm Cc: Jason Cooper Link: http://lkml.kernel.org/r/1441798974-25716-2-git-send-email-geert%2Brenesas@glider.be Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 2aa3add..4652cc3 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -150,6 +150,12 @@ static irqreturn_t irqc_irq_handler(int irq, void *dev_id) return IRQ_NONE; } +/* + * This lock class tells lockdep that IRQC irqs are in a different + * category than their parents, so it won't report false recursion. + */ +static struct lock_class_key irqc_irq_lock_class; + static int irqc_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -157,6 +163,7 @@ static int irqc_irq_domain_map(struct irq_domain *h, unsigned int virq, irqc_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); + irq_set_lockdep_class(virq, &irqc_irq_lock_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } -- cgit v0.10.2 From 769b5cf78e6c653c2f513649ee6c4e7a06723872 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Sep 2015 13:42:54 +0200 Subject: irqchip/renesas-intc-irqpin: Use a separate lockdep class The renesas-intc-irqpin interrupt controller is cascaded to the GIC. 
Hence when propagating wake-up settings to its parent interrupt controller, the following lockdep warning is printed: ============================================= [ INFO: possible recursive locking detected ] 4.2.0-armadillo-10725-g50fcd7643c034198 #781 Not tainted --------------------------------------------- s2ram/1179 is trying to acquire lock: (&irq_desc_lock_class){-.-...}, at: [] __irq_get_desc_lock+0x78/0x94 but task is already holding lock: (&irq_desc_lock_class){-.-...}, at: [] __irq_get_desc_lock+0x78/0x94 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&irq_desc_lock_class); lock(&irq_desc_lock_class); *** DEADLOCK *** May be due to missing lock nesting notation 7 locks held by s2ram/1179: #0: (sb_writers#7){.+.+.+}, at: [] __sb_start_write+0x64/0xb8 #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x78/0x1a0 #2: (s_active#23){.+.+.+}, at: [] kernfs_fop_write+0x80/0x1a0 #3: (autosleep_lock){+.+.+.}, at: [] pm_autosleep_lock+0x18/0x20 #4: (pm_mutex){+.+.+.}, at: [] pm_suspend+0x54/0x248 #5: (&dev->mutex){......}, at: [] __device_suspend+0xdc/0x240 #6: (&irq_desc_lock_class){-.-...}, at: [] __irq_get_desc_lock+0x78/0x94 stack backtrace: CPU: 0 PID: 1179 Comm: s2ram Not tainted 4.2.0-armadillo-10725-g50fcd7643c034198 Hardware name: Generic R8A7740 (Flattened Device Tree) [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [] (show_stack) from [] (dump_stack+0x20/0x28) [] (dump_stack) from [] (__lock_acquire+0x67c/0x1b88) [] (__lock_acquire) from [] (lock_acquire+0x9c/0xbc) [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x44/0x58) [] (_raw_spin_lock_irqsave) from [] (__irq_get_desc_lock+0x78/0x94 [] (__irq_get_desc_lock) from [] (irq_set_irq_wake+0x28/0x100) [] (irq_set_irq_wake) from [] (intc_irqpin_irq_set_wake+0x24/0x4c) [] (intc_irqpin_irq_set_wake) from [] (set_irq_wake_real+0x3c/0x50 [] (set_irq_wake_real) from [] (irq_set_irq_wake+0x64/0x100) [] (irq_set_irq_wake) from [] (gpio_keys_suspend+0x60/0xa0) [] (gpio_keys_suspend) from [] (platform_pm_suspend+0x3c/0x5c) Avoid this false positive by using a separate lockdep class for INTC External IRQ Pin interrupts. Signed-off-by: Geert Uytterhoeven Cc: Grygorii Strashko Cc: Magnus Damm Cc: Jason Cooper Link: http://lkml.kernel.org/r/1441798974-25716-3-git-send-email-geert%2Brenesas@glider.be Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 0670ab4..67797ee 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -332,6 +332,12 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) return status; } +/* + * This lock class tells lockdep that INTC External IRQ Pin irqs are in a + * different category than their parents, so it won't report false recursion. 
+ */ +static struct lock_class_key intc_irqpin_irq_lock_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -342,6 +348,7 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); set_irq_flags(virq, IRQF_VALID); /* kill me now */ return 0; -- cgit v0.10.2 From f4e209cdc7a00f934007f40cf885471799073b0d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 8 Sep 2015 19:00:35 +0200 Subject: irqchip/renesas-intc-irqpin: Propagate wake-up settings to parent The renesas-intc-irqpin interrupt controller is cascaded to the GIC, but its driver doesn't propagate wake-up settings to the parent interrupt controller. Since commit aec89ef72ba6c944 ("irqchip/gic: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND"), the GIC driver masks interrupts during suspend, and wake-up through gpio-keys now fails on r8a7740/armadillo and sh73a0/kzm9g. Fix this by propagating wake-up settings to the parent interrupt controller. There's no need to handle irq_set_irq_wake() failures, as the renesas-intc-irqpin interrupt controller is always cascaded to a GIC, and the GIC driver always sets SKIP_SET_WAKE since the aforementioned commit. Signed-off-by: Geert Uytterhoeven Cc: Sudeep Holla Cc: Magnus Damm Cc: Jason Cooper Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1441731636-17610-2-git-send-email-geert%2Brenesas@glider.be Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 67797ee..d3546a6 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -283,6 +283,9 @@ static int intc_irqpin_irq_set_type(struct irq_data *d, unsigned int type) static int intc_irqpin_irq_set_wake(struct irq_data *d, unsigned int on) { struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + irq_set_irq_wake(p->irq[hw_irq].requested_irq, on); if (!p->clk) return 0; -- cgit v0.10.2 From 4cd7863ecb90010533c178fba6ecc84d5529b402 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 8 Sep 2015 19:00:36 +0200 Subject: irqchip/renesas-irqc: Propagate wake-up settings to parent The renesas-irqc interrupt controller is cascaded to the GIC, but its driver doesn't propagate wake-up settings to the parent interrupt controller. Since commit aec89ef72ba6c944 ("irqchip/gic: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND"), the GIC driver masks interrupts during suspend, and wake-up through gpio-keys now fails on r8a73a4/ape6evm. Fix this by propagating wake-up settings to the parent interrupt controller. There's no need to handle irq_set_irq_wake() failures, as the renesas-irqc interrupt controller is always cascaded to a GIC, and the GIC driver always sets SKIP_SET_WAKE since the aforementioned commit. 
Signed-off-by: Geert Uytterhoeven Cc: Sudeep Holla Cc: Magnus Damm Cc: Jason Cooper Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1441731636-17610-3-git-send-email-geert%2Brenesas@glider.be Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 4652cc3..35bf97b 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -121,6 +121,9 @@ static int irqc_irq_set_type(struct irq_data *d, unsigned int type) static int irqc_irq_set_wake(struct irq_data *d, unsigned int on) { struct irqc_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + irq_set_irq_wake(p->irq[hw_irq].requested_irq, on); if (!p->clk) return 0; -- cgit v0.10.2 From 3829c664b1eec243f2a355829efa40f0f414de8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 13:47:24 +0200 Subject: genirq: Remove stale comment Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index 6f8b340..72a6b2f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -151,10 +151,6 @@ struct irq_common_data { * methods, to allow shared chip implementations * @msi_desc: MSI descriptor * @affinity: IRQ affinity on SMP - * - * The fields here need to overlay the ones in irq_desc until we - * cleaned up the direct references and switched everything over to - * irq_data. */ struct irq_data { u32 mask; -- cgit v0.10.2 From 6584d84c3e504c76ad291cc2e381bbeed59798ab Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 1 Sep 2015 10:35:50 +0800 Subject: genirq: Update the comment for generic_handle_irq_desc __do_IRQ() was removed by commit 1c77ff2 "genirq: Remove __do_IRQ", but the comment referring to __do_IRQ() was left. Update the comment for generic_handle_irq_desc(). Signed-off-by: Huang Shijie Cc: jiang.liu@linux.intel.com Cc: peterz@infradead.org Cc: rafael.j.wysocki@intel.com Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Link: http://lkml.kernel.org/r/1441074950-3893-1-git-send-email-shijie.huang@arm.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5acfa26..0593c69 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -137,9 +137,7 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) /* * Architectures call this to let the generic IRQ layer - * handle an interrupt. If the descriptor is attached to an - * irqchip-style controller then we call the ->handle_irq() handler, - * and it calls __do_IRQ() if it's attached to an irqtype-style controller. + * handle an interrupt. */ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) { -- cgit v0.10.2 From 237865f195f6b10e4724ce49eeb3972641da882a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 15 Sep 2015 13:18:04 -0500 Subject: PCI: Revert "PCI: Call pci_read_bridge_bases() from core instead of arch code" Revert dff22d2054b5 ("PCI: Call pci_read_bridge_bases() from core instead of arch code"). Reading PCI bridge windows is not arch-specific in itself, but there is PCI core code that doesn't work correctly if we read them too early. 
For example, Hannes found this case on an ARM Freescale i.mx6 board: pci_bus 0000:00: root bus resource [mem 0x01000000-0x01efffff] pci 0000:00:00.0: PCI bridge to [bus 01-ff] pci 0000:00:00.0: BAR 8: no space for [mem size 0x01000000] (mem window) pci 0000:01:00.0: BAR 2: failed to assign [mem size 0x00200000] pci 0000:01:00.0: BAR 1: failed to assign [mem size 0x00004000] pci 0000:01:00.0: BAR 0: failed to assign [mem size 0x00000100] The 00:00.0 mem window needs to be at least 3MB: the 01:00.0 device needs 0x204100 of space, and mem windows are megabyte-aligned. Bus sizing can increase a bridge window size, but never *decrease* it (see d65245c3297a ("PCI: don't shrink bridge resources")). Prior to dff22d2054b5, ARM didn't read bridge windows at all, so the "original size" was zero, and we assigned a 3MB window. After dff22d2054b5, we read the bridge windows before sizing the bus. The firmware programmed a 16MB window (size 0x01000000) in 00:00.0, and since we never decrease the size, we kept 16MB even though we only needed 3MB. But 16MB doesn't fit in the host bridge aperture, so we failed to assign space for the window and the downstream devices. I think this is a defect in the PCI core: we shouldn't rely on the firmware to assign sensible windows. Ray reported a similar problem, also on ARM, with Broadcom iProc. Issues like this are too hard to fix right now, so revert dff22d2054b5. Reported-by: Hannes Reported-by: Ray Jui Link: http://lkml.kernel.org/r/CAAa04yFQEUJm7Jj1qMT57-LG7ZGtnhNDBe=PpSRa70Mj+XhW-A@mail.gmail.com Link: http://lkml.kernel.org/r/55F75BB8.4070405@broadcom.com Signed-off-by: Bjorn Helgaas Acked-by: Yinghai Lu Acked-by: Lorenzo Pieralisi diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index cded02c..5f387ee 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -242,7 +242,12 @@ pci_restore_srm_config(void) void pcibios_fixup_bus(struct pci_bus *bus) { - struct pci_dev *dev; + struct pci_dev *dev = bus->self; + + if (pci_has_flag(PCI_PROBE_ONLY) && dev && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_read_bridge_bases(bus); + } list_for_each_entry(dev, &bus->devices, bus_list) { pdev_save_srm_config(dev); diff --git a/arch/frv/mb93090-mb00/pci-vdk.c b/arch/frv/mb93090-mb00/pci-vdk.c index f9c86c4..f211839 100644 --- a/arch/frv/mb93090-mb00/pci-vdk.c +++ b/arch/frv/mb93090-mb00/pci-vdk.c @@ -294,6 +294,8 @@ void pcibios_fixup_bus(struct pci_bus *bus) printk("### PCIBIOS_FIXUP_BUS(%d)\n",bus->number); #endif + pci_read_bridge_bases(bus); + if (bus->number == 0) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index d89b601..7cc3be9 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -533,9 +533,10 @@ void pcibios_fixup_bus(struct pci_bus *b) { struct pci_dev *dev; - if (b->self) + if (b->self) { + pci_read_bridge_bases(b); pcibios_fixup_bridge_resources(b->self); - + } list_for_each_entry(dev, &b->devices, bus_list) pcibios_fixup_device_resources(dev); platform_pci_fixup_bus(b); diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 6b8b752..ae838ed 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -863,7 +863,14 @@ void pcibios_setup_bus_devices(struct pci_bus *bus) void pcibios_fixup_bus(struct pci_bus *bus) { - /* Fixup the bus */ + /* When called from the generic PCI probe, read PCI<->PCI bridge + * bases. 
This is -not- called when generating the PCI tree from + * the OF device-tree. + */ + if (bus->self != NULL) + pci_read_bridge_bases(bus); + + /* Now fixup the bus bus */ pcibios_setup_bus_self(bus); /* Now fixup devices on that bus */ diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index c6996cf..b8a0bf5 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -311,6 +311,12 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) void pcibios_fixup_bus(struct pci_bus *bus) { + struct pci_dev *dev = bus->self; + + if (pci_has_flag(PCI_PROBE_ONLY) && dev && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_read_bridge_bases(bus); + } } EXPORT_SYMBOL(PCIBIOS_MIN_IO); diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c index deaa893..3dfe2d3 100644 --- a/arch/mn10300/unit-asb2305/pci.c +++ b/arch/mn10300/unit-asb2305/pci.c @@ -324,6 +324,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) struct pci_dev *dev; if (bus->self) { + pci_read_bridge_bases(bus); pcibios_fixup_bridge_resources(bus->self); } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index a1d0632..7587b2a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1032,7 +1032,13 @@ void pcibios_set_master(struct pci_dev *dev) void pcibios_fixup_bus(struct pci_bus *bus) { - /* Fixup the bus */ + /* When called from the generic PCI probe, read PCI<->PCI bridge + * bases. This is -not- called when generating the PCI tree from + * the OF device-tree. + */ + pci_read_bridge_bases(bus); + + /* Now fixup the bus bus */ pcibios_setup_bus_self(bus); /* Now fixup devices on that bus */ diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 09d3afc..dc78a4a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -166,6 +166,7 @@ void pcibios_fixup_bus(struct pci_bus *b) { struct pci_dev *dev; + pci_read_bridge_bases(b); list_for_each_entry(dev, &b->devices, bus_list) pcibios_fixup_device_resources(dev); } diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index d27b4dc..b848cc3 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -210,6 +210,10 @@ subsys_initcall(pcibios_init); void pcibios_fixup_bus(struct pci_bus *bus) { + if (bus->parent) { + /* This is a subordinate bridge */ + pci_read_bridge_bases(bus); + } } void pcibios_set_master(struct pci_dev *dev) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index baec33c..a0580af 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -560,6 +560,9 @@ dino_fixup_bus(struct pci_bus *bus) } else if (bus->parent) { int i; + pci_read_bridge_bases(bus); + + for(i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { if((bus->self->resource[i].flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0) diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index 7b9e89b..a32c1f6 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -693,6 +693,7 @@ lba_fixup_bus(struct pci_bus *bus) if (bus->parent) { int i; /* PCI-PCI Bridge */ + pci_read_bridge_bases(bus); for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) pci_claim_bridge_resource(bus->self, i); } else { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0b2be17..c8cc0e62 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -855,9 +855,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) child->bridge_ctl = bctl; } - /* Read and initialize bridge resources */ - pci_read_bridge_bases(child); - 
cmax = pci_scan_child_bus(child); if (cmax > subordinate) dev_warn(&dev->dev, "bridge has subordinate %02x but max busn %02x\n", @@ -918,9 +915,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) if (!is_cardbus) { child->bridge_ctl = bctl; - - /* Read and initialize bridge resources */ - pci_read_bridge_bases(child); max = pci_scan_child_bus(child); } else { /* -- cgit v0.10.2 From 09a77a885233e2a20dac2635a79c83ccf50a26a1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 15 Sep 2015 16:03:36 +0100 Subject: modsign: Fix GPL/OpenSSL licence incompatibility The GPL does not permit us to link against the OpenSSL library. Use LGPL for sign-file and extract-file instead. [ The whole "openssl isn't compatible with gpl" is really just fear-mongering, but there's no reason not to make modsign LGPL, so nobody cares. - Linus ] Reported-by: Julian Andres Klode Signed-off-by: David Woodhouse Signed-off-by: David Howells Reviewed-by: Julian Andres Klode Signed-off-by: Linus Torvalds diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 10d23ca..6ce5945 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -1,15 +1,15 @@ /* Extract X.509 certificate in DER form from PKCS#11 or PEM. * - * Copyright © 2014 Red Hat, Inc. All Rights Reserved. - * Copyright © 2015 Intel Corporation. + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. * * Authors: David Howells * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include diff --git a/scripts/sign-file.c b/scripts/sign-file.c index 058bba3..c3899ca 100755 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -1,12 +1,15 @@ /* Sign a module file using the given key. * - * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. + * + * Authors: David Howells + * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include -- cgit v0.10.2 From a3c119d392d7d7c68865fe76f5732ca9b8164d68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:05 -0700 Subject: ipv6: Refactor common ip6gre_tunnel_init codes It is a prep work to fix the dst_entry refcnt bugs in ip6_tunnel. This patch refactors some common init codes used by both ip6gre_tunnel_init and ip6gre_tap_init. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4038c69..af60d46 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1245,7 +1245,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) netif_keep_dst(dev); } -static int ip6gre_tunnel_init(struct net_device *dev) +static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1255,16 +1255,30 @@ static int ip6gre_tunnel_init(struct net_device *dev) tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int ret; + + ret = ip6gre_tunnel_init_common(dev); + if (ret) + return ret; + + tunnel = netdev_priv(dev); + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - return 0; } @@ -1460,19 +1474,16 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int ret; - tunnel = netdev_priv(dev); + ret = ip6gre_tunnel_init_common(dev); + if (ret) + return ret; - tunnel->dev = dev; - tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - return 0; } -- cgit v0.10.2 From f230d1e891ba1da5953460516960894154f265db Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:06 -0700 Subject: ipv6: Rename the dst_cache helper functions in ip6_tunnel It is a prep work to fix the dst_entry refcnt bugs in ip6_tunnel. This patch rename: 1. ip6_tnl_dst_check() to ip6_tnl_dst_get() to better reflect that it will take a dst refcnt in the next patch. 2. ip6_tnl_dst_store() to ip6_tnl_dst_set() to have a more conventional name matching with ip6_tnl_dst_get(). Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index b8529aa..979b081 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -60,9 +60,9 @@ struct ipv6_tlv_tnl_enc_lim { __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t); +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); void ip6_tnl_dst_reset(struct ip6_tnl *t); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst); +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index af60d46..24f5dd8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -634,7 +634,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_check(tunnel); + dst = ip6_tnl_dst_get(tunnel); if (!dst) { ndst = ip6_route_output(net, NULL, fl6); @@ -763,7 +763,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ip6tunnel_xmit(NULL, skb, dev); if (ndst) - ip6_tnl_dst_store(tunnel, ndst); + ip6_tnl_dst_set(tunnel, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b0ab420..599b0b4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) { struct dst_entry *dst = t->dst_cache; @@ -139,7 +139,7 @@ struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) return dst; } -EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); +EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); void ip6_tnl_dst_reset(struct ip6_tnl *t) { @@ -148,14 +148,14 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t) } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; t->dst_cookie = rt6_get_cookie(rt); dst_release(t->dst_cache); t->dst_cache = dst; } -EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); +EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -1010,7 +1010,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_check(t); + dst = ip6_tnl_dst_get(t); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) goto tx_err_link_failure; @@ -1102,7 +1102,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->daddr = fl6->daddr; ip6tunnel_xmit(NULL, skb, dev); if (ndst) - ip6_tnl_dst_store(t, ndst); + ip6_tnl_dst_set(t, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; -- cgit v0.10.2 From cdf3464e6c6bd764277cbbe992cd12da735b92fb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:07 -0700 Subject: ipv6: Fix dst_entry refcnt bugs in ip6_tunnel Problems in the current dst_entry cache in the ip6_tunnel: 1. ip6_tnl_dst_set is racy. There is no lock to protect it: - One major problem is that the dst refcnt gets messed up. F.e. the same dst_cache can be released multiple times and then triggering the infamous dst refcnt < 0 warning message. 
- Another issue is the inconsistency between dst_cache and dst_cookie. It can be reproduced by adding and removing the ip6gre tunnel while running a super_netperf TCP_CRR test. 2. ip6_tnl_dst_get does not take the dst refcnt before returning the dst. This patch: 1. Create a percpu dst_entry cache in ip6_tnl 2. Use a spinlock to protect the dst_cache operations 3. ip6_tnl_dst_get always takes the dst refcnt before returning Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 979b081..60b4f40 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -32,6 +32,12 @@ struct __ip6_tnl_parm { __be32 o_key; }; +struct ip6_tnl_dst { + spinlock_t lock; + struct dst_entry *dst; + u32 cookie; +}; + /* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ @@ -39,8 +45,7 @@ struct ip6_tnl { struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ - struct dst_entry *dst_cache; /* cached dst */ - u32 dst_cookie; + struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */ int err_count; unsigned long err_time; @@ -61,6 +66,8 @@ struct ipv6_tlv_tnl_enc_lim { } __packed; struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); +int ip6_tnl_dst_init(struct ip6_tnl *t); +void ip6_tnl_dst_destroy(struct ip6_tnl *t); void ip6_tnl_dst_reset(struct ip6_tnl *t); void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 24f5dd8..6465124 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -637,17 +637,17 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, dst = ip6_tnl_dst_get(tunnel); if (!dst) { - ndst = ip6_route_output(net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); - if (ndst->error) + if (dst->error) goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto tx_err_link_failure; } - dst = ndst; + ndst = dst; } tdev = dst->dev; @@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb = new_skb; } - if (fl6->flowi6_mark) { - skb_dst_set(skb, dst); - ndst = NULL; - } else { - skb_dst_set_noref(skb, dst); - } + if (!fl6->flowi6_mark && ndst) + ip6_tnl_dst_set(tunnel, ndst); + skb_dst_set(skb, dst); proto = NEXTHDR_GRE; if (encap_limit >= 0) { @@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_set_inner_protocol(skb, protocol); ip6tunnel_xmit(NULL, skb, dev); - if (ndst) - ip6_tnl_dst_set(tunnel, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = { static void ip6gre_dev_free(struct net_device *dev) { + struct ip6_tnl *t = netdev_priv(dev); + + ip6_tnl_dst_destroy(t); free_percpu(dev->tstats); free_netdev(dev); } @@ -1248,6 +1246,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; + int ret; tunnel = netdev_priv(dev); @@ -1259,6 +1258,13 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) 
if (!dev->tstats) return -ENOMEM; + ret = ip6_tnl_dst_init(tunnel); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 599b0b4..851cf6d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,37 +126,90 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) +static void __ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, + struct dst_entry *dst) { - struct dst_entry *dst = t->dst_cache; - - if (dst && dst->obsolete && - !dst->ops->check(dst, t->dst_cookie)) { - t->dst_cache = NULL; - dst_release(dst); - return NULL; + dst_release(idst->dst); + if (dst) { + dst_hold(dst); + idst->cookie = rt6_get_cookie((struct rt6_info *)dst); + } else { + idst->cookie = 0; } + idst->dst = dst; +} + +static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, + struct dst_entry *dst) +{ + spin_lock_bh(&idst->lock); + __ip6_tnl_per_cpu_dst_set(idst, dst); + spin_unlock_bh(&idst->lock); +} + +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) +{ + struct ip6_tnl_dst *idst; + struct dst_entry *dst; + + idst = raw_cpu_ptr(t->dst_cache); + spin_lock_bh(&idst->lock); + dst = idst->dst; + if (dst) { + if (!dst->obsolete || dst->ops->check(dst, idst->cookie)) { + dst_hold(idst->dst); + } else { + __ip6_tnl_per_cpu_dst_set(idst, NULL); + dst = NULL; + } + } + spin_unlock_bh(&idst->lock); return dst; } EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); void ip6_tnl_dst_reset(struct ip6_tnl *t) { - dst_release(t->dst_cache); - t->dst_cache = NULL; + int i; + + for_each_possible_cpu(i) + ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *) dst; - t->dst_cookie = rt6_get_cookie(rt); - dst_release(t->dst_cache); - t->dst_cache = dst; + ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst); + } EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); +void ip6_tnl_dst_destroy(struct ip6_tnl *t) +{ + if (!t->dst_cache) + return; + + ip6_tnl_dst_reset(t); + free_percpu(t->dst_cache); +} +EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy); + +int ip6_tnl_dst_init(struct ip6_tnl *t) +{ + int i; + + t->dst_cache = alloc_percpu(struct ip6_tnl_dst); + if (!t->dst_cache) + return -ENOMEM; + + for_each_possible_cpu(i) + spin_lock_init(&per_cpu_ptr(t->dst_cache, i)->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); + /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @remote: the address of the tunnel exit-point @@ -271,6 +324,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) static void ip6_dev_free(struct net_device *dev) { + struct ip6_tnl *t = netdev_priv(dev); + + ip6_tnl_dst_destroy(t); free_percpu(dev->tstats); free_netdev(dev); } @@ -1016,17 +1072,17 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, goto tx_err_link_failure; if (!dst) { - ndst = ip6_route_output(net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); - if (ndst->error) + if (dst->error) goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto tx_err_link_failure; } - dst = ndst; + ndst = dst; } tdev = dst->dev; @@ -1072,12 +1128,11 @@ 
static int ip6_tnl_xmit2(struct sk_buff *skb, consume_skb(skb); skb = new_skb; } - if (fl6->flowi6_mark) { - skb_dst_set(skb, dst); - ndst = NULL; - } else { - skb_dst_set_noref(skb, dst); - } + + if (!fl6->flowi6_mark && ndst) + ip6_tnl_dst_set(t, ndst); + skb_dst_set(skb, dst); + skb->transport_header = skb->network_header; proto = fl6->flowi6_proto; @@ -1101,14 +1156,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; ip6tunnel_xmit(NULL, skb, dev); - if (ndst) - ip6_tnl_dst_set(t, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -1573,12 +1626,21 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int ret; t->dev = dev; t->net = dev_net(dev); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + + ret = ip6_tnl_dst_init(t); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + return 0; } -- cgit v0.10.2 From 8e3d5be7368107f0c27a1f8126d79b01a47e9567 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:08 -0700 Subject: ipv6: Avoid double dst_free It is a prep work to get dst freeing from fib tree undergo a rcu grace period. The following is a common paradigm: if (ip6_del_rt(rt)) dst_free(rt) which means, if rt cannot be deleted from the fib tree, dst_free(rt) now. 1. We don't know the ip6_del_rt(rt) failure is because it was not managed by fib tree (e.g. DST_NOCACHE) or it had already been removed from the fib tree. 2. If rt had been managed by the fib tree, ip6_del_rt(rt) failure means dst_free(rt) has been called already. A second dst_free(rt) is not always obviously safe. The rt may have been destroyed already. 3. If rt is a DST_NOCACHE, dst_free(rt) should not be called. 4. It is a stopper to make dst freeing from fib tree undergo a rcu grace period. This patch is to use a DST_NOCACHE flag to indicate a rt is not managed by the fib tree. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 030fefd..9001133 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5127,13 +5127,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); - if (rt && ip6_del_rt(rt)) - dst_free(&rt->dst); + if (rt) + ip6_del_rt(rt); } dst_hold(&ifp->rt->dst); - if (ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->dst); + ip6_del_rt(ifp->rt); rt_genid_bump_ipv6(net); break; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 418d982..e68350b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -933,6 +933,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); + if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) && + !atomic_read(&rt->dst.__refcnt))) + return -EINVAL; + if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; @@ -1025,6 +1029,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) fib6_prune_clones(info->nl_net, pn); + rt->dst.flags &= ~DST_NOCACHE; } out: @@ -1049,7 +1054,8 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - dst_free(&rt->dst); + if (!(rt->dst.flags & DST_NOCACHE)) + dst_free(&rt->dst); } return err; @@ -1060,7 +1066,8 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - dst_free(&rt->dst); + if (!(rt->dst.flags & DST_NOCACHE)) + dst_free(&rt->dst); return err; #endif } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 53617d7..3d3c1b2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1322,8 +1322,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { dst_hold(&rt->dst); - if (ip6_del_rt(rt)) - dst_free(&rt->dst); + ip6_del_rt(rt); } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; } @@ -2028,7 +2027,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->dst.dev); - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.ip6_null_entry || + rt->dst.flags & DST_NOCACHE) { err = -ENOENT; goto out; } @@ -2515,6 +2515,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + rt->dst.flags |= DST_NOCACHE; atomic_set(&rt->dst.__refcnt, 1); -- cgit v0.10.2 From 70da5b5c532f0ec8aa76b4f46158da5f010f34b3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:09 -0700 Subject: ipv6: Replace spinlock with seqlock and rcu in ip6_tunnel This patch uses a seqlock to ensure consistency between idst->dst and idst->cookie. It also makes dst freeing from fib tree to undergo a rcu grace period. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 60b4f40..65c2a93 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -33,8 +33,8 @@ struct __ip6_tnl_parm { }; struct ip6_tnl_dst { - spinlock_t lock; - struct dst_entry *dst; + seqlock_t lock; + struct dst_entry __rcu *dst; u32 cookie; }; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e68350b..8a9ec01 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -155,6 +155,11 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } +static void rt6_rcu_free(struct rt6_info *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -169,7 +174,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); pcpu_rt = *ppcpu_rt; if (pcpu_rt) { - dst_free(&pcpu_rt->dst); + rt6_rcu_free(pcpu_rt); *ppcpu_rt = NULL; } } @@ -181,7 +186,7 @@ static void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) { rt6_free_pcpu(rt); - dst_free(&rt->dst); + rt6_rcu_free(rt); } } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 851cf6d..983f0d2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,45 +126,48 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -static void __ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, - struct dst_entry *dst) +static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, + struct dst_entry *dst) { - dst_release(idst->dst); + write_seqlock_bh(&idst->lock); + dst_release(rcu_dereference_protected( + idst->dst, + lockdep_is_held(&idst->lock.lock))); if (dst) { dst_hold(dst); idst->cookie = rt6_get_cookie((struct rt6_info *)dst); } else { idst->cookie = 0; } - idst->dst = dst; -} - -static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, - struct dst_entry *dst) -{ - - spin_lock_bh(&idst->lock); - __ip6_tnl_per_cpu_dst_set(idst, dst); - spin_unlock_bh(&idst->lock); + rcu_assign_pointer(idst->dst, dst); + write_sequnlock_bh(&idst->lock); } struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) { struct ip6_tnl_dst *idst; struct dst_entry *dst; + unsigned int seq; + u32 cookie; idst = raw_cpu_ptr(t->dst_cache); - spin_lock_bh(&idst->lock); - dst = idst->dst; - if (dst) { - if (!dst->obsolete || dst->ops->check(dst, idst->cookie)) { - dst_hold(idst->dst); - } else { - __ip6_tnl_per_cpu_dst_set(idst, NULL); - dst = NULL; - } + + rcu_read_lock(); + do { + seq = read_seqbegin(&idst->lock); + dst = rcu_dereference(idst->dst); + cookie = idst->cookie; + } while (read_seqretry(&idst->lock, seq)); + + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; + rcu_read_unlock(); + + if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) { + ip6_tnl_per_cpu_dst_set(idst, NULL); + dst_release(dst); + dst = NULL; } - spin_unlock_bh(&idst->lock); return dst; } EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); @@ -204,7 +207,7 @@ int ip6_tnl_dst_init(struct ip6_tnl *t) return -ENOMEM; for_each_possible_cpu(i) - spin_lock_init(&per_cpu_ptr(t->dst_cache, i)->lock); + seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock); return 0; } -- cgit v0.10.2 From daf158d0d544cec80b7b30deff8cfc59a6e17610 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 15 Sep 2015 22:41:21 +0200 Subject: net: mvneta: fix DMA buffer unmapping in mvneta_rx() This patch fixes a regression introduced by the commit a84e32894191 ("net: 
mvneta: fix refilling for Rx DMA buffers"). Due to this commit the newly allocated Rx buffers are DMA-unmapped in place of those passed to the networking stack. Obviously, this causes data corruptions. This patch fixes the issue by ensuring that the right Rx buffers are DMA-unmapped. Reported-by: Oren Laskin Signed-off-by: Simon Guinot Fixes: a84e32894191 ("net: mvneta: fix refilling for Rx DMA buffers") Cc: # v3.8+ Tested-by: Oren Laskin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index fe2299a..09ec32e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1479,6 +1479,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); struct sk_buff *skb; unsigned char *data; + dma_addr_t phys_addr; u32 rx_status; int rx_bytes, err; @@ -1486,6 +1487,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, rx_status = rx_desc->status; rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); data = (unsigned char *)rx_desc->buf_cookie; + phys_addr = rx_desc->buf_phys_addr; if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { @@ -1534,7 +1536,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, if (!skb) goto err_drop_frame; - dma_unmap_single(dev->dev.parent, rx_desc->buf_phys_addr, + dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); rcvd_pkts++; -- cgit v0.10.2 From d64f69b0373a7d0bcec8b5da7712977518a8f42b Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 15 Sep 2015 14:44:29 -0700 Subject: rtnetlink: catch -EOPNOTSUPP errors from ndo_bridge_getlink problem reported: kernel 4.1.3 ------------ # bridge vlan port vlan ids eth0 1 PVID Egress Untagged 90 91 92 93 94 95 96 97 98 99 100 vmbr0 1 PVID Egress Untagged 94 kernel 4.2 ----------- # bridge vlan port vlan ids ndo_bridge_getlink can return -EOPNOTSUPP when an interfaces ndo_bridge_getlink op is set to switchdev_port_bridge_getlink and CONFIG_SWITCHDEV is not defined. This today can happen to bond, rocker and team devices. This patch adds -EOPNOTSUPP checks after calls to ndo_bridge_getlink. Fixes: 85fdb956726ff2a ("switchdev: cut over to new switchdev_port_bridge_getlink") Reported-by: Alexandre DERUMIER Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a466821..0ec4840 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3047,6 +3047,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; u32 filter_mask = 0; + int err; if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) { struct nlattr *extfilt; @@ -3067,20 +3068,25 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - if (idx >= cb->args[0] && - br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev, filter_mask, - NLM_F_MULTI) < 0) - break; + if (idx >= cb->args[0]) { + err = br_dev->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev, + filter_mask, NLM_F_MULTI); + if (err < 0 && err != -EOPNOTSUPP) + break; + } idx++; } if (ops->ndo_bridge_getlink) { - if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev, - filter_mask, - NLM_F_MULTI) < 0) - break; + if (idx >= cb->args[0]) { + err = ops->ndo_bridge_getlink(skb, portid, + seq, dev, + filter_mask, + NLM_F_MULTI); + if (err < 0 && err != -EOPNOTSUPP) + break; + } idx++; } } -- cgit v0.10.2 From 892aa01df2ad67237f213c8f9d9b491e908aa910 Mon Sep 17 00:00:00 2001 From: Sjoerd Simons Date: Fri, 11 Sep 2015 22:25:48 +0200 Subject: net: stmmac: Use msleep rather then udelay for reset delay The reset delays used for stmmac are in the order of 10ms to 1 second, which is far too long for udelay usage, so switch to using msleep. Practically this fixes the PHY not being reliably detected in some cases as udelay wouldn't actually delay for long enough to let the phy reliably be reset. Signed-off-by: Sjoerd Simons Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index b735fa2..ebf6abc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -161,11 +161,16 @@ int stmmac_mdio_reset(struct mii_bus *bus) if (!gpio_request(reset_gpio, "mdio-reset")) { gpio_direction_output(reset_gpio, active_low ? 1 : 0); - udelay(data->delays[0]); + if (data->delays[0]) + msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpio_set_value(reset_gpio, active_low ? 0 : 1); - udelay(data->delays[1]); + if (data->delays[1]) + msleep(DIV_ROUND_UP(data->delays[1], 1000)); + gpio_set_value(reset_gpio, active_low ? 
1 : 0); - udelay(data->delays[2]); + if (data->delays[2]) + msleep(DIV_ROUND_UP(data->delays[2], 1000)); } } #endif -- cgit v0.10.2 From 865ca084fdc68cd9b658da4b098008278da8fed3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 15 Sep 2015 13:50:18 -0700 Subject: ia64: Enable userfaultfd and membarrier system calls Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 95c39b9..99c96a5 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 319 /* length of syscall table */ +#define NR_syscalls 321 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 4610795..98e94e1 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -332,5 +332,7 @@ #define __NR_memfd_create 1340 #define __NR_bpf 1341 #define __NR_execveat 1342 +#define __NR_userfaultfd 1343 +#define __NR_membarrier 1344 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ae0de7b..37cc7a6 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1768,5 +1768,7 @@ sys_call_table: data8 sys_memfd_create // 1340 data8 sys_bpf data8 sys_execveat + data8 sys_userfaultfd + data8 sys_membarrier .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls -- cgit v0.10.2 From 982b527004826b40de1e821b123c70f05b41496c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 11 Sep 2015 18:38:28 -0700 Subject: openvswitch: Fix mask generation for nested attributes. Masks were added to OVS flows in a way that was backwards compatible with userspace programs that did not generate masks. As a result, it is possible that we may receive flows that do not have a mask and we need to synthesize one. Generating a mask requires iterating over attributes and descending into nested attributes. For each level we need to know the size to generate the correct mask. We do this with a linked table of attribute types. Although the logic to handle these nested attributes was there in concept, there are a number of bugs in practice. Examples include incomplete links between tables, variable length attributes being treated as nested and missing sanity checks. Signed-off-by: Jesse Gross Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c92d6a2..5c030a4 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -57,6 +57,7 @@ struct ovs_len_tbl { }; #define OVS_ATTR_NESTED -1 +#define OVS_ATTR_VARIABLE -2 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -304,6 +305,10 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) }, +}; + static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, @@ -315,8 +320,9 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, - [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, + .next = ovs_vxlan_ext_key_lens }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -349,6 +355,13 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, }; +static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) +{ + return expected_len == attr_len || + expected_len == OVS_ATTR_NESTED || + expected_len == OVS_ATTR_VARIABLE; +} + static bool is_all_zero(const u8 *fp, size_t size) { int i; @@ -388,7 +401,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } expected_len = ovs_key_lens[type].len; - if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { + if (!check_attr_len(nla_len(nla), expected_len)) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -473,29 +486,50 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, return 0; } -static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { - [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, -}; - -static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, +static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) { - struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + struct nlattr *a; + int rem; unsigned long opt_key_offset; struct vxlan_metadata opts; - int err; BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); - err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); - if (err < 0) - return err; - memset(&opts, 0, sizeof(opts)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); - if (tb[OVS_VXLAN_EXT_GBP]) - opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + if (type > OVS_VXLAN_EXT_MAX) { + OVS_NLERR(log, "VXLAN extension %d out of range max %d", + type, OVS_VXLAN_EXT_MAX); + return -EINVAL; + } + + if (!check_attr_len(nla_len(a), + ovs_vxlan_ext_key_lens[type].len)) { + OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d", + type, nla_len(a), + ovs_vxlan_ext_key_lens[type].len); + return -EINVAL; + } + + switch (type) { + case OVS_VXLAN_EXT_GBP: + opts.gbp = nla_get_u32(a); + break; + 
default: + OVS_NLERR(log, "Unknown VXLAN extension attribute %d", + type); + return -EINVAL; + } + } + if (rem) { + OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.", + rem); + return -EINVAL; + } if (!is_mask) SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); @@ -528,8 +562,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, return -EINVAL; } - if (ovs_tunnel_key_lens[type].len != nla_len(a) && - ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { + if (!check_attr_len(nla_len(a), + ovs_tunnel_key_lens[type].len)) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; @@ -1052,10 +1086,13 @@ static void nlattr_set(struct nlattr *attr, u8 val, /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) - nlattr_set(nla, val, tbl[nla_type(nla)].next); - else + if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { + if (tbl[nla_type(nla)].next) + tbl = tbl[nla_type(nla)].next; + nlattr_set(nla, val, tbl); + } else { memset(nla_data(nla), val, nla_len(nla)); + } } } @@ -1922,8 +1959,7 @@ static int validate_set(const struct nlattr *a, key_len /= 2; if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type].len != key_len && - ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) + !check_attr_len(key_len, ovs_key_lens[key_type].len)) return -EINVAL; if (masked && !validate_masked(nla_data(ovs_key), key_len)) -- cgit v0.10.2 From 58d29e3ce903dcafacee9e355225d64922325cf0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:03 +0200 Subject: atm: he: drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/atm/he.c b/drivers/atm/he.c index a8da3a5..0f5cb37 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1578,9 +1578,7 @@ he_stop(struct he_dev *he_dev) kfree(he_dev->rbpl_virt); kfree(he_dev->rbpl_table); - - if (he_dev->rbpl_pool) - dma_pool_destroy(he_dev->rbpl_pool); + dma_pool_destroy(he_dev->rbpl_pool); if (he_dev->rbrq_base) dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), @@ -1594,8 +1592,7 @@ he_stop(struct he_dev *he_dev) dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), he_dev->tpdrq_base, he_dev->tpdrq_phys); - if (he_dev->tpd_pool) - dma_pool_destroy(he_dev->tpd_pool); + dma_pool_destroy(he_dev->tpd_pool); if (he_dev->pci_dev) { pci_read_config_word(he_dev->pci_dev, PCI_COMMAND, &command); -- cgit v0.10.2 From adf78edac09f9640cd9676571586c4be46fb527c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:18 +0200 Subject: net: core: drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) { \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); x = NULL; -} // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller diff --git a/net/core/sock.c b/net/core/sock.c index ca2984a..3307c02 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2740,10 +2740,8 @@ static void req_prot_cleanup(struct request_sock_ops *rsk_prot) return; kfree(rsk_prot->slab_name); rsk_prot->slab_name = NULL; - if (rsk_prot->slab) { - kmem_cache_destroy(rsk_prot->slab); - rsk_prot->slab = NULL; - } + kmem_cache_destroy(rsk_prot->slab); + rsk_prot->slab = NULL; } static int req_prot_init(const struct proto *prot) @@ -2828,10 +2826,8 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); mutex_unlock(&proto_list_mutex); - if (prot->slab != NULL) { - kmem_cache_destroy(prot->slab); - prot->slab = NULL; - } + kmem_cache_destroy(prot->slab); + prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); -- cgit v0.10.2 From 20471ed4d403a5f4de6aa0c10cd1e446f7f2b3c7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:27 +0200 Subject: dccp: drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); @@ expression x; @@ -if (x != NULL) { \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); x = NULL; -} // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index bd9e718..3de0d03 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -398,12 +398,8 @@ out_err: void dccp_ackvec_exit(void) { - if (dccp_ackvec_slab != NULL) { - kmem_cache_destroy(dccp_ackvec_slab); - dccp_ackvec_slab = NULL; - } - if (dccp_ackvec_record_slab != NULL) { - kmem_cache_destroy(dccp_ackvec_record_slab); - dccp_ackvec_record_slab = NULL; - } + kmem_cache_destroy(dccp_ackvec_slab); + dccp_ackvec_slab = NULL; + kmem_cache_destroy(dccp_ackvec_record_slab); + dccp_ackvec_record_slab = NULL; } diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 8349897..90f77d0 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -95,8 +95,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f static void ccid_kmem_cache_destroy(struct kmem_cache *slab) { - if (slab != NULL) - kmem_cache_destroy(slab); + kmem_cache_destroy(slab); } static int __init ccid_activate(struct ccid_operations *ccid_ops) -- cgit v0.10.2 From e94f5a2285fc94202a9efb2c687481f29b64132c Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 14 Aug 2015 16:15:31 -0400 Subject: dax: fix O_DIRECT I/O to the last block of a blockdev commit bbab37ddc20b (block: Add support for DAX reads/writes to block devices) caused a regression in mkfs.xfs. That utility sets the block size of the device to the logical block size using the BLKBSZSET ioctl, and then issues a single sector read from the last sector of the device. This results in the dax_io code trying to do a page-sized read from 512 bytes from the end of the device. The result is -ERANGE being returned to userspace. The fix is to align the block to the page size before calling get_block. Thanks to willy for simplifying my original patch. 
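To make the alignment concrete, the arithmetic can be checked with a small standalone C program (a sketch of the calculation only, not kernel code; the pos value is an arbitrary example of a read against the final, non-page-aligned sector):

#include <stdio.h>

int main(void)
{
	unsigned long long pos = 268451328ULL;	/* byte offset of the last 512-byte sector */
	unsigned blkbits = 9;			/* 512-byte logical blocks */
	unsigned page_shift = 12;		/* 4K pages */

	/* Before the fix: the block is 512 bytes from the end of the
	 * device, so a page-sized dax access starting there runs past
	 * the end and fails with -ERANGE. */
	unsigned long long unaligned = pos >> blkbits;

	/* After the fix: round down to the containing page first, so
	 * get_block() is asked about a page-aligned block. */
	unsigned long long page = pos >> page_shift;
	unsigned long long aligned = page << (page_shift - blkbits);

	printf("unaligned block %llu, page-aligned block %llu\n",
	       unaligned, aligned);	/* 524319 vs 524312 */
	return 0;
}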
Cc: Signed-off-by: Jeff Moyer Tested-by: Linda Knippers Signed-off-by: Dan Williams diff --git a/fs/dax.c b/fs/dax.c index 93bf2f9..7ae6df7 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -119,7 +119,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, size_t len; if (pos == max) { unsigned blkbits = inode->i_blkbits; - sector_t block = pos >> blkbits; + long page = pos >> PAGE_SHIFT; + sector_t block = page << (PAGE_SHIFT - blkbits); unsigned first = pos - (block << blkbits); long size; -- cgit v0.10.2 From f0b2e563bc419df7c1b3d2f494574c25125f6aed Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 14 Aug 2015 16:15:32 -0400 Subject: blockdev: don't set S_DAX for misaligned partitions The dax code doesn't currently support misaligned partitions, so disable O_DIRECT via dax until such time as that support materializes. Cc: Suggested-by: Boaz Harrosh Signed-off-by: Jeff Moyer Signed-off-by: Dan Williams diff --git a/fs/block_dev.c b/fs/block_dev.c index 22ea424..073bb57 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1242,6 +1242,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. + */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { -- cgit v0.10.2 From 1f0bd44e937468446d080b98b5669844744c24a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Sep 2015 02:17:49 +0200 Subject: cpufreq: acpi-cpufreq: Use cpufreq_cpu_get_raw() in ->get() cpufreq_cpu_get() called by get_cur_freq_on_cpu() is overkill, because the ->get() callback is always invoked in a context in which all of the conditions checked by cpufreq_cpu_get() are guaranteed to be satisfied. Use cpufreq_cpu_get_raw() instead of it and drop the corresponding cpufreq_cpu_put() from get_cur_freq_on_cpu(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 15b921a..7982772 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -375,12 +375,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); - policy = cpufreq_cpu_get(cpu); + policy = cpufreq_cpu_get_raw(cpu); if (unlikely(!policy)) return 0; data = policy->driver_data; - cpufreq_cpu_put(policy); if (unlikely(!data || !data->freq_table)) return 0; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6633b3f..ef5ed94 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -238,13 +238,13 @@ int cpufreq_generic_init(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_generic_init); -/* Only for cpufreq core internal use */ -static struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) +struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) { struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); return policy && cpumask_test_cpu(cpu, policy->cpus) ? 
policy : NULL; } +EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw); unsigned int cpufreq_generic_get(unsigned int cpu) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 430efcb..dca22de 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -127,9 +127,14 @@ struct cpufreq_policy { #define CPUFREQ_SHARED_TYPE_ANY (3) /* Freq can be set from any dependent CPU*/ #ifdef CONFIG_CPU_FREQ +struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); void cpufreq_cpu_put(struct cpufreq_policy *policy); #else +static inline struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) +{ + return NULL; +} static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { return NULL; -- cgit v0.10.2 From 9b55613f42e8d40d5c9ccb8970bde6af4764b2ab Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 16:44:02 +0100 Subject: ARM: fix Thumb2 signal handling when ARMv6 is enabled When a kernel is built covering ARMv6 to ARMv7, we omit to clear the IT state when entering a signal handler. This can cause the first few instructions to be conditionally executed depending on the parent context. In any case, the original test for >= ARMv7 is broken - ARMv6 can have Thumb-2 support as well, and an ARMv6T2 specific build would omit this code too. Relax the test back to ARMv6 or greater. This results in us always clearing the IT state bits in the PSR, even on CPUs where these bits are reserved. However, they're reserved for the IT state, so this should cause no harm. Cc: Fixes: d71e1352e240 ("Clear the IT state when invoking a Thumb-2 signal handler") Acked-by: Tony Lindgren Tested-by: H. Nikolaus Schaller Tested-by: Grazvydas Ignotas Signed-off-by: Russell King diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e..586eef2 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -343,12 +343,17 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 6 /* - * Clear the If-Then Thumb-2 execution state - * ARM spec requires this to be all 000s in ARM mode - * Snapdragon S4/Krait misbehaves on a Thumb=>ARM - * signal transition without this. + * Clear the If-Then Thumb-2 execution state. ARM spec + * requires this to be all 000s in ARM mode. Snapdragon + * S4/Krait misbehaves on a Thumb=>ARM signal transition + * without this. + * + * We must do this whenever we are running on a Thumb-2 + * capable CPU, which includes ARMv6T2. However, we elect + * to do this whenever we're on an ARMv6 or later CPU for + * simplicity. */ cpsr &= ~PSR_IT_MASK; #endif -- cgit v0.10.2 From 4e9fa50c6ccbebef0c4a4aae84090badf81359e6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 9 Sep 2015 22:24:56 +0300 Subject: vhost: move features to core virtio 1 and any layout are core features, move them there. This fixes vhost test. Signed-off-by: Michael S. 
Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7d137a4..9eda69e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -61,8 +61,7 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_F_VERSION_1), + (1ULL << VIRTIO_NET_F_MRG_RXBUF) }; enum { diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index dfcc02c..0a5b404 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -166,9 +166,7 @@ enum { /* Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT. */ enum { VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) | - (1ULL << VIRTIO_SCSI_F_T10_PI) | - (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) + (1ULL << VIRTIO_SCSI_F_T10_PI) }; #define VHOST_SCSI_MAX_TARGET 256 diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index d9c501e..f2882ac 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -277,10 +277,13 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return 0; case VHOST_SET_FEATURES: + printk(KERN_ERR "1\n"); if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; + printk(KERN_ERR "2\n"); if (features & ~VHOST_FEATURES) return -EOPNOTSUPP; + printk(KERN_ERR "3\n"); return vhost_test_set_features(n, features); case VHOST_RESET_OWNER: return vhost_test_reset_owner(n); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index ce6f6da..4772862 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -173,7 +173,9 @@ enum { VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | (1ULL << VIRTIO_RING_F_EVENT_IDX) | - (1ULL << VHOST_F_LOG_ALL), + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VIRTIO_F_ANY_LAYOUT) | + (1ULL << VIRTIO_F_VERSION_1) }; static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) -- cgit v0.10.2 From fd2e8d4300c56f9660679d629c449118e14c00cd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 9 Sep 2015 22:26:25 +0300 Subject: tools/virtio: propagate V=X to kernel build Signed-off-by: Michael S. Tsirkin diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 505ad51..39c89a5 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -6,7 +6,7 @@ vringh_test: vringh_test.o vringh.o virtio_ring.o CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE vpath %.c ../../drivers/virtio ../../drivers/vhost mod: - ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test + ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} .PHONY: all test mod clean clean: ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ -- cgit v0.10.2 From ad2aa04218de9bd734d593adb0ac59854ec0cb68 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 10 Sep 2015 16:35:08 +0200 Subject: virtio/s390: handle failures of READ_VQ_CONF ccw In virtio_ccw_read_vq_conf() the return value of ccw_io_helper() was not checked. If the configuration could not be read properly, we'd wrongly assume a queue size of 0. Let's propagate any I/O error to virtio_ccw_setup_vq() so it may properly fail. Signed-off-by: Pierre Morel Signed-off-by: Cornelia Huck Signed-off-by: Michael S. 
Tsirkin diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index f8d8fdb..e9fae30 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -400,12 +400,16 @@ static bool virtio_ccw_kvm_notify(struct virtqueue *vq) static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, struct ccw1 *ccw, int index) { + int ret; + vcdev->config_block->index = index; ccw->cmd_code = CCW_CMD_READ_VQ_CONF; ccw->flags = 0; ccw->count = sizeof(struct vq_config_block); ccw->cda = (__u32)(unsigned long)(vcdev->config_block); - ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); + if (ret) + return ret; return vcdev->config_block->num; } @@ -503,6 +507,10 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, goto out_err; } info->num = virtio_ccw_read_vq_conf(vcdev, ccw, i); + if (info->num < 0) { + err = info->num; + goto out_err; + } size = PAGE_ALIGN(vring_size(info->num, KVM_VIRTIO_CCW_RING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); if (info->queue == NULL) { -- cgit v0.10.2 From 62bea5bff486644ecf363fe8a1a2f6f32c614a49 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 15 Sep 2015 18:27:57 +0200 Subject: KVM: add halt_attempted_poll to VCPU stats This new statistic can help diagnose VCPUs that, for any reason, trigger bad behavior of halt_poll_ns autotuning. For example, say halt_poll_ns = 480000, and wakeups are spaced at exactly 479us, 481us, 479us, 481us. Then KVM always fails polling and wastes 10+20+40+80+160+320+480 = 1110 microseconds out of every 479+481+479+481+479+481+479 = 3359 microseconds. The VCPU is then consuming about 30% more CPU than it would use without polling. This would show up as an abnormally high number of attempted polls compared to successful polls.
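To make the arithmetic above concrete, here is a minimal user-space sketch of the losing pattern described in the changelog. The 10us starting window and the doubling growth are assumptions about how the autotuning behaves in this scenario, not something this patch changes:

    #include <stdio.h>

    /* Poll windows assumed to double 10, 20, ..., 480us while every
     * wakeup lands just outside the window, so each attempted poll is
     * pure waste. All figures are in microseconds. */
    int main(void)
    {
            const int window[] = { 10, 20, 40, 80, 160, 320, 480 };
            const int wakeup[] = { 479, 481, 479, 481, 479, 481, 479 };
            int wasted = 0, elapsed = 0, i;

            for (i = 0; i < 7; i++) {
                    wasted += window[i];    /* halt_attempted_poll++, no success */
                    elapsed += wakeup[i];
            }
            printf("wasted %dus out of %dus (~%d%%)\n",
                   wasted, elapsed, 100 * wasted / elapsed);
            /* prints: wasted 1110us out of 3359us (~33%) */
            return 0;
    }

A high halt_attempted_poll count paired with few halt_successful_poll increments is exactly this signature.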
Acked-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dcba0fa..687ddeb 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -148,6 +148,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; }; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 415938d..4865945 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -195,6 +195,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; }; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index e8c8d9d..3a54dbc 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -128,6 +128,7 @@ struct kvm_vcpu_stat { u32 msa_disabled_exits; u32 flush_dcache_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; }; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd4c129..49ff3bf 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -55,6 +55,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, {NULL} }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98eebbf..195886a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -108,6 +108,7 @@ struct kvm_vcpu_stat { u32 dec_exits; u32 ext_intr_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d75bf32..cf00916 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -53,6 +53,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "queue_intr", VCPU_STAT(queue_intr) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "pf_storage", VCPU_STAT(pf_storage) }, { "sp_storage", VCPU_STAT(sp_storage) }, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ae458f0..fd58751 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "doorbell", VCPU_STAT(dbell_exits) }, { "guest doorbell", VCPU_STAT(gdbell_exits) }, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3d012e0..6ce4a0b 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -210,6 +210,7 @@ struct kvm_vcpu_stat { u32 exit_validity; u32 exit_instruction; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 instruction_lctl; u32 instruction_lctlg; diff --git 
a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c91eb94..2f807ab 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c12e845..349f80a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -711,6 +711,7 @@ struct kvm_vcpu_stat { u32 nmi_window_exits; u32 halt_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 request_irq_exits; u32 irq_exits; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a60bdbc..6bbb0df 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -149,6 +149,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "nmi_window", VCPU_STAT(nmi_window_exits) }, { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9af68db..04146a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2004,6 +2004,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) if (vcpu->halt_poll_ns) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); + ++vcpu->stat.halt_attempted_poll; do { /* * This sets KVM_REQ_UNHALT if an interrupt -- cgit v0.10.2 From b9a5ec33e32069a2f8af09d46266091602902bf4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:32:40 +0200 Subject: pinctrl: sunxi: Use irq_set_chip_handler_name_locked() __irq_set_chip_handler_name_locked() is about to be replaced. Use irq_set_chip_handler_name_locked() instead. 
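For context, the locked variant being adopted here takes the irq_data directly, so the descriptor is derived without an extra irq_to_desc() lookup. The helper is expected to boil down to roughly the following sketch (illustrative, not the authoritative in-tree body):

    static inline void
    irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
                                     irq_flow_handler_t handler, const char *name)
    {
            struct irq_desc *desc = irq_data_to_desc(data);

            desc->handle_irq = handler;
            desc->name = name;
            data->chip = chip;
    }

The conversion below can therefore pass d straight through instead of d->irq.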
Signed-off-by: Thomas Gleixner Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index fb4669c0..31af97d 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -617,13 +617,11 @@ static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type) spin_lock_irqsave(&pctl->lock, flags); if (type & IRQ_TYPE_LEVEL_MASK) - __irq_set_chip_handler_name_locked(d->irq, - &sunxi_pinctrl_level_irq_chip, - handle_fasteoi_irq, NULL); + irq_set_chip_handler_name_locked(d, &sunxi_pinctrl_level_irq_chip, + handle_fasteoi_irq, NULL); else - __irq_set_chip_handler_name_locked(d->irq, - &sunxi_pinctrl_edge_irq_chip, - handle_edge_irq, NULL); + irq_set_chip_handler_name_locked(d, &sunxi_pinctrl_edge_irq_chip, + handle_edge_irq, NULL); regval = readl(pctl->membase + reg); regval &= ~(IRQ_CFG_IRQ_MASK << index); -- cgit v0.10.2 From e902e14549e04c040fb6e15785efd35f810a223a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:36:04 +0200 Subject: genirq: Remove __irq_set_chip_handler_name_locked() All users are converted to irq_set_chip_handler_name_locked(). Signed-off-by: Thomas Gleixner diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 0593c69..2974138 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -184,19 +184,6 @@ static inline void __irq_set_handler_locked(unsigned int irq, desc->handle_irq = handler; } -/* caller has locked the irq_desc and both params are valid */ -static inline void -__irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip, - irq_flow_handler_t handler, const char *name) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - irq_desc_get_irq_data(desc)->chip = chip; - desc->handle_irq = handler; - desc->name = name; -} - /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq -- cgit v0.10.2 From 655471f54c2e395ba29ae4156ba0f49928177cc1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Sep 2015 11:24:17 +1000 Subject: powerpc/boot: Specify ABI v2 when building an LE boot wrapper The kernel build does this itself, but the boot wrapper did not, which breaks with some cross compilers that still default to ABI v1. Fixes: 147c05168fc8 ("powerpc/boot: Add support for 64bit little endian wrapper") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda..4eec430 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc -- cgit v0.10.2 From 36b35d5d807b7e57aff7d08e63de8b17731ee211 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 15 Sep 2015 12:30:08 +0530 Subject: powerpc/mm: Recompute hash value after a failed update If we had the secondary hash flag set, we ended up modifying the hash value in the updatepp code path. Hence, after a failed updatepp, we would use the wrong hash value for the following hash insert. Fix this by recomputing the hash before the insert. Without this patch we can end up using the wrong slot number in the Linux pte.
That can result in us missing a hash pte update or invalidate, which can cause memory corruption or even a machine check. Fixes: 6d492ecc6489 ("powerpc/THP: Add code to handle HPTE faults for hugepages") Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Aneesh Kumar K.V Reviewed-by: Paul Mackerras Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9..4d87122 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; -- cgit v0.10.2 From 04bb92e4b4cf06a66889d37b892b78f926faa9d4 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 16 Sep 2015 19:31:11 +0800 Subject: KVM: vmx: fix VPID is 0000H in non-root operation Reference SDM 28.1: The current VPID is 0000H in the following situations: - Outside VMX operation. (This includes operation in system-management mode under the default treatment of SMIs and SMM with VMX operation; see Section 34.14.) - In VMX root operation. - In VMX non-root operation when the "enable VPID" VM-execution control is 0. The VPID should never be 0000H in non-root operation when the "enable VPID" VM-execution control is 1. However, commit 34a1cd60 ("kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()") removed the code that reserves 0000H for VMX root operation. This patch fixes it by again reserving 0000H for VMX root operation. Cc: stable@vger.kernel.org # 3.19+ Fixes: 34a1cd60d17f62c1f077c1478a6c2ca8c3d17af4 Reported-by: Wincy Van Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d019868..6407674 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6064,6 +6064,8 @@ static __init int hardware_setup(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); -- cgit v0.10.2 From 6b83bd941479e99f6890ed4b5c54cb1f43c7146f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 Jun 2015 15:52:31 +0200 Subject: powerpc/mpc52xx: Use irq_set_handler_locked() Use irq_set_handler_locked() as it avoids a redundant lookup of the irq descriptor.
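As with the chip/handler/name variant above, the replacement helper works on the irq_data and is expected to reduce to roughly this sketch (illustrative, not the authoritative in-tree body):

    static inline void irq_set_handler_locked(struct irq_data *data,
                                              irq_flow_handler_t handler)
    {
            struct irq_desc *desc = irq_data_to_desc(data);

            desc->handle_irq = handler;
    }

Compare __irq_set_handler_locked(), removed later in this series, which had to call irq_to_desc(irq) first.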
Search and replacement was done with coccinelle: @@ struct irq_data *d; expression E1; @@ -__irq_set_handler_locked(d->irq, E1); +irq_set_handler_locked(d, E1); Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Julia Lawall Cc: Anatolij Gustschin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2944bc8..4fe2074 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -196,7 +196,7 @@ static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) ctrl_reg |= (type << (22 - (l2irq * 2))); out_be32(&intr->ctrl, ctrl_reg); - __irq_set_handler_locked(d->irq, handler); + irq_set_handler_locked(d, handler); return 0; } -- cgit v0.10.2 From e9e879a3d6cecc4258294c674627045da648fab0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 Jun 2015 15:52:32 +0200 Subject: powerpc/cpm2: Use irq_set_handler_locked() Use irq_set_handler_locked() as it avoids a redundant lookup of the irq descriptor. Search and replacement was done with coccinelle: @@ struct irq_data *d; expression E1; @@ -__irq_set_handler_locked(d->irq, E1); +irq_set_handler_locked(d, E1); Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Julia Lawall Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index a11bd1d..9e86074 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -155,9 +155,9 @@ static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type) irqd_set_trigger_type(d, flow_type); if (flow_type & IRQ_TYPE_LEVEL_LOW) - __irq_set_handler_locked(d->irq, handle_level_irq); + irq_set_handler_locked(d, handle_level_irq); else - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); /* internal IRQ senses are LEVEL_LOW * EXT IRQ and Port C IRQ senses are programmable -- cgit v0.10.2 From 9758a7b0e5fdd5a08bcc73540bb3b98d59295a6d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 Jun 2015 15:52:34 +0200 Subject: powerpc/ipic: Use irq_set_handler_locked() Use irq_set_handler_locked() as it avoids a redundant lookup of the irq descriptor. 
Search and replacement was done with coccinelle: @@ struct irq_data *d; expression E1; @@ -__irq_set_handler_locked(d->irq, E1); +irq_set_handler_locked(d, E1); Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Julia Lawall Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Anton Blanchard Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 6b2b689..b1297ab 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -624,10 +624,10 @@ static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type) irqd_set_trigger_type(d, flow_type); if (flow_type & IRQ_TYPE_LEVEL_LOW) { - __irq_set_handler_locked(d->irq, handle_level_irq); + irq_set_handler_locked(d, handle_level_irq); d->chip = &ipic_level_irq_chip; } else { - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); d->chip = &ipic_edge_irq_chip; } -- cgit v0.10.2 From 9ca86b204b3c874e6ada28309f607e3aa74d60a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 Jun 2015 15:52:36 +0200 Subject: powerpc/mpc8xx: Use irq_set_handler_locked() Use irq_set_handler_locked() as it avoids a redundant lookup of the irq descriptor. Search and replacement was done with coccinelle: @@ struct irq_data *d; expression E1; @@ -__irq_set_handler_locked(d->irq, E1); +irq_set_handler_locked(d, E1); Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Julia Lawall Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index d93a78be..9a42397 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -55,7 +55,7 @@ static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type) unsigned int siel = in_be32(&siu_reg->sc_siel); siel |= mpc8xx_irqd_to_bit(d); out_be32(&siu_reg->sc_siel, siel); - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); } return 0; } -- cgit v0.10.2 From a7147db0f5184954687546d430aed65cce483ffa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:51:00 +0200 Subject: gpio: vf610: Use irq_set_handler_locked Use irq_set_handler_locked() as it avoids a redundant lookup of the irq descriptor. Search and replacement was done with coccinelle: Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 3d5714d..7a6640b 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -176,9 +176,9 @@ static int vf610_gpio_irq_set_type(struct irq_data *d, u32 type) port->irqc[d->hwirq] = irqc; if (type & IRQ_TYPE_LEVEL_MASK) - __irq_set_handler_locked(d->irq, handle_level_irq); + irq_set_handler_locked(d, handle_level_irq); else - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); return 0; } -- cgit v0.10.2 From 7c51173adde2ec520348812214704313f5b36f75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:52:53 +0200 Subject: pinctrl/pistachio: Use irq_set_handler_locked Use irq_set_handler_locked() as it avoids a redundant lookup of the irq descriptor. 
Search and replacement was done with coccinelle: Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 3dc2ae1..f22d585 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1303,9 +1303,9 @@ static int pistachio_gpio_irq_set_type(struct irq_data *data, unsigned int type) } if (type & IRQ_TYPE_LEVEL_MASK) - __irq_set_handler_locked(data->irq, handle_level_irq); + irq_set_handler_locked(data, handle_level_irq); else - __irq_set_handler_locked(data->irq, handle_edge_irq); + irq_set_handler_locked(data, handle_edge_irq); return 0; } -- cgit v0.10.2 From 123236ccacc933daac3b39c5eb1f0011c70d41d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:54:23 +0200 Subject: genirq: Remove __irq_set_handler_locked() All users converted to irq_set_handler_locked() Signed-off-by: Thomas Gleixner diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 2974138..dce395c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -174,16 +174,6 @@ static inline int irq_has_action(unsigned int irq) return irq_desc_has_action(irq_to_desc(irq)); } -/* caller has locked the irq_desc and both params are valid */ -static inline void __irq_set_handler_locked(unsigned int irq, - irq_flow_handler_t handler) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->handle_irq = handler; -} - /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq -- cgit v0.10.2 From 755d119a6204974b2005a98549a48a75a7f5010b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 14:37:12 +0200 Subject: genirq: Simplify irq_data_to_desc() Avoid the lookup of irq_desc and use the same mechanism for hierarchical and flat irqdomains. Based-on-a-patch-from: Jiang Liu Signed-off-by: Thomas Gleixner diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dce395c..1fc5304 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -98,11 +98,7 @@ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - return irq_to_desc(data->irq); -#else - return container_of(data, struct irq_desc, irq_data); -#endif + return container_of(data->common, struct irq_desc, irq_common_data); } static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) -- cgit v0.10.2 From fc5697126aa074c289df5e8baae28e115963023f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 12:33:42 +0200 Subject: genirq: Provide IRQD_FORWARDED_TO_VCPU status flag Provide a irq data flag to mark an irq forwarded to a VCPU along with the accessor functions. 
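The expected consumer is an irqchip's irq_set_vcpu_affinity() callback. A minimal sketch of the pattern, mirroring the GIC conversions that follow (my_chip_set_vcpu_affinity is a hypothetical driver callback):

    static int my_chip_set_vcpu_affinity(struct irq_data *d, void *vcpu)
    {
            /* hypothetical callback: record forwarding in the irqd state
             * instead of abusing handler_data */
            if (vcpu)
                    irqd_set_forwarded_to_vcpu(d);
            else
                    irqd_clr_forwarded_to_vcpu(d);
            return 0;
    }

Mask and EOI paths can then simply test irqd_is_forwarded_to_vcpu(d).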
Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier diff --git a/include/linux/irq.h b/include/linux/irq.h index 72a6b2f..e54ae82 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -186,6 +186,7 @@ struct irq_data { * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt * IRQD_WAKEUP_ARMED - Wakeup mode armed + * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -200,6 +201,7 @@ enum { IRQD_IRQ_MASKED = (1 << 17), IRQD_IRQ_INPROGRESS = (1 << 18), IRQD_WAKEUP_ARMED = (1 << 19), + IRQD_FORWARDED_TO_VCPU = (1 << 20), }; #define __irqd_to_state(d) ((d)->common->state_use_accessors) @@ -278,6 +280,20 @@ static inline bool irqd_is_wakeup_armed(struct irq_data *d) return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; } +static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU; +} + +static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU; +} + +static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; +} /* * Functions for chained handlers which can be enabled/disabled by the -- cgit v0.10.2 From 714665351cc718e01f0ce34ef5325932d56d8b4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 12:37:36 +0200 Subject: irqchip/gic: Use IRQD_FORWARDED_TO_VCPU flag Get rid of the handler_data abuse. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index a947057..9bccdd2 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -145,29 +145,10 @@ static inline bool cascading_gic_irq(struct irq_data *d) void *data = irq_data_get_irq_handler_data(d); /* - * If handler_data pointing to one of the secondary GICs, then - * this is a cascading interrupt, and it cannot possibly be - * forwarded. + * If handler_data is set, this is a cascading interrupt, and + * it cannot possibly be forwarded. */ - if (data >= (void *)(gic_data + 1) && - data < (void *)(gic_data + MAX_GIC_NR)) - return true; - - return false; -} - -static inline bool forwarded_irq(struct irq_data *d) -{ - /* - * A forwarded interrupt: - * - is on the primary GIC - * - has its handler_data set to a value - * - that isn't a secondary GIC - */ - if (d->handler_data && !cascading_gic_irq(d)) - return true; - - return false; + return data != NULL; } /* @@ -201,7 +182,7 @@ static void gic_eoimode1_mask_irq(struct irq_data *d) * disabled/masked will not get "stuck", because there is * noone to deactivate it (guest is being terminated). */ - if (forwarded_irq(d)) + if (irqd_is_forwarded_to_vcpu(d)) gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR); } @@ -218,7 +199,7 @@ static void gic_eoi_irq(struct irq_data *d) static void gic_eoimode1_eoi_irq(struct irq_data *d) { /* Do not deactivate an IRQ forwarded to a vcpu. 
*/ - if (forwarded_irq(d)) + if (irqd_is_forwarded_to_vcpu(d)) return; writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE); @@ -296,7 +277,10 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) if (cascading_gic_irq(d)) return -EINVAL; - d->handler_data = vcpu; + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); return 0; } -- cgit v0.10.2 From 4df7f54d16ebe9c37631182de84c4347f0e44db3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 13:19:16 +0200 Subject: irqchip/gic-v3: Use IRQD_FORWARDED_TO_VCPU flag Get rid of the handler_data abuse. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 7deed6e..b5418f5 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -70,11 +70,6 @@ static inline int gic_irq_in_rdist(struct irq_data *d) return gic_irq(d) < 32; } -static inline bool forwarded_irq(struct irq_data *d) -{ - return d->handler_data != NULL; -} - static inline void __iomem *gic_dist_base(struct irq_data *d) { if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ @@ -249,7 +244,7 @@ static void gic_eoimode1_mask_irq(struct irq_data *d) * disabled/masked will not get "stuck", because there is * noone to deactivate it (guest is being terminated). */ - if (forwarded_irq(d)) + if (irqd_is_forwarded_to_vcpu(d)) gic_poke_irq(d, GICD_ICACTIVER); } @@ -324,7 +319,7 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d) * No need to deactivate an LPI, or an interrupt that * is is getting forwarded to a vcpu. */ - if (gic_irq(d) >= 8192 || forwarded_irq(d)) + if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) return; gic_write_dir(gic_irq(d)); } @@ -357,7 +352,10 @@ static int gic_set_type(struct irq_data *d, unsigned int type) static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) { - d->handler_data = vcpu; + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); return 0; } -- cgit v0.10.2 From 449e9cae58b06be1293858ec8e5d8cb728238baa Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:16 +0800 Subject: genirq: Move field 'node' from irq_data into irq_common_data NUMA node information is per-irq instead of per-irqchip, so move it into struct irq_common_data. Also use CONFIG_NUMA to guard irq_common_data.node. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433145945-789-8-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index e54ae82..ebcc5c6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -129,9 +129,13 @@ struct irq_domain; * struct irq_common_data - per irq data shared by all irqchips * @state_use_accessors: status information for irq chip functions. 
* Use accessor functions to deal with it + * @node: node index useful for balancing */ struct irq_common_data { unsigned int state_use_accessors; +#ifdef CONFIG_NUMA + unsigned int node; +#endif }; /** @@ -139,7 +143,6 @@ struct irq_common_data { * @mask: precomputed bitmask for accessing the chip registers * @irq: interrupt number * @hwirq: hardware interrupt number, local to the interrupt domain - * @node: node index useful for balancing * @common: point to data shared by all irqchips * @chip: low level interrupt hardware access * @domain: Interrupt translation domain; responsible for mapping @@ -156,7 +159,6 @@ struct irq_data { u32 mask; unsigned int irq; unsigned long hwirq; - unsigned int node; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; @@ -664,9 +666,18 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? irqd_get_trigger_type(d) : 0; } -static inline int irq_data_get_node(struct irq_data *d) +static inline int irq_common_data_get_node(struct irq_common_data *d) { +#ifdef CONFIG_NUMA return d->node; +#else + return 0; +#endif +} + +static inline int irq_data_get_node(struct irq_data *d) +{ + return irq_common_data_get_node(d->common); } static inline struct cpumask *irq_get_affinity_mask(int irq) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index eee4b38..5ef0c2d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) static inline int irq_desc_get_node(struct irq_desc *desc) { - return irq_data_get_node(&desc->irq_data); + return irq_common_data_get_node(&desc->irq_common_data); } #ifdef CONFIG_PM_SLEEP diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 0a2a4b6..7f3e9fa 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -52,11 +52,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) static void desc_smp_init(struct irq_desc *desc, int node) { - desc->irq_data.node = node; cpumask_copy(desc->irq_data.affinity, irq_default_affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_clear(desc->pending_mask); #endif +#ifdef CONFIG_NUMA + desc->irq_common_data.node = node; +#endif } #else diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 79baaf8..dc9d27c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, child->parent_data = irq_data; irq_data->irq = child->irq; irq_data->common = child->common; - irq_data->node = child->node; irq_data->domain = domain; } -- cgit v0.10.2 From af7080e040d223b5e7d0a8de28f7cea24ef017c4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:21 +0800 Subject: genirq: Move field 'handler_data' from irq_data into irq_common_data Handler data (handler_data) is per-irq instead of per irqchip, so move it into struct irq_common_data. 
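The payoff is in hierarchical irqdomains: every irq_data in a hierarchy shares one irq_common_data, so a per-irq field now lives in exactly one place. A sketch of the invariant this series establishes (assuming CONFIG_IRQ_DOMAIN_HIERARCHY; illustrative only):

    /* illustrative check: every level of a hierarchy points at the
     * descriptor's irq_common_data, so handler_data is stored once
     * and seen identically by all levels */
    static void check_common_data(unsigned int irq)
    {
            struct irq_data *d = irq_get_irq_data(irq);

            while (d) {
                    WARN_ON(d->common != &irq_data_to_desc(d)->irq_common_data);
                    d = d->parent_data;     /* NULL past the root */
            }
    }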
Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-13-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index ebcc5c6..516aadb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -130,12 +130,14 @@ struct irq_domain; * @state_use_accessors: status information for irq chip functions. * Use accessor functions to deal with it * @node: node index useful for balancing + * @handler_data: per-IRQ data for the irq_chip methods */ struct irq_common_data { unsigned int state_use_accessors; #ifdef CONFIG_NUMA unsigned int node; #endif + void *handler_data; }; /** @@ -149,7 +151,6 @@ struct irq_common_data { * between hwirq number and linux irq number. * @parent_data: pointer to parent struct irq_data to support hierarchy * irq_domain - * @handler_data: per-IRQ data for the irq_chip methods * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations * @msi_desc: MSI descriptor @@ -165,7 +166,6 @@ struct irq_data { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data *parent_data; #endif - void *handler_data; void *chip_data; struct msi_desc *msi_desc; cpumask_var_t affinity; @@ -641,12 +641,12 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d) static inline void *irq_get_handler_data(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->handler_data : NULL; + return d ? d->common->handler_data : NULL; } static inline void *irq_data_get_irq_handler_data(struct irq_data *d) { - return d->handler_data; + return d->common->handler_data; } static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 1fc5304..c7b3e1c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -123,7 +123,7 @@ static inline void *irq_desc_get_chip_data(struct irq_desc *desc) static inline void *irq_desc_get_handler_data(struct irq_desc *desc) { - return desc->irq_data.handler_data; + return desc->irq_common_data.handler_data; } static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6e40a95..a48e00e 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data) if (!desc) return -EINVAL; - desc->irq_data.handler_data = data; + desc->irq_common_data.handler_data = data; irq_put_desc_unlock(desc, flags); return 0; } @@ -796,7 +796,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, return; __irq_do_set_handler(desc, handle, 1, NULL); - desc->irq_data.handler_data = data; + desc->irq_common_data.handler_data = data; irq_put_desc_busunlock(desc, flags); } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 7f3e9fa..594b3e3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -72,11 +72,12 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, { int cpu; + desc->irq_common_data.handler_data = NULL; + desc->irq_data.common = &desc->irq_common_data; desc->irq_data.irq = irq; desc->irq_data.chip = &no_irq_chip; desc->irq_data.chip_data = NULL; - desc->irq_data.handler_data = NULL; desc->irq_data.msi_desc = NULL; 
irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); -- cgit v0.10.2 From 9df872faa7e1619e9278bec00ceaed2236533530 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jun 2015 11:47:50 +0800 Subject: genirq: Move field 'affinity' from irq_data into irq_common_data Irq affinity mask is per-irq instead of per irqchip, so move it into struct irq_common_data. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433303281-27688-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1bbd0fe..836d11b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -489,10 +489,8 @@ static int apic_set_affinity(struct irq_data *irq_data, err = assign_irq_vector(irq, data, dest); if (err) { - struct irq_data *top = irq_get_irq_data(irq); - if (assign_irq_vector(irq, data, - irq_data_get_affinity_mask(top))) + irq_data_get_affinity_mask(irq_data))) pr_err("Failed to recover vector for irq %d\n", irq); return err; } diff --git a/include/linux/irq.h b/include/linux/irq.h index 516aadb..75d5054 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -110,8 +110,8 @@ enum { /* * Return value for chip->irq_set_affinity() * - * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity - * IRQ_SET_MASK_NOCPY - OK, chip did update irq_data.affinity + * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity + * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping * all descendent irqchips. @@ -131,6 +131,7 @@ struct irq_domain; * Use accessor functions to deal with it * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods + * @affinity: IRQ affinity on SMP */ struct irq_common_data { unsigned int state_use_accessors; @@ -138,6 +139,7 @@ struct irq_common_data { unsigned int node; #endif void *handler_data; + cpumask_var_t affinity; }; /** @@ -154,7 +156,6 @@ struct irq_common_data { * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations * @msi_desc: MSI descriptor - * @affinity: IRQ affinity on SMP */ struct irq_data { u32 mask; @@ -168,7 +169,6 @@ struct irq_data { #endif void *chip_data; struct msi_desc *msi_desc; - cpumask_var_t affinity; }; /* @@ -684,12 +684,12 @@ static inline struct cpumask *irq_get_affinity_mask(int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->affinity : NULL; + return d ? 
d->common->affinity : NULL; } static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { - return d->affinity; + return d->common->affinity; } unsigned int arch_dynirq_lower_bound(unsigned int from); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 594b3e3..bb48a5c 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void) #ifdef CONFIG_SMP static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { - if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) + if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, + gfp, node)) return -ENOMEM; #ifdef CONFIG_GENERIC_PENDING_IRQ if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { - free_cpumask_var(desc->irq_data.affinity); + free_cpumask_var(desc->irq_common_data.affinity); return -ENOMEM; } #endif @@ -52,7 +53,7 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) static void desc_smp_init(struct irq_desc *desc, int node) { - cpumask_copy(desc->irq_data.affinity, irq_default_affinity); + cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_clear(desc->pending_mask); #endif @@ -124,7 +125,7 @@ static void free_masks(struct irq_desc *desc) #ifdef CONFIG_GENERIC_PENDING_IRQ free_cpumask_var(desc->pending_mask); #endif - free_cpumask_var(desc->irq_data.affinity); + free_cpumask_var(desc->irq_common_data.affinity); } #else static inline void free_masks(struct irq_desc *desc) { } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad1b064..f9a59f6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: - cpumask_copy(data->affinity, mask); + cpumask_copy(desc->irq_common_data.affinity, mask); case IRQ_SET_MASK_OK_NOCOPY: irq_set_thread_affinity(desc); ret = 0; @@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work) if (irq_move_pending(&desc->irq_data)) irq_get_pending(cpumask, desc); else - cpumask_copy(cpumask, desc->irq_data.affinity); + cpumask_copy(cpumask, desc->irq_common_data.affinity); raw_spin_unlock_irqrestore(&desc->lock, flags); notify->notify(notify, cpumask); @@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) * one of the targets is online. */ if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { - if (cpumask_intersects(desc->irq_data.affinity, + if (cpumask_intersects(desc->irq_common_data.affinity, cpu_online_mask)) - set = desc->irq_data.affinity; + set = desc->irq_common_data.affinity; else irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); } @@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) * This code is triggered unconditionally. Check the affinity * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. 
*/ - if (desc->irq_data.affinity) - cpumask_copy(mask, desc->irq_data.affinity); + if (desc->irq_common_data.affinity) + cpumask_copy(mask, desc->irq_common_data.affinity); else valid = false; raw_spin_unlock_irq(&desc->lock); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 0e97c14..e3a8c95 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -39,7 +39,7 @@ static struct proc_dir_entry *root_irq_dir; static int show_irq_affinity(int type, struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - const struct cpumask *mask = desc->irq_data.affinity; + const struct cpumask *mask = desc->irq_common_data.affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ if (irqd_is_setaffinity_pending(&desc->irq_data)) -- cgit v0.10.2 From b237721c5d95082a803c0be686f56d2dd1de995b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:43 +0800 Subject: genirq: Move field 'msi_desc' from irq_data into irq_common_data MSI descriptors are per-irq instead of per irqchip, so move it into struct irq_common_data. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-35-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index 75d5054..4913c32 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -132,6 +132,7 @@ struct irq_domain; * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods * @affinity: IRQ affinity on SMP + * @msi_desc: MSI descriptor */ struct irq_common_data { unsigned int state_use_accessors; @@ -139,6 +140,7 @@ struct irq_common_data { unsigned int node; #endif void *handler_data; + struct msi_desc *msi_desc; cpumask_var_t affinity; }; @@ -155,7 +157,6 @@ struct irq_common_data { * irq_domain * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations - * @msi_desc: MSI descriptor */ struct irq_data { u32 mask; @@ -168,7 +169,6 @@ struct irq_data { struct irq_data *parent_data; #endif void *chip_data; - struct msi_desc *msi_desc; }; /* @@ -652,12 +652,12 @@ static inline void *irq_data_get_irq_handler_data(struct irq_data *d) static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->msi_desc : NULL; + return d ? 
d->common->msi_desc : NULL; } static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) { - return d->msi_desc; + return d->common->msi_desc; } static inline u32 irq_get_trigger_type(unsigned int irq) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c7b3e1c..fbb4d5a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -128,7 +128,7 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc) static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) { - return desc->irq_data.msi_desc; + return desc->irq_common_data.msi_desc; } /* diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index a48e00e..8c55d54 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, if (!desc) return -EINVAL; - desc->irq_data.msi_desc = entry; + desc->irq_common_data.msi_desc = entry; if (entry && !irq_offset) entry->irq = irq_base; irq_put_desc_unlock(desc, flags); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index bb48a5c..5966694 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -74,12 +74,12 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, int cpu; desc->irq_common_data.handler_data = NULL; + desc->irq_common_data.msi_desc = NULL; desc->irq_data.common = &desc->irq_common_data; desc->irq_data.irq = irq; desc->irq_data.chip = &no_irq_chip; desc->irq_data.chip_data = NULL; - desc->irq_data.msi_desc = NULL; irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); desc->handle_irq = handle_bad_irq; -- cgit v0.10.2 From bd0b9ac405e1794d72533c3d487aa65b6b955a0c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:42:37 +0200 Subject: genirq: Remove irq argument from irq flow handlers Most interrupt flow handlers do not use the irq argument. Those few which use it can retrieve the irq number from the irq descriptor. Remove the argument. Search and replace was done with coccinelle and some extra helper scripts around it. Thanks to Julia for her help! 
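All of the conversions below follow one shape. For the few handlers that still need the irq number, a sketch (my_demux_handler and its data are hypothetical):

    static void my_demux_handler(struct irq_desc *desc)
    {
            /* the irq number, when still needed, now comes from the
             * descriptor rather than from a parameter */
            unsigned int irq = irq_desc_get_irq(desc);
            void *data = irq_desc_get_handler_data(desc);

            pr_debug("demux for irq %u, handler data %p\n", irq, data);
    }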
Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 2804648..2d6efcf 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -117,6 +117,6 @@ handle_irq(int irq) } irq_enter(); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); irq_exit(); } diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index d9e44b6..4ffd185 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -252,7 +252,7 @@ static struct irq_chip idu_irq_chip = { static int idu_first_irq; -static void idu_cascade_isr(unsigned int __core_irq, struct irq_desc *desc) +static void idu_cascade_isr(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); unsigned int core_irq = irq_desc_get_irq(desc); diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 96dabcb..996aed3 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -95,7 +95,7 @@ void it8152_init_irq(void) } } -void it8152_irq_demux(unsigned int irq, struct irq_desc *desc) +void it8152_irq_demux(struct irq_desc *desc) { int bits_pd, bits_lp, bits_ld; int i; diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 304adea..0e97b4b 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -138,7 +138,7 @@ static struct locomo_dev_info locomo_devices[] = { }, }; -static void locomo_handler(unsigned int __irq, struct irq_desc *desc) +static void locomo_handler(struct irq_desc *desc) { struct locomo *lchip = irq_desc_get_chip_data(desc); int req, i; diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 4f29025..3d22494 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -196,10 +196,8 @@ static struct sa1111_dev_info sa1111_devices[] = { * active IRQs causes the interrupt output to pulse, the upper levels * will call us again if there are more interrupts to process. 
*/ -static void -sa1111_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void sa1111_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); unsigned int stat0, stat1, i; struct sa1111 *sachip = irq_desc_get_handler_data(desc); void __iomem *mapbase = sachip->base + SA1111_INTC; @@ -214,7 +212,7 @@ sa1111_irq_handler(unsigned int __irq, struct irq_desc *desc) sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1); if (stat0 == 0 && stat1 == 0) { - do_bad_IRQ(irq, desc); + do_bad_IRQ(desc); return; } diff --git a/arch/arm/include/asm/hardware/it8152.h b/arch/arm/include/asm/hardware/it8152.h index d36a73d7..076777f 100644 --- a/arch/arm/include/asm/hardware/it8152.h +++ b/arch/arm/include/asm/hardware/it8152.h @@ -106,7 +106,7 @@ extern void __iomem *it8152_base_address; struct pci_dev; struct pci_sys_data; -extern void it8152_irq_demux(unsigned int irq, struct irq_desc *desc); +extern void it8152_irq_demux(struct irq_desc *desc); extern void it8152_init_irq(void); extern int it8152_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); extern int it8152_pci_setup(int nr, struct pci_sys_data *sys); diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index 2092ee1..de4634b 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -23,10 +23,10 @@ extern int show_fiq_list(struct seq_file *, int); /* * This is for easy migration, but should be changed in the source */ -#define do_bad_IRQ(irq,desc) \ +#define do_bad_IRQ(desc) \ do { \ raw_spin_lock(&desc->lock); \ - handle_bad_irq(irq, desc); \ + handle_bad_irq(desc); \ raw_spin_unlock(&desc->lock); \ } while(0) diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 305d7c6..bfb3703 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -69,14 +69,14 @@ static struct irq_chip pmu_irq_chip = { .irq_ack = pmu_irq_ack, }; -static void pmu_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void pmu_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); unsigned long cause = readl(PMU_INTERRUPT_CAUSE); + unsigned int irq; cause &= readl(PMU_INTERRUPT_MASK); if (cause == 0) { - do_bad_IRQ(irq, desc); + do_bad_IRQ(desc); return; } diff --git a/arch/arm/mach-footbridge/isa-irq.c b/arch/arm/mach-footbridge/isa-irq.c index fcd79bc..c01fca1 100644 --- a/arch/arm/mach-footbridge/isa-irq.c +++ b/arch/arm/mach-footbridge/isa-irq.c @@ -87,13 +87,12 @@ static struct irq_chip isa_hi_chip = { .irq_unmask = isa_unmask_pic_hi_irq, }; -static void -isa_irq_handler(unsigned int irq, struct irq_desc *desc) +static void isa_irq_handler(struct irq_desc *desc) { unsigned int isa_irq = *(unsigned char *)PCIIACK_BASE; if (isa_irq < _ISA_IRQ(0) || isa_irq >= _ISA_IRQ(16)) { - do_bad_IRQ(isa_irq, desc); + do_bad_IRQ(desc); return; } diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c index 220333e..2478d9f 100644 --- a/arch/arm/mach-gemini/gpio.c +++ b/arch/arm/mach-gemini/gpio.c @@ -126,7 +126,7 @@ static int gpio_set_irq_type(struct irq_data *d, unsigned int type) return 0; } -static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { unsigned int port = (unsigned int)irq_desc_get_handler_data(desc); unsigned int gpio_irq_no, irq_stat; diff --git a/arch/arm/mach-imx/3ds_debugboard.c b/arch/arm/mach-imx/3ds_debugboard.c index 45903be..16496a0 100644 --- a/arch/arm/mach-imx/3ds_debugboard.c +++ 
b/arch/arm/mach-imx/3ds_debugboard.c @@ -85,7 +85,7 @@ static struct platform_device smsc_lan9217_device = { .resource = smsc911x_resources, }; -static void mxc_expio_irq_handler(u32 irq, struct irq_desc *desc) +static void mxc_expio_irq_handler(struct irq_desc *desc) { u32 imr_val; u32 int_valid; diff --git a/arch/arm/mach-imx/mach-mx31ads.c b/arch/arm/mach-imx/mach-mx31ads.c index 2c08535..2b147e4 100644 --- a/arch/arm/mach-imx/mach-mx31ads.c +++ b/arch/arm/mach-imx/mach-mx31ads.c @@ -154,7 +154,7 @@ static inline void mxc_init_imx_uart(void) imx31_add_imx_uart0(&uart_pdata); } -static void mx31ads_expio_irq_handler(u32 irq, struct irq_desc *desc) +static void mx31ads_expio_irq_handler(struct irq_desc *desc) { u32 imr_val; u32 int_valid; diff --git a/arch/arm/mach-iop13xx/msi.c b/arch/arm/mach-iop13xx/msi.c index 9f89e76..f6235b2 100644 --- a/arch/arm/mach-iop13xx/msi.c +++ b/arch/arm/mach-iop13xx/msi.c @@ -91,7 +91,7 @@ static void (*write_imipr[])(u32) = { write_imipr_3, }; -static void iop13xx_msi_handler(unsigned int irq, struct irq_desc *desc) +static void iop13xx_msi_handler(struct irq_desc *desc) { int i, j; unsigned long status; diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c index cce4cef..2ae431e 100644 --- a/arch/arm/mach-lpc32xx/irq.c +++ b/arch/arm/mach-lpc32xx/irq.c @@ -370,7 +370,7 @@ static struct irq_chip lpc32xx_irq_chip = { .irq_set_wake = lpc32xx_irq_wake }; -static void lpc32xx_sic1_handler(unsigned int irq, struct irq_desc *desc) +static void lpc32xx_sic1_handler(struct irq_desc *desc) { unsigned long ints = __raw_readl(LPC32XX_INTC_STAT(LPC32XX_SIC1_BASE)); @@ -383,7 +383,7 @@ static void lpc32xx_sic1_handler(unsigned int irq, struct irq_desc *desc) } } -static void lpc32xx_sic2_handler(unsigned int irq, struct irq_desc *desc) +static void lpc32xx_sic2_handler(struct irq_desc *desc) { unsigned long ints = __raw_readl(LPC32XX_INTC_STAT(LPC32XX_SIC2_BASE)); diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index 6373e2b..842302d 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -69,8 +69,7 @@ static struct platform_device *devices[] __initdata = { #define DEBUG_IRQ(fmt...) while (0) {} #endif -static void -netx_hif_demux_handler(unsigned int irq_unused, struct irq_desc *desc) +static void netx_hif_demux_handler(struct irq_desc *desc) { unsigned int irq = NETX_IRQ_HIF_CHAINED(0); unsigned int stat; diff --git a/arch/arm/mach-omap1/fpga.c b/arch/arm/mach-omap1/fpga.c index dfec671..39e20d0 100644 --- a/arch/arm/mach-omap1/fpga.c +++ b/arch/arm/mach-omap1/fpga.c @@ -87,7 +87,7 @@ static void fpga_mask_ack_irq(struct irq_data *d) fpga_ack_irq(d); } -static void innovator_fpga_IRQ_demux(unsigned int irq, struct irq_desc *desc) +static void innovator_fpga_IRQ_demux(struct irq_desc *desc) { u32 stat; int fpga_irq; diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 257e98c..3fc2cbe 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -102,7 +102,7 @@ static void omap_prcm_events_filter_priority(unsigned long *events, * dispatched accordingly. Clearing of the wakeup events should be * done by the SoC specific individual handlers. 
*/ -static void omap_prcm_irq_handler(unsigned int irq, struct irq_desc *desc) +static void omap_prcm_irq_handler(struct irq_desc *desc) { unsigned long pending[OMAP_PRCM_MAX_NR_PENDING_REG]; unsigned long priority_pending[OMAP_PRCM_MAX_NR_PENDING_REG]; diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index 70366b3..a3ebb51 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -496,7 +496,7 @@ static struct irq_chip balloon3_irq_chip = { .irq_unmask = balloon3_unmask_irq, }; -static void balloon3_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void balloon3_irq_handler(struct irq_desc *desc) { unsigned long pending = __raw_readl(BALLOON3_INT_CONTROL_REG) & balloon3_irq_enabled; diff --git a/arch/arm/mach-pxa/cm-x2xx-pci.c b/arch/arm/mach-pxa/cm-x2xx-pci.c index 1fa79f1..3221ae1 100644 --- a/arch/arm/mach-pxa/cm-x2xx-pci.c +++ b/arch/arm/mach-pxa/cm-x2xx-pci.c @@ -29,13 +29,12 @@ void __iomem *it8152_base_address; static int cmx2xx_it8152_irq_gpio; -static void cmx2xx_it8152_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void cmx2xx_it8152_irq_demux(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); /* clear our parent irq */ desc->irq_data.chip->irq_ack(&desc->irq_data); - it8152_irq_demux(irq, desc); + it8152_irq_demux(desc); } void __cmx2xx_pci_init_irq(int irq_gpio) diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index b070167..4823d97 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -120,7 +120,7 @@ static struct irq_chip lpd270_irq_chip = { .irq_unmask = lpd270_unmask_irq, }; -static void lpd270_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void lpd270_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index 9a0c8af..d8319b5 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -284,7 +284,7 @@ static struct irq_chip pcm990_irq_chip = { .irq_unmask = pcm990_unmask_irq, }; -static void pcm990_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void pcm990_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index 4841d6c..8ab2637 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -276,7 +276,7 @@ static inline unsigned long viper_irq_pending(void) viper_irq_enabled_mask; } -static void viper_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void viper_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index 6f94dd7..30e62a3 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -105,7 +105,7 @@ static inline unsigned long zeus_irq_pending(void) return __raw_readw(ZEUS_CPLD_ISA_IRQ) & zeus_irq_enabled_mask; } -static void zeus_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void zeus_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index f726d4c..dc67a7f 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -551,8 +551,7 @@ static void ecard_check_lockup(struct irq_desc *desc) } } -static void -ecard_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ecard_irq_handler(struct irq_desc 
*desc) { ecard_t *ec; int called = 0; diff --git a/arch/arm/mach-s3c24xx/bast-irq.c b/arch/arm/mach-s3c24xx/bast-irq.c index ced1ab8..2bb0896 100644 --- a/arch/arm/mach-s3c24xx/bast-irq.c +++ b/arch/arm/mach-s3c24xx/bast-irq.c @@ -100,9 +100,7 @@ static struct irq_chip bast_pc104_chip = { .irq_ack = bast_pc104_maskack }; -static void -bast_irq_pc104_demux(unsigned int irq, - struct irq_desc *desc) +static void bast_irq_pc104_demux(struct irq_desc *desc) { unsigned int stat; unsigned int irqno; diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c index fd63ecf..ddb30b8 100644 --- a/arch/arm/mach-s3c64xx/common.c +++ b/arch/arm/mach-s3c64xx/common.c @@ -388,22 +388,22 @@ static inline void s3c_irq_demux_eint(unsigned int start, unsigned int end) } } -static void s3c_irq_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint0_3(struct irq_desc *desc) { s3c_irq_demux_eint(0, 3); } -static void s3c_irq_demux_eint4_11(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint4_11(struct irq_desc *desc) { s3c_irq_demux_eint(4, 11); } -static void s3c_irq_demux_eint12_19(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint12_19(struct irq_desc *desc) { s3c_irq_demux_eint(12, 19); } -static void s3c_irq_demux_eint20_27(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint20_27(struct irq_desc *desc) { s3c_irq_demux_eint(20, 27); } diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 6d237b4..8411985 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -166,7 +166,7 @@ static struct sa1100_port_fns neponset_port_fns = { * ensure that the IRQ signal is deasserted before returning. This * is rather unfortunate. 
*/ -static void neponset_irq_handler(unsigned int irq, struct irq_desc *desc) +static void neponset_irq_handler(struct irq_desc *desc) { struct neponset_drvdata *d = irq_desc_get_handler_data(desc); unsigned int irr; diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 79c33ec..7bd22d8 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -407,7 +407,7 @@ static int gpio_irq_set_type(struct irq_data *d, u32 type) return 0; } -static void gpio_irq_handler(unsigned __irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { struct orion_gpio_chip *ochip = irq_desc_get_handler_data(desc); u32 cause, type; diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index d51ff8f..96cabad 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -144,7 +144,7 @@ static struct irq_chip eic_chip = { .irq_set_type = eic_set_irq_type, }; -static void demux_eic_irq(unsigned int irq, struct irq_desc *desc) +static void demux_eic_irq(struct irq_desc *desc) { struct eic *eic = irq_desc_get_handler_data(desc); unsigned long status, pending; diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 157a5e0..4f61378 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -281,7 +281,7 @@ static struct irq_chip gpio_irqchip = { .irq_set_type = gpio_irq_type, }; -static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { struct pio_device *pio = irq_desc_get_chip_data(desc); unsigned gpio_irq; diff --git a/arch/blackfin/include/asm/irq_handler.h b/arch/blackfin/include/asm/irq_handler.h index 4b2a992..d2f90c7 100644 --- a/arch/blackfin/include/asm/irq_handler.h +++ b/arch/blackfin/include/asm/irq_handler.h @@ -60,7 +60,7 @@ extern void bfin_internal_mask_irq(unsigned int irq); extern void bfin_internal_unmask_irq(unsigned int irq); struct irq_desc; -extern void bfin_demux_mac_status_irq(unsigned int, struct irq_desc *); -extern void bfin_demux_gpio_irq(unsigned int, struct irq_desc *); +extern void bfin_demux_mac_status_irq(struct irq_desc *); +extern void bfin_demux_gpio_irq(struct irq_desc *); #endif diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index 0ba2576..052cde5 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -107,7 +107,7 @@ asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) * than crashing, do something sensible. 
*/ if (irq >= NR_IRQS) - handle_bad_irq(irq, &bad_irq_desc); + handle_bad_irq(&bad_irq_desc); else generic_handle_irq(irq); diff --git a/arch/blackfin/mach-bf537/ints-priority.c b/arch/blackfin/mach-bf537/ints-priority.c index 14b2f74..a48baae 100644 --- a/arch/blackfin/mach-bf537/ints-priority.c +++ b/arch/blackfin/mach-bf537/ints-priority.c @@ -89,8 +89,7 @@ static struct irq_chip bf537_generic_error_irqchip = { .irq_unmask = bf537_generic_error_unmask_irq, }; -static void bf537_demux_error_irq(unsigned int int_err_irq, - struct irq_desc *inta_desc) +static void bf537_demux_error_irq(struct irq_desc *inta_desc) { int irq = 0; @@ -182,15 +181,12 @@ static struct irq_chip bf537_mac_rx_irqchip = { .irq_unmask = bf537_mac_rx_unmask_irq, }; -static void bf537_demux_mac_rx_irq(unsigned int __int_irq, - struct irq_desc *desc) +static void bf537_demux_mac_rx_irq(struct irq_desc *desc) { - unsigned int int_irq = irq_desc_get_irq(desc); - if (bfin_read_DMA1_IRQ_STATUS() & (DMA_DONE | DMA_ERR)) bfin_handle_irq(IRQ_MAC_RX); else - bfin_demux_gpio_irq(int_irq, desc); + bfin_demux_gpio_irq(desc); } #endif diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index a6d1b03..e8d4d74 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -656,8 +656,7 @@ static struct irq_chip bfin_mac_status_irqchip = { .irq_set_wake = bfin_mac_status_set_wake, }; -void bfin_demux_mac_status_irq(unsigned int int_err_irq, - struct irq_desc *inta_desc) +void bfin_demux_mac_status_irq(struct irq_desc *inta_desc) { int i, irq = 0; u32 status = bfin_read_EMAC_SYSTAT(); @@ -825,7 +824,7 @@ static void bfin_demux_gpio_block(unsigned int irq) } } -void bfin_demux_gpio_irq(unsigned int __inta_irq, struct irq_desc *desc) +void bfin_demux_gpio_irq(struct irq_desc *desc) { unsigned int inta_irq = irq_desc_get_irq(desc); unsigned int irq; diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c index d487698..ddcb45d 100644 --- a/arch/c6x/platforms/megamod-pic.c +++ b/arch/c6x/platforms/megamod-pic.c @@ -93,7 +93,7 @@ static struct irq_chip megamod_chip = { .irq_unmask = unmask_megamod, }; -static void megamod_irq_cascade(unsigned int __irq, struct irq_desc *desc) +static void megamod_irq_cascade(struct irq_desc *desc) { struct megamod_cascade_data *cascade; struct megamod_pic *pic; diff --git a/arch/m68k/amiga/amiints.c b/arch/m68k/amiga/amiints.c index 47b5f90..7ff739e 100644 --- a/arch/m68k/amiga/amiints.c +++ b/arch/m68k/amiga/amiints.c @@ -46,7 +46,7 @@ static struct irq_chip amiga_irq_chip = { * The builtin Amiga hardware interrupt handlers. 
*/ -static void ami_int1(unsigned int irq, struct irq_desc *desc) +static void ami_int1(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; @@ -69,7 +69,7 @@ static void ami_int1(unsigned int irq, struct irq_desc *desc) } } -static void ami_int3(unsigned int irq, struct irq_desc *desc) +static void ami_int3(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; @@ -92,7 +92,7 @@ static void ami_int3(unsigned int irq, struct irq_desc *desc) } } -static void ami_int4(unsigned int irq, struct irq_desc *desc) +static void ami_int4(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; @@ -121,7 +121,7 @@ static void ami_int4(unsigned int irq, struct irq_desc *desc) } } -static void ami_int5(unsigned int irq, struct irq_desc *desc) +static void ami_int5(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; diff --git a/arch/m68k/coldfire/intc-5272.c b/arch/m68k/coldfire/intc-5272.c index 47371de..b0a19e2 100644 --- a/arch/m68k/coldfire/intc-5272.c +++ b/arch/m68k/coldfire/intc-5272.c @@ -143,12 +143,10 @@ static int intc_irq_set_type(struct irq_data *d, unsigned int type) * We need to be careful with the masking/acking due to the side effects * of masking an interrupt. */ -static void intc_external_irq(unsigned int __irq, struct irq_desc *desc) +static void intc_external_irq(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); - irq_desc_get_chip(desc)->irq_ack(&desc->irq_data); - handle_simple_irq(irq, desc); + handle_simple_irq(desc); } static struct irq_chip intc_irq_chip = { diff --git a/arch/m68k/include/asm/irq.h b/arch/m68k/include/asm/irq.h index 81ca118..a644f4a 100644 --- a/arch/m68k/include/asm/irq.h +++ b/arch/m68k/include/asm/irq.h @@ -64,8 +64,7 @@ extern void m68k_setup_auto_interrupt(void (*handler)(unsigned int, struct pt_regs *)); extern void m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt); extern void m68k_setup_irq_controller(struct irq_chip *, - void (*handle)(unsigned int irq, - struct irq_desc *desc), + void (*handle)(struct irq_desc *desc), unsigned int irq, unsigned int cnt); extern unsigned int irq_canonicalize(unsigned int irq); diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h index fe3fc9a..53c632c 100644 --- a/arch/m68k/include/asm/mac_via.h +++ b/arch/m68k/include/asm/mac_via.h @@ -261,7 +261,7 @@ extern void via_irq_enable(int); extern void via_irq_disable(int); extern void via_nubus_irq_startup(int irq); extern void via_nubus_irq_shutdown(int irq); -extern void via1_irq(unsigned int irq, struct irq_desc *desc); +extern void via1_irq(struct irq_desc *desc); extern void via1_set_head(int); extern int via2_scsi_drq_pending(void); diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c index 3fe0e43..f6f7d42 100644 --- a/arch/m68k/mac/baboon.c +++ b/arch/m68k/mac/baboon.c @@ -45,7 +45,7 @@ void __init baboon_init(void) * Baboon interrupt handler. This works a lot like a VIA. */ -static void baboon_irq(unsigned int irq, struct irq_desc *desc) +static void baboon_irq(struct irq_desc *desc) { int irq_bit, irq_num; unsigned char events; diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 191610d..55d65927 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -63,7 +63,7 @@ void __init oss_nubus_init(void) * Handle miscellaneous OSS interrupts. 
*/ -static void oss_irq(unsigned int __irq, struct irq_desc *desc) +static void oss_irq(struct irq_desc *desc) { int events = oss->irq_pending & (OSS_IP_IOPSCC | OSS_IP_SCSI | OSS_IP_IOPISM); @@ -99,7 +99,7 @@ static void oss_irq(unsigned int __irq, struct irq_desc *desc) * Unlike the VIA/RBV this is on its own autovector interrupt level. */ -static void oss_nubus_irq(unsigned int irq, struct irq_desc *desc) +static void oss_nubus_irq(struct irq_desc *desc) { int events, irq_bit, i; diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index 3b9e302..cd38f29 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -113,7 +113,7 @@ void __init psc_init(void) * PSC interrupt handler. It's a lot like the VIA interrupt handler. */ -static void psc_irq(unsigned int __irq, struct irq_desc *desc) +static void psc_irq(struct irq_desc *desc) { unsigned int offset = (unsigned int)irq_desc_get_handler_data(desc); unsigned int irq = irq_desc_get_irq(desc); diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index e198dec..ce56e04 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -446,7 +446,7 @@ void via_nubus_irq_shutdown(int irq) * via6522.c :-), disable/pending masks added. */ -void via1_irq(unsigned int irq, struct irq_desc *desc) +void via1_irq(struct irq_desc *desc) { int irq_num; unsigned char irq_bit, events; @@ -467,7 +467,7 @@ void via1_irq(unsigned int irq, struct irq_desc *desc) } while (events >= irq_bit); } -static void via2_irq(unsigned int irq, struct irq_desc *desc) +static void via2_irq(struct irq_desc *desc) { int irq_num; unsigned char irq_bit, events; @@ -493,7 +493,7 @@ static void via2_irq(unsigned int irq, struct irq_desc *desc) * VIA2 dispatcher as a fast interrupt handler. */ -void via_nubus_irq(unsigned int irq, struct irq_desc *desc) +static void via_nubus_irq(struct irq_desc *desc) { int slot_irq; unsigned char slot_bit, events; diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c index a336094..3074b64 100644 --- a/arch/metag/kernel/irq.c +++ b/arch/metag/kernel/irq.c @@ -94,13 +94,11 @@ void do_IRQ(int irq, struct pt_regs *regs) "MOV D0.5,%0\n" "MOV D1Ar1,%1\n" "MOV D1RtP,%2\n" - "MOV D0Ar2,%3\n" "SWAP A0StP,D0.5\n" "SWAP PC,D1RtP\n" "MOV A0StP,D0.5\n" : - : "r" (isp), "r" (irq), "r" (desc->handle_irq), - "r" (desc) + : "r" (isp), "r" (desc), "r" (desc->handle_irq) : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4", "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP", "D0.5" diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c index 4c496c5..da9f922 100644 --- a/arch/mips/alchemy/common/irq.c +++ b/arch/mips/alchemy/common/irq.c @@ -851,7 +851,7 @@ static struct syscore_ops alchemy_gpic_pmops = { /* create chained handlers for the 4 IC requests to the MIPS IRQ ctrl */ #define DISP(name, base, addr) \ -static void au1000_##name##_dispatch(unsigned int irq, struct irq_desc *d) \ +static void au1000_##name##_dispatch(struct irq_desc *d) \ { \ unsigned long r = __raw_readl((void __iomem *)KSEG1ADDR(addr)); \ if (likely(r)) \ @@ -865,7 +865,7 @@ DISP(ic0r1, AU1000_INTC0_INT_BASE, AU1000_IC0_PHYS_ADDR + IC_REQ1INT) DISP(ic1r0, AU1000_INTC1_INT_BASE, AU1000_IC1_PHYS_ADDR + IC_REQ0INT) DISP(ic1r1, AU1000_INTC1_INT_BASE, AU1000_IC1_PHYS_ADDR + IC_REQ1INT) -static void alchemy_gpic_dispatch(unsigned int irq, struct irq_desc *d) +static void alchemy_gpic_dispatch(struct irq_desc *d) { int i = __raw_readl(AU1300_GPIC_ADDR + AU1300_GPIC_PRIENC); generic_handle_irq(ALCHEMY_GPIC_INT_BASE + i); diff --git 
a/arch/mips/alchemy/devboards/bcsr.c b/arch/mips/alchemy/devboards/bcsr.c index 324ad72..faeddf1 100644 --- a/arch/mips/alchemy/devboards/bcsr.c +++ b/arch/mips/alchemy/devboards/bcsr.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(bcsr_mod); /* * DB1200/PB1200 CPLD IRQ muxer */ -static void bcsr_csc_handler(unsigned int irq, struct irq_desc *d) +static void bcsr_csc_handler(struct irq_desc *d) { unsigned short bisr = __raw_readw(bcsr_virt + BCSR_REG_INTSTAT); struct irq_chip *chip = irq_desc_get_chip(d); diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index ec9a371..8da9961 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -69,7 +69,7 @@ static struct irqaction ar2315_ahb_err_interrupt = { .name = "ar2315-ahb-error", }; -static void ar2315_misc_irq_handler(unsigned irq, struct irq_desc *desc) +static void ar2315_misc_irq_handler(struct irq_desc *desc) { u32 pending = ar2315_rst_reg_read(AR2315_ISR) & ar2315_rst_reg_read(AR2315_IMR); diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index e63e38f..acd55a9 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -73,7 +73,7 @@ static struct irqaction ar5312_ahb_err_interrupt = { .name = "ar5312-ahb-error", }; -static void ar5312_misc_irq_handler(unsigned irq, struct irq_desc *desc) +static void ar5312_misc_irq_handler(struct irq_desc *desc) { u32 pending = ar5312_rst_reg_read(AR5312_ISR) & ar5312_rst_reg_read(AR5312_IMR); diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 807132b..15ecb48 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -26,7 +26,7 @@ #include "common.h" #include "machtypes.h" -static void ath79_misc_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ath79_misc_irq_handler(struct irq_desc *desc) { void __iomem *base = ath79_reset_base; u32 pending; @@ -119,7 +119,7 @@ static void __init ath79_misc_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler); } -static void ar934x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void ar934x_ip2_irq_dispatch(struct irq_desc *desc) { u32 status; @@ -148,7 +148,7 @@ static void ar934x_ip2_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(2), ar934x_ip2_irq_dispatch); } -static void qca955x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void qca955x_ip2_irq_dispatch(struct irq_desc *desc) { u32 status; @@ -171,7 +171,7 @@ static void qca955x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) } } -static void qca955x_ip3_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void qca955x_ip3_irq_dispatch(struct irq_desc *desc) { u32 status; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index f26c3c6..0352bc8 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2221,7 +2221,7 @@ static irqreturn_t octeon_irq_cib_handler(int my_irq, void *data) if (irqd_get_trigger_type(irq_data) & IRQ_TYPE_EDGE_BOTH) cvmx_write_csr(host_data->raw_reg, 1ull << i); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); } } diff --git a/arch/mips/include/asm/netlogic/common.h b/arch/mips/include/asm/netlogic/common.h index 2a4c128..be52c21 100644 --- a/arch/mips/include/asm/netlogic/common.h +++ b/arch/mips/include/asm/netlogic/common.h @@ -57,8 +57,8 @@ #include struct irq_desc; -void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc); -void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc 
*desc); +void nlm_smp_function_ipi_handler(struct irq_desc *desc); +void nlm_smp_resched_ipi_handler(struct irq_desc *desc); void nlm_smp_irq_init(int hwcpuid); void nlm_boot_secondary_cpus(void); int nlm_wakeup_secondary_cpus(void); diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index 6cd69fd..a74e181 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -291,7 +291,7 @@ static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int i writel(mask, reg); } -static void jz_gpio_irq_demux_handler(unsigned int irq, struct irq_desc *desc) +static void jz_gpio_irq_demux_handler(struct irq_desc *desc) { uint32_t flag; unsigned int gpio_irq; diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 0136b4f..10d86d5 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -82,7 +82,7 @@ void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action) } /* IRQ_IPI_SMP_FUNCTION Handler */ -void nlm_smp_function_ipi_handler(unsigned int __irq, struct irq_desc *desc) +void nlm_smp_function_ipi_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); clear_c0_eimr(irq); @@ -92,7 +92,7 @@ void nlm_smp_function_ipi_handler(unsigned int __irq, struct irq_desc *desc) } /* IRQ_IPI_SMP_RESCHEDULE handler */ -void nlm_smp_resched_ipi_handler(unsigned int __irq, struct irq_desc *desc) +void nlm_smp_resched_ipi_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); clear_c0_eimr(irq); diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index f8d0acb..b4fa641 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -318,7 +318,7 @@ static int ar2315_pci_host_setup(struct ar2315_pci_ctrl *apc) return 0; } -static void ar2315_pci_irq_handler(unsigned irq, struct irq_desc *desc) +static void ar2315_pci_irq_handler(struct irq_desc *desc) { struct ar2315_pci_ctrl *apc = irq_desc_get_handler_data(desc); u32 pending = ar2315_pci_reg_read(apc, AR2315_PCI_ISR) & diff --git a/arch/mips/pci/pci-ar71xx.c b/arch/mips/pci/pci-ar71xx.c index ad35a5e..7db963d 100644 --- a/arch/mips/pci/pci-ar71xx.c +++ b/arch/mips/pci/pci-ar71xx.c @@ -226,7 +226,7 @@ static struct pci_ops ar71xx_pci_ops = { .write = ar71xx_pci_write_config, }; -static void ar71xx_pci_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ar71xx_pci_irq_handler(struct irq_desc *desc) { struct ar71xx_pci_controller *apc; void __iomem *base = ath79_reset_base; diff --git a/arch/mips/pci/pci-ar724x.c b/arch/mips/pci/pci-ar724x.c index 907d11d..2013dad 100644 --- a/arch/mips/pci/pci-ar724x.c +++ b/arch/mips/pci/pci-ar724x.c @@ -225,7 +225,7 @@ static struct pci_ops ar724x_pci_ops = { .write = ar724x_pci_write, }; -static void ar724x_pci_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ar724x_pci_irq_handler(struct irq_desc *desc) { struct ar724x_pci_controller *apc; void __iomem *base; diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index 53c8efa..ed6732f9 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -129,7 +129,7 @@ static void rt3883_pci_write_cfg32(struct rt3883_pci_controller *rpc, rt3883_pci_w32(rpc, val, RT3883_PCI_REG_CFGDATA); } -static void rt3883_pci_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void rt3883_pci_irq_handler(struct irq_desc *desc) { struct rt3883_pci_controller *rpc; u32 pending; diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 8c624a8..4cf77f3 
100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -96,7 +96,7 @@ unsigned int get_c0_compare_int(void) return CP0_LEGACY_COMPARE_IRQ; } -static void ralink_intc_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ralink_intc_irq_handler(struct irq_desc *desc) { u32 pending = rt_intc_r32(INTC_REG_STATUS0); diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h index 25784cc..1e155ca 100644 --- a/arch/powerpc/include/asm/qe_ic.h +++ b/arch/powerpc/include/asm/qe_ic.h @@ -59,14 +59,14 @@ enum qe_ic_grp_id { #ifdef CONFIG_QUICC_ENGINE void qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(unsigned int irq, struct irq_desc *desc), - void (*high_handler)(unsigned int irq, struct irq_desc *desc)); + void (*low_handler)(struct irq_desc *desc), + void (*high_handler)(struct irq_desc *desc)); unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic); unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic); #else static inline void qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(unsigned int irq, struct irq_desc *desc), - void (*high_handler)(unsigned int irq, struct irq_desc *desc)) + void (*low_handler)(struct irq_desc *desc), + void (*high_handler)(struct irq_desc *desc)) {} static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) { return 0; } @@ -78,8 +78,7 @@ void qe_ic_set_highest_priority(unsigned int virq, int high); int qe_ic_set_priority(unsigned int virq, unsigned int priority); int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high); -static inline void qe_ic_cascade_low_ipic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_low_ipic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); @@ -88,8 +87,7 @@ static inline void qe_ic_cascade_low_ipic(unsigned int irq, generic_handle_irq(cascade_irq); } -static inline void qe_ic_cascade_high_ipic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_high_ipic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); @@ -98,8 +96,7 @@ static inline void qe_ic_cascade_high_ipic(unsigned int irq, generic_handle_irq(cascade_irq); } -static inline void qe_ic_cascade_low_mpic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_low_mpic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); @@ -111,8 +108,7 @@ static inline void qe_ic_cascade_low_mpic(unsigned int irq, chip->irq_eoi(&desc->irq_data); } -static inline void qe_ic_cascade_high_mpic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_high_mpic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); @@ -124,8 +120,7 @@ static inline void qe_ic_cascade_high_mpic(unsigned int irq, chip->irq_eoi(&desc->irq_data); } -static inline void qe_ic_cascade_muxed_mpic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_muxed_mpic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq; diff --git a/arch/powerpc/include/asm/tsi108_pci.h b/arch/powerpc/include/asm/tsi108_pci.h index 5653d7c..ae59d5b 100644 --- a/arch/powerpc/include/asm/tsi108_pci.h +++ 
b/arch/powerpc/include/asm/tsi108_pci.h @@ -39,7 +39,7 @@ extern int tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary); extern void tsi108_pci_int_init(struct device_node *node); -extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc); +extern void tsi108_irq_cascade(struct irq_desc *desc); extern void tsi108_clear_pci_cfg_error(void); #endif /* _ASM_POWERPC_TSI108_PCI_H */ diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index cf82874..0035d14 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -104,8 +104,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, return irq_linear_revmap(cpld_pic_host, cpld_irq); } -static void -cpld_pic_cascade(unsigned int __irq, struct irq_desc *desc) +static void cpld_pic_cascade(struct irq_desc *desc) { unsigned int irq; diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 32cae33..8fb9548 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -80,7 +80,7 @@ static struct irq_chip media5200_irq_chip = { .irq_mask_ack = media5200_irq_mask, }; -void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void media5200_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int sub_virq, val; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 6301662..78ac19a 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -191,7 +191,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { .irq_set_type = mpc52xx_gpt_irq_set_type, }; -void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) { struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); int sub_virq; diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 74861a7..60e89fc 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -78,7 +78,7 @@ static struct irq_chip pq2ads_pci_ic = { .irq_disable = pq2ads_pci_mask_irq }; -static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc) +static void pq2ads_pci_irq_demux(struct irq_desc *desc) { struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc); u32 stat, mask, pend; diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 7bfb9b1..23791de 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -49,7 +49,7 @@ int __init mpc85xx_common_publish_devices(void) return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL); } #ifdef CONFIG_CPM2 -static void cpm2_cascade(unsigned int irq, struct irq_desc *desc) +static void cpm2_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int cascade_irq; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 13a8d1a..5ac70de 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -192,10 +192,8 @@ void mpc85xx_cds_fixup_bus(struct pci_bus *bus) } #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade_handler(unsigned int __irq, - struct irq_desc *desc) +static void mpc85xx_8259_cascade_handler(struct 
irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq = i8259_irq(); if (cascade_irq != NO_IRQ) @@ -203,7 +201,7 @@ static void mpc85xx_8259_cascade_handler(unsigned int __irq, generic_handle_irq(cascade_irq); /* check for any interrupts from the shared IRQ line */ - handle_fasteoi_irq(irq, desc); + handle_fasteoi_irq(desc); } static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id) diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index ffdf021..f858306 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc85xx_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index d78fda8..b02d6a5 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -91,7 +91,7 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) (irq_hw_number_t)i); } -void socrates_fpga_pic_cascade(unsigned int __irq, struct irq_desc *desc) +static void socrates_fpga_pic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int irq = irq_desc_get_irq(desc); diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index d5b98c0..845defa 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ b/arch/powerpc/platforms/86xx/pic.c @@ -17,7 +17,7 @@ #include #ifdef CONFIG_PPC_I8259 -static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc86xx_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index d303774..c289fc7 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -214,7 +214,7 @@ void mpc8xx_restart(char *cmd) panic("Restart failed\n"); } -static void cpm_cascade(unsigned int irq, struct irq_desc *desc) +static void cpm_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int cascade_irq = cpm_get_irq(); diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 306888a..e0e68a1 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -93,7 +93,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) dcr_write(msic->dcr_host, dcr_n, val); } -static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +static void axon_msi_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct axon_msic *msic = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 6558e7e..9f609fc 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -99,7 +99,7 @@ static void iic_ioexc_eoi(struct irq_data *d) { } -static void iic_ioexc_cascade(unsigned int __irq, struct irq_desc *desc) +static void iic_ioexc_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct cbe_iic_regs 
__iomem *node_iic = diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 1f72f4a..9d27de6 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops spider_host_ops = { .xlate = spider_host_xlate, }; -static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc) +static void spider_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct spider_pic *pic = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 15ebc4e..987d1b8 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -363,7 +363,7 @@ void __init chrp_setup_arch(void) if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0); } -static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void chrp_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 9dd154d..9b79757 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -120,8 +120,7 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) return irq_linear_revmap(h, irq); } -static void hlwd_pic_irq_cascade(unsigned int cascade_virq, - struct irq_desc *desc) +static void hlwd_pic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 1613303..8f65aa3 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -42,7 +42,7 @@ static phys_addr_t pci_membase; static u_char *restart; -static void mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mvme5100_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 39a74fa..9a83eb7 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -111,7 +111,7 @@ static void __init fwnmi_init(void) fwnmi_active = 1; } -static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void pseries_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c index 2bcb78b..d57b775 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.c +++ b/arch/powerpc/sysdev/ge/ge_pic.c @@ -91,7 +91,7 @@ static int gef_pic_cascade_irq; * should be masked out. 
*/ -void gef_pic_cascade(unsigned int irq, struct irq_desc *desc) +static void gef_pic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq; diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h index 908dbd9..5bf7e4b 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.h +++ b/arch/powerpc/sysdev/ge/ge_pic.h @@ -1,8 +1,6 @@ #ifndef __GEF_PIC_H__ #define __GEF_PIC_H__ - -void gef_pic_cascade(unsigned int, struct irq_desc *); unsigned int gef_pic_get_irq(void); void gef_pic_init(struct device_node *); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 97a8ae8..537e5db 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1181,7 +1181,7 @@ static int mpic_host_xlate(struct irq_domain *h, struct device_node *ct, } /* IRQ handler for a secondary MPIC cascaded from another IRQ controller */ -static void mpic_cascade(unsigned int irq, struct irq_desc *desc) +static void mpic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct mpic *mpic = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 47b352e..fbcc1f8 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -311,8 +311,8 @@ unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic) } void __init qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(unsigned int irq, struct irq_desc *desc), - void (*high_handler)(unsigned int irq, struct irq_desc *desc)) + void (*low_handler)(struct irq_desc *desc), + void (*high_handler)(struct irq_desc *desc)) { struct qe_ic *qe_ic; struct resource res; diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 57b5447..379de95 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -428,7 +428,7 @@ void __init tsi108_pci_int_init(struct device_node *node) init_pci_source(); } -void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc) +void tsi108_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = get_pci_source(); diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index d773453..6893d8f 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -194,7 +194,7 @@ static const struct irq_domain_ops uic_host_ops = { .xlate = irq_domain_xlate_twocell, }; -void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void uic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_data *idata = irq_desc_get_irq_data(desc); diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 43b8b27..0f52d79 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -222,7 +222,7 @@ int xilinx_intc_get_irq(void) /* * Support code for cascading to 8259 interrupt controllers */ -static void xilinx_i8259_cascade(unsigned int irq, struct irq_desc *desc) +static void xilinx_i8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c index 6f97a8f..6129aef 100644 --- a/arch/sh/boards/mach-se/7343/irq.c +++ b/arch/sh/boards/mach-se/7343/irq.c @@ -29,7 +29,7 @@ static void __iomem *se7343_irq_regs; struct irq_domain *se7343_irq_domain; 
-static void se7343_irq_demux(unsigned int irq, struct irq_desc *desc) +static void se7343_irq_demux(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(data); diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c index 60aebd1..24c74a8 100644 --- a/arch/sh/boards/mach-se/7722/irq.c +++ b/arch/sh/boards/mach-se/7722/irq.c @@ -28,7 +28,7 @@ static void __iomem *se7722_irq_regs; struct irq_domain *se7722_irq_domain; -static void se7722_irq_demux(unsigned int irq, struct irq_desc *desc) +static void se7722_irq_demux(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(data); diff --git a/arch/sh/boards/mach-se/7724/irq.c b/arch/sh/boards/mach-se/7724/irq.c index 9f20338..64e681e 100644 --- a/arch/sh/boards/mach-se/7724/irq.c +++ b/arch/sh/boards/mach-se/7724/irq.c @@ -92,7 +92,7 @@ static struct irq_chip se7724_irq_chip __read_mostly = { .irq_unmask = enable_se7724_irq, }; -static void se7724_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void se7724_irq_demux(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct fpga_irq set = get_fpga_irq(irq); diff --git a/arch/sh/boards/mach-x3proto/gpio.c b/arch/sh/boards/mach-x3proto/gpio.c index 24555c3..1fb2cbe 100644 --- a/arch/sh/boards/mach-x3proto/gpio.c +++ b/arch/sh/boards/mach-x3proto/gpio.c @@ -60,7 +60,7 @@ static int x3proto_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) return virq; } -static void x3proto_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void x3proto_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(data); diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c index e973561..8180092 100644 --- a/arch/sh/cchips/hd6446x/hd64461.c +++ b/arch/sh/cchips/hd6446x/hd64461.c @@ -56,7 +56,7 @@ static struct irq_chip hd64461_irq_chip = { .irq_unmask = hd64461_unmask_irq, }; -static void hd64461_irq_demux(unsigned int irq, struct irq_desc *desc) +static void hd64461_irq_demux(struct irq_desc *desc) { unsigned short intv = __raw_readw(HD64461_NIRR); unsigned int ext_irq = HD64461_IRQBASE; diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 0299f05..42efcf8 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -53,7 +53,7 @@ static inline unsigned int leon_eirq_get(int cpu) } /* Handle one or multiple IRQs from the extended interrupt controller */ -static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc) +static void leon_handle_ext_irq(struct irq_desc *desc) { unsigned int eirq; struct irq_bucket *p; diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index 3382f7b..1e77128 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -357,7 +357,7 @@ static struct irq_chip grpci1_irq = { }; /* Handle one or multiple IRQs from the PCI core */ -static void grpci1_pci_flow_irq(unsigned int irq, struct irq_desc *desc) +static void grpci1_pci_flow_irq(struct irq_desc *desc) { struct grpci1_priv *priv = grpci1priv; int i, ack = 0; diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 814fb17..f727c4d 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -498,7 +498,7 @@ static 
struct irq_chip grpci2_irq = { }; /* Handle one or multiple IRQs from the PCI core */ -static void grpci2_pci_flow_irq(unsigned int irq, struct irq_desc *desc) +static void grpci2_pci_flow_irq(struct irq_desc *desc) { struct grpci2_priv *priv = grpci2priv; int i, ack = 0; diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index b3f73fd..4c017d0 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -304,17 +304,16 @@ static struct irq_chip tilegx_legacy_irq_chip = { * to Linux which just calls handle_level_irq() after clearing the * MAC INTx Assert status bit associated with this interrupt. */ -static void trio_handle_level_irq(unsigned int __irq, struct irq_desc *desc) +static void trio_handle_level_irq(struct irq_desc *desc) { struct pci_controller *controller = irq_desc_get_handler_data(desc); gxio_trio_context_t *trio_context = controller->trio; uint64_t intx = (uint64_t)irq_desc_get_chip_data(desc); - unsigned int irq = irq_desc_get_irq(desc); int mac = controller->mac; unsigned int reg_offset; uint64_t level_mask; - handle_level_irq(irq, desc); + handle_level_irq(desc); /* * Clear the INTx Level status, otherwise future interrupts are diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c index c53729d..eb1fd00 100644 --- a/arch/unicore32/kernel/irq.c +++ b/arch/unicore32/kernel/irq.c @@ -112,7 +112,7 @@ static struct irq_chip puv3_low_gpio_chip = { * irq_controller_lock held, and IRQs disabled. Decode the IRQ * and call the handler. */ -static void puv3_gpio_handler(unsigned int __irq, struct irq_desc *desc) +static void puv3_gpio_handler(struct irq_desc *desc) { unsigned int mask, irq; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c80cf66..38da8f2 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -68,11 +68,10 @@ static inline void *current_stack(void) return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); } -static inline int -execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) +static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) { struct irq_stack *curstk, *irqstk; - u32 *isp, *prev_esp, arg1, arg2; + u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); irqstk = __this_cpu_read(hardirq_stack); @@ -98,8 +97,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) asm volatile("xchgl %%ebx,%%esp \n" "call *%%edi \n" "movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (isp) - : "0" (irq), "1" (desc), "2" (isp), + : "=a" (arg1), "=b" (isp) + : "0" (desc), "1" (isp), "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; @@ -150,19 +149,15 @@ void do_softirq_own_stack(void) bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - unsigned int irq; - int overflow; - - overflow = check_stack_overflow(); + int overflow = check_stack_overflow(); if (IS_ERR_OR_NULL(desc)) return false; - irq = irq_desc_get_irq(desc); - if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) { + if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) { if (unlikely(overflow)) print_stack_overflow(); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); } return true; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ff16ccb..c767cf2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -75,6 +75,6 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) if (unlikely(IS_ERR_OR_NULL(desc))) return false; - 
generic_handle_irq_desc(irq_desc_get_irq(desc), desc); + generic_handle_irq_desc(desc); return true; } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 161804d..a0d09f6 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1015,7 +1015,7 @@ static struct clock_event_device lguest_clockevent = { * This is the Guest timer interrupt handler (hardware interrupt 0). We just * call the clockevent infrastructure and it does whatever needs doing. */ -static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) +static void lguest_time_irq(struct irq_desc *desc) { unsigned long flags; diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index 4768a82..2bf37e6 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -266,7 +266,7 @@ int ipu_irq_unmap(unsigned int source) } /* Chained IRQ handler for IPU function and error interrupt */ -static void ipu_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void ipu_irq_handler(struct irq_desc *desc) { struct ipu *ipu = irq_desc_get_handler_data(desc); u32 status; diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 9b7e0b3..1b44941 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -201,8 +201,7 @@ static int altera_gpio_direction_output(struct gpio_chip *gc, return 0; } -static void altera_gpio_irq_edge_handler(unsigned int irq, - struct irq_desc *desc) +static void altera_gpio_irq_edge_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; struct irq_chip *chip; @@ -231,8 +230,7 @@ static void altera_gpio_irq_edge_handler(unsigned int irq, } -static void altera_gpio_irq_leveL_high_handler(unsigned int irq, - struct irq_desc *desc) +static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; struct irq_chip *chip; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 31b90ac..33a1f97 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -433,7 +433,7 @@ static int bcm_kona_gpio_irq_set_type(struct irq_data *d, unsigned int type) return 0; } -static void bcm_kona_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void bcm_kona_gpio_irq_handler(struct irq_desc *desc) { void __iomem *reg_base; int bit, bank_id; diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 9ea86d2..4c64627 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -236,7 +236,7 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank) } /* Each UPG GIO block has one IRQ for all banks */ -static void brcmstb_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void brcmstb_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct brcmstb_gpio_priv *priv = brcmstb_gpio_gc_to_priv(gc); diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 94b0ab7..5e71538 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -326,8 +326,7 @@ static struct irq_chip gpio_irqchip = { .flags = IRQCHIP_SET_TYPE_MASKED, }; -static void -gpio_irq_handler(unsigned __irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct davinci_gpio_regs __iomem *g; diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index c5be4b9..fcd5b0a 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ 
-147,7 +147,7 @@ static u32 dwapb_do_irq(struct dwapb_gpio *gpio) return ret; } -static void dwapb_irq_handler(u32 irq, struct irq_desc *desc) +static void dwapb_irq_handler(struct irq_desc *desc) { struct dwapb_gpio *gpio = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 9d90366..3e3947b 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -78,7 +78,7 @@ static void ep93xx_gpio_int_debounce(unsigned int irq, bool enable) EP93XX_GPIO_REG(int_debounce_register_offset[port])); } -static void ep93xx_gpio_ab_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) { unsigned char status; int i; @@ -100,8 +100,7 @@ static void ep93xx_gpio_ab_irq_handler(unsigned int irq, struct irq_desc *desc) } } -static void ep93xx_gpio_f_irq_handler(unsigned int __irq, - struct irq_desc *desc) +static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc) { /* * map discontiguous hw irq range to continuous sw irq range: diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c index aa28c65..7009747 100644 --- a/drivers/gpio/gpio-intel-mid.c +++ b/drivers/gpio/gpio-intel-mid.c @@ -301,7 +301,7 @@ static const struct pci_device_id intel_gpio_ids[] = { }; MODULE_DEVICE_TABLE(pci, intel_gpio_ids); -static void intel_mid_irq_handler(unsigned irq, struct irq_desc *desc) +static void intel_mid_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct intel_mid_gpio *priv = to_intel_gpio_priv(gc); diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 153af46..127c37b 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -234,7 +234,7 @@ static int lp_gpio_direction_output(struct gpio_chip *chip, return 0; } -static void lp_gpio_irq_handler(unsigned hwirq, struct irq_desc *desc) +static void lp_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 8ef7a12..48ef368 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -194,7 +194,7 @@ static int mpc8xxx_gpio_to_irq(struct gpio_chip *gc, unsigned offset) return -ENXIO; } -static void mpc8xxx_gpio_irq_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc8xxx_gpio_irq_cascade(struct irq_desc *desc) { struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c index 7bcfb87..22523aa 100644 --- a/drivers/gpio/gpio-msic.c +++ b/drivers/gpio/gpio-msic.c @@ -232,7 +232,7 @@ static struct irq_chip msic_irqchip = { .irq_bus_sync_unlock = msic_bus_sync_unlock, }; -static void msic_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void msic_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct msic_gpio *mg = irq_data_get_irq_handler_data(data); diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c index d2012cf..4b42221 100644 --- a/drivers/gpio/gpio-msm-v2.c +++ b/drivers/gpio/gpio-msm-v2.c @@ -305,7 +305,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int flow_type) * which have been set as summary IRQ lines and which are triggered, * and to call their 
interrupt handlers. */ -static void msm_summary_irq_handler(unsigned int irq, struct irq_desc *desc) +static void msm_summary_irq_handler(struct irq_desc *desc) { unsigned long i; struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index b396bf3..df418b8 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -458,7 +458,7 @@ static int mvebu_gpio_irq_set_type(struct irq_data *d, unsigned int type) return 0; } -static void mvebu_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void mvebu_gpio_irq_handler(struct irq_desc *desc) { struct mvebu_gpio_chip *mvchip = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index b752b56..5e3235a 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -272,7 +272,7 @@ static void mxc_gpio_irq_handler(struct mxc_gpio_port *port, u32 irq_stat) } /* MX1 and MX3 has one interrupt *per* gpio port */ -static void mx3_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void mx3_gpio_irq_handler(struct irq_desc *desc) { u32 irq_stat; struct mxc_gpio_port *port = irq_desc_get_handler_data(desc); @@ -288,7 +288,7 @@ static void mx3_gpio_irq_handler(u32 irq, struct irq_desc *desc) } /* MX2 has one interrupt *for all* gpio ports */ -static void mx2_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void mx2_gpio_irq_handler(struct irq_desc *desc) { u32 irq_msk, irq_stat; struct mxc_gpio_port *port; diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index b7f383e..b7763f0 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -154,7 +154,7 @@ static void mxs_flip_edge(struct mxs_gpio_port *port, u32 gpio) } /* MXS has one interrupt *per* gpio port */ -static void mxs_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void mxs_gpio_irq_handler(struct irq_desc *desc) { u32 irq_stat; struct mxs_gpio_port *port = irq_desc_get_handler_data(desc); diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 2ae0d47..9df014c 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -709,7 +709,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) * line's interrupt handler has been run, we may miss some nested * interrupts. 
*/ -static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void omap_gpio_irq_handler(struct irq_desc *desc) { void __iomem *isr_reg = NULL; u32 isr; diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 0475613..229ef65 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -187,7 +187,7 @@ static int pl061_irq_type(struct irq_data *d, unsigned trigger) return 0; } -static void pl061_irq_handler(unsigned irq, struct irq_desc *desc) +static void pl061_irq_handler(struct irq_desc *desc) { unsigned long pending; int offset; diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 55a11de..df2ce55 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -401,7 +401,7 @@ static int pxa_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static void pxa_gpio_demux_handler(unsigned int irq, struct irq_desc *desc) +static void pxa_gpio_demux_handler(struct irq_desc *desc) { struct pxa_gpio_chip *c; int loop, gpio, gpio_base, n; diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index 67bd2f5..990fa90 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -172,8 +172,7 @@ static struct irq_domain *sa1100_gpio_irqdomain; * irq_controller_lock held, and IRQs disabled. Decode the IRQ * and call the handler. */ -static void -sa1100_gpio_handler(unsigned int __irq, struct irq_desc *desc) +static void sa1100_gpio_handler(struct irq_desc *desc) { unsigned int irq, mask; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 9b14aaf..027e5f4 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -266,7 +266,7 @@ static void tegra_gpio_irq_shutdown(struct irq_data *d) gpiochip_unlock_as_irq(&tegra_gpio_chip, gpio); } -static void tegra_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void tegra_gpio_irq_handler(struct irq_desc *desc) { int port; int pin; diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index 5a49205..30653e6 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -192,7 +192,7 @@ out: return ret; } -static void timbgpio_irq(unsigned int irq, struct irq_desc *desc) +static void timbgpio_irq(struct irq_desc *desc) { struct timbgpio *tgpio = irq_desc_get_handler_data(desc); struct irq_data *data = irq_desc_get_irq_data(desc); diff --git a/drivers/gpio/gpio-tz1090.c b/drivers/gpio/gpio-tz1090.c index bbac92a..87bb1b1 100644 --- a/drivers/gpio/gpio-tz1090.c +++ b/drivers/gpio/gpio-tz1090.c @@ -375,7 +375,7 @@ static int gpio_set_irq_wake(struct irq_data *data, unsigned int on) #define gpio_set_irq_wake NULL #endif -static void tz1090_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void tz1090_gpio_irq_handler(struct irq_desc *desc) { irq_hw_number_t hw; unsigned int irq_stat, irq_no; @@ -400,7 +400,7 @@ static void tz1090_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) == IRQ_TYPE_EDGE_BOTH) tz1090_gpio_irq_next_edge(bank, hw); - generic_handle_irq_desc(irq_no, child_desc); + generic_handle_irq_desc(child_desc); } } diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 7a6640b..069f9e4 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -120,7 +120,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, return pinctrl_gpio_direction_output(chip->base + gpio); } -static void vf610_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void 
vf610_gpio_irq_handler(struct irq_desc *desc) { struct vf610_gpio_port *port = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-zx.c b/drivers/gpio/gpio-zx.c index 12ee196..4b8a269 100644 --- a/drivers/gpio/gpio-zx.c +++ b/drivers/gpio/gpio-zx.c @@ -177,7 +177,7 @@ static int zx_irq_type(struct irq_data *d, unsigned trigger) return 0; } -static void zx_irq_handler(unsigned irq, struct irq_desc *desc) +static void zx_irq_handler(struct irq_desc *desc) { unsigned long pending; int offset; diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index 27348e7..1d1a586 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -514,7 +514,7 @@ static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio, * application for that pin. * Note: A bug is reported if no handler is set for the gpio pin. */ -static void zynq_gpio_irqhandler(unsigned int irq, struct irq_desc *desc) +static void zynq_gpio_irqhandler(struct irq_desc *desc) { u32 int_sts, int_enb; unsigned int bank_num; diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 243f99a..649d03b 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -912,7 +912,7 @@ static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs) } } -static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ipu_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); @@ -925,7 +925,7 @@ static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc) chained_irq_exit(chip, desc); } -static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ipu_err_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index e9c6f2a..94ddc96 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -65,12 +65,10 @@ static void combiner_unmask_irq(struct irq_data *data) __raw_writel(mask, combiner_base(data) + COMBINER_ENABLE_SET); } -static void combiner_handle_cascade_irq(unsigned int __irq, - struct irq_desc *desc) +static void combiner_handle_cascade_irq(struct irq_desc *desc) { struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq, combiner_irq; unsigned long status; @@ -88,7 +86,7 @@ static void combiner_handle_cascade_irq(unsigned int __irq, cascade_irq = irq_find_mapping(combiner_irq_domain, combiner_irq); if (unlikely(!cascade_irq)) - handle_bad_irq(irq, desc); + handle_bad_irq(desc); else generic_handle_irq(cascade_irq); diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 39b72da..693b9fb 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -447,8 +447,7 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained) static void armada_370_xp_handle_msi_irq(struct pt_regs *r, bool b) {} #endif -static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq, - struct irq_desc *desc) +static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned long irqmap, irqn, irqsrc, 
cpuid; diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index ed4ca9d..56c9cf4 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -96,7 +96,7 @@ struct armctrl_ic { static struct armctrl_ic intc __read_mostly; static void __exception_irq_entry bcm2835_handle_irq( struct pt_regs *regs); -static void bcm2836_chained_handle_irq(unsigned int irq, struct irq_desc *desc); +static void bcm2836_chained_handle_irq(struct irq_desc *desc); static void armctrl_mask_irq(struct irq_data *d) { @@ -245,7 +245,7 @@ static void __exception_irq_entry bcm2835_handle_irq( handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs); } -static void bcm2836_chained_handle_irq(unsigned int irq, struct irq_desc *desc) +static void bcm2836_chained_handle_irq(struct irq_desc *desc) { u32 hwirq; diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 409bdc6..0fea985 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -115,7 +115,7 @@ static inline void l1_writel(u32 val, void __iomem *reg) writel(val, reg); } -static void bcm7038_l1_irq_handle(unsigned int irq, struct irq_desc *desc) +static void bcm7038_l1_irq_handle(struct irq_desc *desc) { struct bcm7038_l1_chip *intc = irq_desc_get_handler_data(desc); struct bcm7038_l1_cpu *cpu; diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index d3f9769..61b18ab 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -56,7 +56,7 @@ struct bcm7120_l2_intc_data { const __be32 *map_mask_prop; }; -static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc) +static void bcm7120_l2_intc_irq_handle(struct irq_desc *desc) { struct bcm7120_l1_intc_data *data = irq_desc_get_handler_data(desc); struct bcm7120_l2_intc_data *b = data->b; diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index aedda06..65cd341 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -49,13 +49,12 @@ struct brcmstb_l2_intc_data { u32 saved_mask; /* for suspend/resume */ }; -static void brcmstb_l2_intc_irq_handle(unsigned int __irq, - struct irq_desc *desc) +static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) { struct brcmstb_l2_intc_data *b = irq_desc_get_handler_data(desc); struct irq_chip_generic *gc = irq_get_domain_generic_chip(b->domain, 0); struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int irq = irq_desc_get_irq(desc); + unsigned int irq; u32 status; chained_irq_enter(chip, desc); @@ -65,7 +64,7 @@ static void brcmstb_l2_intc_irq_handle(unsigned int __irq, if (status == 0) { raw_spin_lock(&desc->lock); - handle_bad_irq(irq, desc); + handle_bad_irq(desc); raw_spin_unlock(&desc->lock); goto out; } diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c index efd95d9..052f266 100644 --- a/drivers/irqchip/irq-dw-apb-ictl.c +++ b/drivers/irqchip/irq-dw-apb-ictl.c @@ -26,7 +26,7 @@ #define APB_INT_FINALSTATUS_H 0x34 #define APB_INT_BASE_OFFSET 0x04 -static void dw_apb_ictl_handler(unsigned int irq, struct irq_desc *desc) +static void dw_apb_ictl_handler(struct irq_desc *desc) { struct irq_domain *d = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 9bccdd2..00bb7c0 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -341,7 +341,7 @@ static void __exception_irq_entry 
gic_handle_irq(struct pt_regs *regs) } while (1); } -static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) +static void gic_handle_cascade_irq(struct irq_desc *desc) { struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); @@ -360,7 +360,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); if (unlikely(gic_irq < 32 || gic_irq > 1020)) - handle_bad_irq(cascade_irq, desc); + handle_bad_irq(desc); else generic_handle_irq(cascade_irq); diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c index 4836102..e484fd2 100644 --- a/drivers/irqchip/irq-i8259.c +++ b/drivers/irqchip/irq-i8259.c @@ -352,7 +352,7 @@ void __init init_i8259_irqs(void) __init_i8259_irqs(NULL); } -static void i8259_irq_dispatch(unsigned int __irq, struct irq_desc *desc) +static void i8259_irq_dispatch(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); int hwirq = i8259_irq(); diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c index 841604b..c02d29c 100644 --- a/drivers/irqchip/irq-imgpdc.c +++ b/drivers/irqchip/irq-imgpdc.c @@ -218,7 +218,7 @@ static int pdc_irq_set_wake(struct irq_data *data, unsigned int on) return 0; } -static void pdc_intc_perip_isr(unsigned int __irq, struct irq_desc *desc) +static void pdc_intc_perip_isr(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct pdc_intc_priv *priv; @@ -240,7 +240,7 @@ found: generic_handle_irq(irq_no); } -static void pdc_intc_syswake_isr(unsigned int irq, struct irq_desc *desc) +static void pdc_intc_syswake_isr(struct irq_desc *desc) { struct pdc_intc_priv *priv; unsigned int syswake, irq_no; diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index c151726..7b784b6 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -83,7 +83,7 @@ static void keystone_irq_ack(struct irq_data *d) /* nothing to do here */ } -static void keystone_irq_handler(unsigned __irq, struct irq_desc *desc) +static void keystone_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc); diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c index 5f4c529..8c38b3d 100644 --- a/drivers/irqchip/irq-metag-ext.c +++ b/drivers/irqchip/irq-metag-ext.c @@ -446,7 +446,7 @@ static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type) * Whilst using TR2 to detect external interrupts is a software convention it is * (hopefully) unlikely to change. */ -static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc) +static void meta_intc_irq_demux(struct irq_desc *desc) { struct meta_intc_priv *priv = &meta_intc_priv; irq_hw_number_t hw; diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c index 3d23ce3..a5f053b 100644 --- a/drivers/irqchip/irq-metag.c +++ b/drivers/irqchip/irq-metag.c @@ -220,7 +220,7 @@ static int metag_internal_irq_set_affinity(struct irq_data *data, * occurred. It is this function's job to demux this irq and * figure out exactly which trigger needs servicing. 
*/ -static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc) +static void metag_internal_irq_demux(struct irq_desc *desc) { struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc); irq_hw_number_t hw; diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 1764bcf..af2f16b 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -546,7 +546,7 @@ static void __gic_irq_dispatch(void) gic_handle_shared_int(false); } -static void gic_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void gic_irq_dispatch(struct irq_desc *desc) { gic_handle_local_int(true); gic_handle_shared_int(true); diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 781ed6e..ea6e3a9 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -129,7 +129,7 @@ struct irq_chip icu_irq_chip = { .irq_unmask = icu_unmask_irq, }; -static void icu_mux_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void icu_mux_irq_demux(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_domain *domain; diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c index 5ea999a..be4c5a8 100644 --- a/drivers/irqchip/irq-orion.c +++ b/drivers/irqchip/irq-orion.c @@ -106,7 +106,7 @@ IRQCHIP_DECLARE(orion_intc, "marvell,orion-intc", orion_irq_init); #define ORION_BRIDGE_IRQ_CAUSE 0x00 #define ORION_BRIDGE_IRQ_MASK 0x04 -static void orion_bridge_irq_handler(unsigned int irq, struct irq_desc *desc) +static void orion_bridge_irq_handler(struct irq_desc *desc) { struct irq_domain *d = irq_desc_get_handler_data(desc); diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c index 506d9f2..e47572a 100644 --- a/drivers/irqchip/irq-s3c24xx.c +++ b/drivers/irqchip/irq-s3c24xx.c @@ -298,7 +298,7 @@ static struct irq_chip s3c_irq_eint0t4 = { .irq_set_type = s3c_irqext0_type, }; -static void s3c_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void s3c_irq_demux(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct s3c_irq_data *irq_data = irq_desc_get_chip_data(desc); diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 772a82c..c143dd5 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -58,7 +58,7 @@ static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic *gc, u32 off) return irq_reg_readl(gc, off); } -static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc) +static void sunxi_sc_nmi_handle_irq(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c index 3318296..848d782 100644 --- a/drivers/irqchip/irq-tb10x.c +++ b/drivers/irqchip/irq-tb10x.c @@ -97,7 +97,7 @@ static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type) return IRQ_SET_MASK_OK; } -static void tb10x_irq_cascade(unsigned int __irq, struct irq_desc *desc) +static void tb10x_irq_cascade(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); unsigned int irq = irq_desc_get_irq(desc); diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 16123f6..1b1c63e 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -65,19 +65,19 @@ static void fpga_irq_unmask(struct irq_data *d) writel(mask, f->base + 
IRQ_ENABLE_SET); } -static void fpga_irq_handle(unsigned int __irq, struct irq_desc *desc) +static void fpga_irq_handle(struct irq_desc *desc) { struct fpga_irq_data *f = irq_desc_get_handler_data(desc); - unsigned int irq = irq_desc_get_irq(desc); u32 status = readl(f->base + IRQ_STATUS); if (status == 0) { - do_bad_IRQ(irq, desc); + do_bad_IRQ(desc); return; } do { - irq = ffs(status) - 1; + unsigned int irq = ffs(status) - 1; + status &= ~(1 << irq); generic_handle_irq(irq_find_mapping(f->domain, irq)); } while (status); diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index 03846df..cb85504 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -225,7 +225,7 @@ static int handle_one_vic(struct vic_device *vic, struct pt_regs *regs) return handled; } -static void vic_handle_irq_cascaded(unsigned int irq, struct irq_desc *desc) +static void vic_handle_irq_cascaded(struct irq_desc *desc) { u32 stat, hwirq; struct irq_chip *host_chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 4cbd9c5..c838c92 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -182,7 +182,7 @@ static struct spear_shirq *spear320_shirq_blocks[] = { &spear320_shirq_intrcomm_ras, }; -static void shirq_handler(unsigned __irq, struct irq_desc *desc) +static void shirq_handler(struct irq_desc *desc) { struct spear_shirq *shirq = irq_desc_get_handler_data(desc); u32 pend; diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 4b54128..a726f01 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -138,7 +138,7 @@ static void asic3_irq_flip_edge(struct asic3 *asic, spin_unlock_irqrestore(&asic->lock, flags); } -static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc) +static void asic3_irq_demux(struct irq_desc *desc) { struct asic3 *asic = irq_desc_get_handler_data(desc); struct irq_data *data = irq_desc_get_irq_data(desc); diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index a76eb6e..b279205 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -205,7 +205,7 @@ static void pcap_isr_work(struct work_struct *work) } while (gpio_get_value(pdata->gpio)); } -static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc) +static void pcap_irq_handler(struct irq_desc *desc) { struct pcap_chip *pcap = irq_desc_get_handler_data(desc); diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index 9131cdc..6ccaf90 100644 --- a/drivers/mfd/htc-egpio.c +++ b/drivers/mfd/htc-egpio.c @@ -98,7 +98,7 @@ static struct irq_chip egpio_muxed_chip = { .irq_unmask = egpio_unmask, }; -static void egpio_handler(unsigned int irq, struct irq_desc *desc) +static void egpio_handler(struct irq_desc *desc) { struct egpio_info *ei = irq_desc_get_handler_data(desc); int irqpin; diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c index 5bb49f0..798e443 100644 --- a/drivers/mfd/jz4740-adc.c +++ b/drivers/mfd/jz4740-adc.c @@ -65,7 +65,7 @@ struct jz4740_adc { spinlock_t lock; }; -static void jz4740_adc_irq_demux(unsigned int irq, struct irq_desc *desc) +static void jz4740_adc_irq_demux(struct irq_desc *desc) { struct irq_chip_generic *gc = irq_desc_get_handler_data(desc); uint8_t status; diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index 59502d0..1b7ec08 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -156,7 +156,7 @@ static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master) return ret; } 
-static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc) +static void pm8xxx_irq_handler(struct irq_desc *desc) { struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); struct irq_chip *irq_chip = irq_desc_get_chip(desc); diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c index 16fc1ad..94bd89c 100644 --- a/drivers/mfd/t7l66xb.c +++ b/drivers/mfd/t7l66xb.c @@ -185,7 +185,7 @@ static struct mfd_cell t7l66xb_cells[] = { /*--------------------------------------------------------------------------*/ /* Handle the T7L66XB interrupt mux */ -static void t7l66xb_irq(unsigned int irq, struct irq_desc *desc) +static void t7l66xb_irq(struct irq_desc *desc) { struct t7l66xb *t7l66xb = irq_desc_get_handler_data(desc); unsigned int isr; diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 775b9ac..8c84a51 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -522,8 +522,7 @@ static int tc6393xb_register_gpio(struct tc6393xb *tc6393xb, int gpio_base) /*--------------------------------------------------------------------------*/ -static void -tc6393xb_irq(unsigned int irq, struct irq_desc *desc) +static void tc6393xb_irq(struct irq_desc *desc) { struct tc6393xb *tc6393xb = irq_desc_get_handler_data(desc); unsigned int isr; diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 9a23021..f691d7e 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -282,7 +282,7 @@ void ucb1x00_adc_disable(struct ucb1x00 *ucb) * SIBCLK to talk to the chip. We leave the clock running until * we have finished processing all interrupts from the chip. */ -static void ucb1x00_irq(unsigned int __irq, struct irq_desc *desc) +static void ucb1x00_irq(struct irq_desc *desc) { struct ucb1x00 *ucb = irq_desc_get_handler_data(desc); unsigned int isr, i; diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c index 81253e7..0aa81bd 100644 --- a/drivers/pci/host/pci-keystone.c +++ b/drivers/pci/host/pci-keystone.c @@ -110,7 +110,7 @@ static int ks_pcie_establish_link(struct keystone_pcie *ks_pcie) return -EINVAL; } -static void ks_pcie_msi_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void ks_pcie_msi_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc); @@ -138,8 +138,7 @@ static void ks_pcie_msi_irq_handler(unsigned int __irq, struct irq_desc *desc) * Traverse through pending legacy interrupts and invoke handler for each. Also * takes care of interrupt controller level mask/ack operation. 
*/ -static void ks_pcie_legacy_irq_handler(unsigned int __irq, - struct irq_desc *desc) +static void ks_pcie_legacy_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc); diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c index 996327c..e491681 100644 --- a/drivers/pci/host/pci-xgene-msi.c +++ b/drivers/pci/host/pci-xgene-msi.c @@ -295,7 +295,7 @@ static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) return 0; } -static void xgene_msi_isr(unsigned int irq, struct irq_desc *desc) +static void xgene_msi_isr(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct xgene_msi_group *msi_groups; diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c index 7d9482b..1ca7830 100644 --- a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c @@ -143,7 +143,7 @@ static inline bool cygnus_get_bit(struct cygnus_gpio *chip, unsigned int reg, return !!(readl(chip->base + offset) & BIT(shift)); } -static void cygnus_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void cygnus_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct cygnus_gpio *chip = to_cygnus_gpio(gc); diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index dac4865..f79ea43 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -425,7 +425,7 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) } } -static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void byt_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct byt_gpio *vg = to_byt_gpio(irq_desc_get_handler_data(desc)); diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 2d5d3dd..270c127 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1414,7 +1414,7 @@ static struct irq_chip chv_gpio_irqchip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void chv_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void chv_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index bb377c1..54848b8 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -836,7 +836,7 @@ static void intel_gpio_community_irq_handler(struct gpio_chip *gc, } } -static void intel_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void intel_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct intel_pinctrl *pctrl = gpiochip_to_pinctrl(gc); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 7726c6c..1b22f96 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1190,7 +1190,7 @@ mtk_eint_debounce_process(struct mtk_pinctrl *pctl, int index) } } -static void mtk_eint_irq_handler(unsigned irq, struct irq_desc *desc) +static void mtk_eint_irq_handler(struct irq_desc *desc) { struct irq_chip *chip = 
irq_desc_get_chip(desc); struct mtk_pinctrl *pctl = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 352ede1..96cf039 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -860,7 +860,7 @@ static void __nmk_gpio_irq_handler(struct irq_desc *desc, u32 status) chained_irq_exit(host_chip, desc); } -static void nmk_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void nmk_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *chip = irq_desc_get_handler_data(desc); struct nmk_gpio_chip *nmk_chip = container_of(chip, struct nmk_gpio_chip, chip); @@ -873,7 +873,7 @@ static void nmk_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) __nmk_gpio_irq_handler(desc, status); } -static void nmk_gpio_latent_irq_handler(unsigned int irq, struct irq_desc *desc) +static void nmk_gpio_latent_irq_handler(struct irq_desc *desc) { struct gpio_chip *chip = irq_desc_get_handler_data(desc); struct nmk_gpio_chip *nmk_chip = container_of(chip, struct nmk_gpio_chip, chip); diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index a5976eb..f6be685 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -530,8 +530,7 @@ static inline void preflow_handler(struct irq_desc *desc) static inline void preflow_handler(struct irq_desc *desc) { } #endif -static void adi_gpio_handle_pint_irq(unsigned int inta_irq, - struct irq_desc *desc) +static void adi_gpio_handle_pint_irq(struct irq_desc *desc) { u32 request; u32 level_mask, hwirq; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5e86bb8..3318f1d 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -492,15 +492,15 @@ static struct irq_chip amd_gpio_irqchip = { .irq_set_type = amd_gpio_irq_set_type, }; -static void amd_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void amd_gpio_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); u32 i; u32 off; u32 reg; u32 pin_reg; u64 reg64; int handled = 0; + unsigned int irq; unsigned long flags; struct irq_chip *chip = irq_desc_get_chip(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); @@ -541,7 +541,7 @@ static void amd_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) } if (handled == 0) - handle_bad_irq(irq, desc); + handle_bad_irq(desc); spin_lock_irqsave(&gpio_dev->lock, flags); reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index bae0012..b0fde0f 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1585,7 +1585,7 @@ static struct irq_chip gpio_irqchip = { .irq_set_wake = gpio_irq_set_wake, }; -static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct gpio_chip *gpio_chip = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/pinctrl-coh901.c b/drivers/pinctrl/pinctrl-coh901.c index 3731cc6..9c9b889 100644 --- a/drivers/pinctrl/pinctrl-coh901.c +++ b/drivers/pinctrl/pinctrl-coh901.c @@ -519,7 +519,7 @@ static struct irq_chip u300_gpio_irqchip = { .irq_set_type = u300_gpio_irq_type, }; -static void u300_gpio_irq_handler(unsigned __irq, struct irq_desc *desc) +static void u300_gpio_irq_handler(struct irq_desc *desc) { unsigned int irq = 
irq_desc_get_irq(desc); struct irq_chip *parent_chip = irq_desc_get_chip(desc); diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index f22d585..952b1c6 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1310,13 +1310,11 @@ static int pistachio_gpio_irq_set_type(struct irq_data *data, unsigned int type) return 0; } -static void pistachio_gpio_irq_handler(unsigned int __irq, - struct irq_desc *desc) +static void pistachio_gpio_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct pistachio_gpio_bank *bank = gc_to_bank(gc); - struct irq_chip *chip = irq_get_chip(irq); + struct irq_chip *chip = irq_desc_get_chip(desc); unsigned long pending; unsigned int pin; diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index c5246c0..88bb707 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -1475,7 +1475,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = { * Interrupt handling */ -static void rockchip_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void rockchip_irq_demux(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index bf548c2..ef04b96 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1679,7 +1679,7 @@ static irqreturn_t pcs_irq_handler(int irq, void *d) * Use this if you have a separate interrupt for each * pinctrl-single instance. */ -static void pcs_irq_chain_handler(unsigned int irq, struct irq_desc *desc) +static void pcs_irq_chain_handler(struct irq_desc *desc) { struct pcs_soc_data *pcs_soc = irq_desc_get_handler_data(desc); struct irq_chip *chip; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index f8338d2..389526e 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1460,7 +1460,7 @@ static void __gpio_irq_handler(struct st_gpio_bank *bank) } } -static void st_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void st_gpio_irq_handler(struct irq_desc *desc) { /* interrupt dedicated per bank */ struct irq_chip *chip = irq_desc_get_chip(desc); @@ -1472,7 +1472,7 @@ static void st_gpio_irq_handler(unsigned irq, struct irq_desc *desc) chained_irq_exit(chip, desc); } -static void st_gpio_irqmux_handler(unsigned irq, struct irq_desc *desc) +static void st_gpio_irqmux_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct st_pinctrl *info = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 492cdd5..a0c7407 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -765,9 +765,8 @@ static struct irq_chip msm_gpio_irq_chip = { .irq_set_wake = msm_gpio_irq_set_wake, }; -static void msm_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void msm_gpio_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); const struct msm_pingroup *g; struct msm_pinctrl *pctrl = to_msm_pinctrl(gc); @@ -795,7 +794,7 @@ static void msm_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) /* No interrupts were flagged */ if (handled == 0) - 
handle_bad_irq(irq, desc); + handle_bad_irq(desc); chained_irq_exit(chip, desc); } diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 5f45caa..71ccf6a 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -419,7 +419,7 @@ static const struct of_device_id exynos_wkup_irq_ids[] = { }; /* interrupt handler for wakeup interrupts 0..15 */ -static void exynos_irq_eint0_15(unsigned int irq, struct irq_desc *desc) +static void exynos_irq_eint0_15(struct irq_desc *desc) { struct exynos_weint_data *eintd = irq_desc_get_handler_data(desc); struct samsung_pin_bank *bank = eintd->bank; @@ -451,7 +451,7 @@ static inline void exynos_irq_demux_eint(unsigned long pend, } /* interrupt handler for wakeup interrupt 16 */ -static void exynos_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc) +static void exynos_irq_demux_eint16_31(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct exynos_muxed_weint_data *eintd = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 019844d..b3cd9ae 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -240,7 +240,7 @@ static struct irq_chip s3c2410_eint0_3_chip = { .irq_set_type = s3c24xx_eint_type, }; -static void s3c2410_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c2410_demux_eint0_3(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc); @@ -295,7 +295,7 @@ static struct irq_chip s3c2412_eint0_3_chip = { .irq_set_type = s3c24xx_eint_type, }; -static void s3c2412_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c2412_demux_eint0_3(struct irq_desc *desc) { struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc); struct irq_data *data = irq_desc_get_irq_data(desc); @@ -388,12 +388,12 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc, chained_irq_exit(chip, desc); } -static void s3c24xx_demux_eint4_7(unsigned int irq, struct irq_desc *desc) +static void s3c24xx_demux_eint4_7(struct irq_desc *desc) { s3c24xx_demux_eint(desc, 0, 0xf0); } -static void s3c24xx_demux_eint8_23(unsigned int irq, struct irq_desc *desc) +static void s3c24xx_demux_eint8_23(struct irq_desc *desc) { s3c24xx_demux_eint(desc, 8, 0xffff00); } diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c index f5ea40a..43407ab 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c @@ -407,7 +407,7 @@ static const struct irq_domain_ops s3c64xx_gpio_irqd_ops = { .xlate = irq_domain_xlate_twocell, }; -static void s3c64xx_eint_gpio_irq(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_eint_gpio_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct s3c64xx_eint_gpio_data *data = irq_desc_get_handler_data(desc); @@ -631,22 +631,22 @@ static inline void s3c64xx_irq_demux_eint(struct irq_desc *desc, u32 range) chained_irq_exit(chip, desc); } -static void s3c64xx_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint0_3(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xf); } -static void s3c64xx_demux_eint4_11(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint4_11(struct irq_desc *desc) { 
s3c64xx_irq_demux_eint(desc, 0xff0); } -static void s3c64xx_demux_eint12_19(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint12_19(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xff000); } -static void s3c64xx_demux_eint20_27(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint20_27(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xff00000); } diff --git a/drivers/pinctrl/sirf/pinctrl-atlas7.c b/drivers/pinctrl/sirf/pinctrl-atlas7.c index 9df0c5f..0d24d9e 100644 --- a/drivers/pinctrl/sirf/pinctrl-atlas7.c +++ b/drivers/pinctrl/sirf/pinctrl-atlas7.c @@ -4489,7 +4489,7 @@ static struct irq_chip atlas7_gpio_irq_chip = { .irq_set_type = atlas7_gpio_irq_type, }; -static void atlas7_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) +static void atlas7_gpio_handle_irq(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct atlas7_gpio_chip *a7gc = to_atlas7_gpio(gc); @@ -4512,7 +4512,7 @@ static void atlas7_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) if (!status) { pr_warn("%s: gpio [%s] status %#x no interrupt is flaged\n", __func__, gc->label, status); - handle_bad_irq(irq, desc); + handle_bad_irq(desc); return; } diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index f8bd9fb..2a8d697 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -545,7 +545,7 @@ static struct irq_chip sirfsoc_irq_chip = { .irq_set_type = sirfsoc_gpio_irq_type, }; -static void sirfsoc_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) +static void sirfsoc_gpio_handle_irq(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); @@ -570,7 +570,7 @@ static void sirfsoc_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) printk(KERN_WARNING "%s: gpio id %d status %#x no interrupt is flagged\n", __func__, bank->id, status); - handle_bad_irq(irq, desc); + handle_bad_irq(desc); return; } diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index ae8f29f..1f0af25 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -356,7 +356,7 @@ static struct irq_chip plgpio_irqchip = { .irq_set_type = plgpio_irq_set_type, }; -static void plgpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void plgpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct plgpio *plgpio = container_of(gc, struct plgpio, chip); diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 31af97d..38e0c7b 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -740,7 +740,7 @@ static struct irq_domain_ops sunxi_pinctrl_irq_domain_ops = { .xlate = sunxi_pinctrl_irq_of_xlate, }; -static void sunxi_pinctrl_irq_handler(unsigned __irq, struct irq_desc *desc) +static void sunxi_pinctrl_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index 043419d..8e72bcb 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -65,7 +65,7 @@ void intc_set_prio_level(unsigned int irq, unsigned int level) raw_spin_unlock_irqrestore(&intc_big_lock, flags); } -static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc) +static void 
intc_redirect_irq(struct irq_desc *desc) { generic_handle_irq((unsigned int)irq_desc_get_handler_data(desc)); } diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c index bafc51c..e789962 100644 --- a/drivers/sh/intc/virq.c +++ b/drivers/sh/intc/virq.c @@ -109,7 +109,7 @@ static int add_virq_to_pirq(unsigned int irq, unsigned int virq) return 0; } -static void intc_virq_handler(unsigned int __irq, struct irq_desc *desc) +static void intc_virq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_data *data = irq_desc_get_irq_data(desc); @@ -127,7 +127,7 @@ static void intc_virq_handler(unsigned int __irq, struct irq_desc *desc) handle = (unsigned long)irq_desc_get_handler_data(vdesc); addr = INTC_REG(d, _INTC_ADDR_E(handle), 0); if (intc_reg_fns[_INTC_FN(handle)](addr, handle, 0)) - generic_handle_irq_desc(entry->irq, vdesc); + generic_handle_irq_desc(vdesc); } } diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c index 6bc13f9..052aecf 100644 --- a/drivers/soc/dove/pmu.c +++ b/drivers/soc/dove/pmu.c @@ -222,7 +222,7 @@ static void __pmu_domain_register(struct pmu_domain *domain, } /* PMU IRQ controller */ -static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) +static void pmu_irq_handler(struct irq_desc *desc) { struct pmu_data *pmu = irq_desc_get_handler_data(desc); struct irq_chip_generic *gc = pmu->irq_gc; @@ -232,7 +232,7 @@ static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) u32 done = ~0; if (stat == 0) { - handle_bad_irq(irq_desc_get_irq(desc), desc); + handle_bad_irq(desc); return; } diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index bdfb3c8..4a3cf9b 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -451,7 +451,7 @@ static void periph_interrupt(struct spmi_pmic_arb_dev *pa, u8 apid) } } -static void pmic_arb_chained_irq(unsigned int irq, struct irq_desc *desc) +static void pmic_arb_chained_irq(struct irq_desc *desc) { struct spmi_pmic_arb_dev *pa = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/include/linux/irq.h b/include/linux/irq.h index 4913c32..11bf092 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -475,14 +475,14 @@ static inline int irq_set_parent(int irq, int parent_irq) * Built-in IRQ handlers for various IRQ types, * callable via desc->handle_irq() */ -extern void handle_level_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); +extern void handle_level_irq(struct irq_desc *desc); +extern void handle_fasteoi_irq(struct irq_desc *desc); +extern void handle_edge_irq(struct irq_desc *desc); +extern void handle_edge_eoi_irq(struct irq_desc *desc); +extern void handle_simple_irq(struct irq_desc *desc); +extern void handle_percpu_irq(struct irq_desc *desc); +extern void handle_percpu_devid_irq(struct irq_desc *desc); +extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); extern int irq_chip_compose_msi_msg(struct irq_data *data, 
struct msi_msg *msg); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index fbb4d5a..a587a33 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -135,9 +135,9 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) * Architectures call this to let the generic IRQ layer * handle an interrupt. */ -static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) +static inline void generic_handle_irq_desc(struct irq_desc *desc) { - desc->handle_irq(irq, desc); + desc->handle_irq(desc); } int generic_handle_irq(unsigned int irq); diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index 62d5430..661bed0 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ struct irq_desc; struct irq_data; -typedef void (*irq_flow_handler_t)(unsigned int irq, struct irq_desc *desc); +typedef void (*irq_flow_handler_t)(struct irq_desc *desc); typedef void (*irq_preflow_handler_t)(struct irq_data *data); #endif diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 8c55d54..e28169d 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc) /** * handle_simple_irq - Simple and software-decoded IRQs. - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Simple interrupts are either sent from a demultiplexing interrupt @@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc) * Note: The caller is expected to handle the ack, clear, mask and * unmask issues if necessary. */ -void -handle_simple_irq(unsigned int irq, struct irq_desc *desc) +void handle_simple_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); @@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc) /** * handle_level_irq - Level type irq handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Level type interrupts are active as long as the hardware line has @@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc) * it after the associated handler has acknowledged the device, so the * interrupt line is back to inactive. */ -void -handle_level_irq(unsigned int irq, struct irq_desc *desc) +void handle_level_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); mask_ack_irq(desc); @@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) /** * handle_fasteoi_irq - irq handler for transparent controllers - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Only a single callback will be issued to the chip: an ->eoi() @@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) * for modern forms of interrupt handlers, which handle the flow * details in hardware, transparently. */ -void -handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) +void handle_fasteoi_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; @@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); /** * handle_edge_irq - edge type IRQ handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Interrupt occures on the falling and/or rising edge of a hardware @@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); * the handler was running. If all pending interrupts are handled, the * loop is left. 
*/ -void -handle_edge_irq(unsigned int irq, struct irq_desc *desc) +void handle_edge_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); @@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq); #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER /** * handle_edge_eoi_irq - edge eoi type IRQ handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Similar as the above handle_edge_irq, but using eoi and w/o the * mask/unmask logic. */ -void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) +void handle_edge_eoi_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -665,13 +656,11 @@ out_eoi: /** * handle_percpu_irq - Per CPU local irq handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Per CPU interrupts on SMP machines without locking requirements */ -void -handle_percpu_irq(unsigned int irq, struct irq_desc *desc) +void handle_percpu_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) /** * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Per CPU interrupts on SMP machines without locking requirements. Same as @@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) * contain the real device id for the cpu on which this handler is * called */ -void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) +void handle_percpu_devid_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; void *dev_id = raw_cpu_ptr(action->percpu_dev_id); + unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; kstat_incr_irqs_this_cpu(desc); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b6eeea8..de41a68 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -27,8 +27,10 @@ * * Handles spurious and unhandled IRQ's. It also prints a debugmessage. */ -void handle_bad_irq(unsigned int irq, struct irq_desc *desc) +void handle_bad_irq(struct irq_desc *desc) { + unsigned int irq = irq_desc_get_irq(desc); + print_irq_desc(irq, desc); kstat_incr_irqs_this_cpu(desc); ack_bad_irq(irq); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 5966694..239e2ae 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -347,7 +347,7 @@ int generic_handle_irq(unsigned int irq) if (!desc) return -EINVAL; - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); return 0; } EXPORT_SYMBOL_GPL(generic_handle_irq); diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index dd95f44..b86886b 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg) clear_bit(irq, irqs_resend); desc = irq_to_desc(irq); local_irq_disable(); - desc->handle_irq(irq, desc); + desc->handle_irq(desc); local_irq_enable(); } } -- cgit v0.10.2 From 1713e5aa05fff3951e747548b373bd2c81be4e7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Sep 2015 10:54:37 +0100 Subject: arm64: KVM: Fix user access for debug registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When setting the debug register from userspace, make sure that copy_from_user() is called with its parameters in the expected order. It otherwise doesn't do what you think. 
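For context, copy_from_user() uses the memcpy()-style (destination, source, size) argument order and returns the number of bytes it could not copy; the buggy calls fixed below passed the user pointer as the destination. A minimal sketch of the correct pattern follows (the helper name and buffer are illustrative, not part of the patch):

#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * copy_from_user(to, from, n) copies n bytes from user space into a
 * kernel buffer and returns the number of bytes left uncopied (0 on
 * success). With the first two arguments swapped, the copy runs in the
 * wrong direction, which is exactly the bug corrected in the hunks below.
 */
static int fetch_user_reg(__u64 *kbuf, const void __user *uaddr, size_t size)
{
	if (copy_from_user(kbuf, uaddr, size) != 0)
		return -EFAULT;	/* some bytes could not be copied */

	return 0;
}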
Fixes: 84e690bfbed1 ("KVM: arm64: introduce vcpu->arch.debug_ptr") Reported-by: Peter Maydell Cc: Alex Bennée Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b41607d..1d0463e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -272,7 +272,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; } @@ -314,7 +314,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; @@ -358,7 +358,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; } @@ -400,7 +400,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; } -- cgit v0.10.2 From ca09f02f122b2ecb0f5ddfc5fd47b29ed657d4fd Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Wed, 16 Sep 2015 12:04:55 +0200 Subject: arm: KVM: Fix incorrect device to IPA mapping A critical bug has been found in device memory stage1 translation for VMs with more than 4GB of address space. Since vm_pgoff is a narrower type than pa (u32 vs. u64 in the LPAE case), the more significant bits of pa may be lost, as the shift operation is performed on a u32 and only later cast to u64. Example: vm_pgoff(u32)=0x00210030, PAGE_SHIFT=12 expected pa(u64): 0x0000002010030000 produced pa(u64): 0x0000000010030000 The fix is to change the order of operations (casting to phys_addr_t first and then shifting). Reviewed-by: Marc Zyngier [maz: fixed changelog and patch formatting] Cc: stable@vger.kernel.org Signed-off-by: Marek Majtyka Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7b42012..6984342 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1792,8 +1792,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (vma->vm_flags & VM_PFNMAP) { gpa_t gpa = mem->guest_phys_addr + (vm_start - mem->userspace_addr); - phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + - vm_start - vma->vm_start; + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) -- cgit v0.10.2 From 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Sep 2015 14:10:03 +0100 Subject: x86/platform: Fix Geode LX timekeeping in the generic x86 build In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable") bypassed verification of the TSC on Geode LX. However, this code (now in the check_system_tsc_reliable() function in arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was set. 
OpenWRT has recently started building its generic Geode target for Geode GX, not LX, to include support for additional platforms. This broke the timekeeping on LX-based devices, because the TSC wasn't marked as reliable: https://dev.openwrt.org/ticket/20531 By adding a runtime check on is_geode_lx(), we can also include the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus fixing the problem. Signed-off-by: David Woodhouse Cc: Andres Salomon Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 79055cf..51e62d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1015,15 +1016,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; -- cgit v0.10.2 From ca0141de743dc7760fdfeb9210b82dadf6d9b221 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sat, 29 Aug 2015 18:01:21 -0500 Subject: gpu/drm: Kill off set_irq_flags usage set_irq_flags is ARM specific with custom flags which have genirq equivalents. Convert drivers to use the genirq interfaces directly, so we can kill off set_irq_flags. The translation of flags is as follows: IRQF_VALID -> !IRQ_NOREQUEST IRQF_PROBE -> !IRQ_NOPROBE IRQF_NOAUTOEN -> IRQ_NOAUTOEN For IRQs managed by an irqdomain, the irqdomain core code handles clearing and setting IRQ_NOREQUEST already, so there is no need to do this in .map() functions and we can simply remove the set_irq_flags calls. Some users also modify IRQ_NOPROBE and this has been maintained although it is not clear that is really needed. There appears to be a great deal of blind copy and paste of this code. 
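As a concrete illustration of the translation above, a hypothetical irqdomain .map() callback before and after the conversion might look like this (the foo_* names are invented for the sketch; the flag mapping follows the table in the changelog):

/* Before: ARM-specific set_irq_flags() with custom flags. */
static int foo_irq_map(struct irq_domain *d, unsigned int irq,
		       irq_hw_number_t hw)
{
	irq_set_chip_and_handler(irq, &foo_irq_chip, handle_level_irq);
	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
	return 0;
}

/*
 * After: generic interfaces only. IRQF_VALID needs no replacement here
 * because the irqdomain core already clears IRQ_NOREQUEST, and
 * IRQF_PROBE becomes irq_set_probe(), which clears IRQ_NOPROBE.
 */
static int foo_irq_map(struct irq_domain *d, unsigned int irq,
		       irq_hw_number_t hw)
{
	irq_set_chip_and_handler(irq, &foo_irq_chip, handle_level_irq);
	irq_set_probe(irq);
	return 0;
}

The same pattern repeats across the hunks below; only the chip, handler, and retained status flags differ per driver.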
Signed-off-by: Rob Herring Cc: linux-arm-kernel@lists.infradead.org Cc: Russell King Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thomas Gleixner diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index b1f73be..b0d4b53 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -178,7 +178,6 @@ static int mdp5_hw_irqdomain_map(struct irq_domain *d, irq_set_chip_and_handler(irq, &mdp5_hw_irq_chip, handle_level_irq); irq_set_chip_data(irq, mdp5_kms); - set_irq_flags(irq, IRQF_VALID); return 0; } diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 649d03b..e5a38d2 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1099,8 +1099,7 @@ static int ipu_irq_init(struct ipu_soc *ipu) } ret = irq_alloc_domain_generic_chips(ipu->domain, 32, 1, "IPU", - handle_level_irq, 0, - IRQF_VALID, 0); + handle_level_irq, 0, 0, 0); if (ret < 0) { dev_err(ipu->dev, "failed to alloc generic irq chips\n"); irq_domain_remove(ipu->domain); -- cgit v0.10.2 From d17cab4451df1f25f3a46369e0aaeaa18390fa6b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sat, 29 Aug 2015 18:01:22 -0500 Subject: irqchip: Kill off set_irq_flags usage set_irq_flags is ARM specific with custom flags which have genirq equivalents. Convert drivers to use the genirq interfaces directly, so we can kill off set_irq_flags. The translation of flags is as follows: IRQF_VALID -> !IRQ_NOREQUEST IRQF_PROBE -> !IRQ_NOPROBE IRQF_NOAUTOEN -> IRQ_NOAUTOEN For IRQs managed by an irqdomain, the irqdomain core code handles clearing and setting IRQ_NOREQUEST already, so there is no need to do this in .map() functions and we can simply remove the set_irq_flags calls. Some users also modify IRQ_NOPROBE and this has been maintained although it is not clear that is really needed. There appears to be a great deal of blind copy and paste of this code. 
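For the IRQF_NOAUTOEN case, the conversion sets a genirq status flag instead of clearing one. A sketch for a per-CPU interrupt mapping, modeled on the GIC hunks below (the bar_* names are invented):

static int bar_percpu_irq_map(struct irq_domain *d, unsigned int irq,
			      irq_hw_number_t hw)
{
	irq_set_percpu_devid(irq);
	irq_set_chip_and_handler(irq, &bar_irq_chip, handle_percpu_devid_irq);
	/*
	 * Was: set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
	 * IRQ_NOAUTOEN keeps the interrupt disabled until it is enabled
	 * explicitly later, e.g. from the per-CPU setup path.
	 */
	irq_set_status_flags(irq, IRQ_NOAUTOEN);
	return 0;
}

When several status bits must change at once, irq_modify_status(irq, clear, set) does it in a single call, as the CLPS711X hunk below does with irq_modify_status(virq, IRQ_NOPROBE, flags).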
Signed-off-by: Rob Herring Cc: linux-arm-kernel@lists.infradead.org Cc: Russell King Cc: Jason Cooper Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: Stephen Warren Cc: Lee Jones Cc: Alexander Shiyan Cc: Maxime Ripard Cc: linux-rpi-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Link: http://lkml.kernel.org/r/1440889285-5637-3-git-send-email-robh@kernel.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index 94ddc96..cd7d3bc 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -163,7 +163,7 @@ static int combiner_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_and_handler(irq, &combiner_chip, handle_level_irq); irq_set_chip_data(irq, &combiner_data[hw >> 3]); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); return 0; } diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 693b9fb..655cb96 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -200,7 +200,6 @@ static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq, { irq_set_chip_and_handler(virq, &armada_370_xp_msi_irq_chip, handle_simple_irq); - set_irq_flags(virq, IRQF_VALID); return 0; } @@ -317,7 +316,7 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h, irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip, handle_level_irq); } - set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(virq); return 0; } diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index 56c9cf4..bf9cc5f 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -166,7 +166,7 @@ static int __init armctrl_of_init(struct device_node *node, BUG_ON(irq <= 0); irq_set_chip_and_handler(irq, &armctrl_chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } } diff --git a/drivers/irqchip/irq-clps711x.c b/drivers/irqchip/irq-clps711x.c index 2dd929e..eb5eb0c 100644 --- a/drivers/irqchip/irq-clps711x.c +++ b/drivers/irqchip/irq-clps711x.c @@ -132,14 +132,14 @@ static int __init clps711x_intc_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { irq_flow_handler_t handler = handle_level_irq; - unsigned int flags = IRQF_VALID | IRQF_PROBE; + unsigned int flags = 0; if (!clps711x_irqs[hw].flags) return 0; if (clps711x_irqs[hw].flags & CLPS711X_FLAG_FIQ) { handler = handle_bad_irq; - flags |= IRQF_NOAUTOEN; + flags |= IRQ_NOAUTOEN; } else if (clps711x_irqs[hw].eoi) { handler = handle_fasteoi_irq; } @@ -149,7 +149,7 @@ static int __init clps711x_intc_irq_map(struct irq_domain *h, unsigned int virq, writel_relaxed(0, clps711x_intc->base + clps711x_irqs[hw].eoi); irq_set_chip_and_handler(virq, &clps711x_intc_chip, handler); - set_irq_flags(virq, flags); + irq_modify_status(virq, IRQ_NOPROBE, flags); return 0; } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b5418f5..36ecfc8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -752,13 +752,13 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_percpu_devid(irq); irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } /* SPIs */ if (hw >= 32 && hw < gic_data.irq_nr) { irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, 
NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } /* LPIs */ if (hw >= 8192 && hw < GIC_ID_NR) { @@ -766,7 +766,6 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, return -EPERM; irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID); } return 0; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 00bb7c0..982c09c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -890,11 +890,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_percpu_devid(irq); irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } else { irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } return 0; } diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c index a0128c7..8f3ca8f 100644 --- a/drivers/irqchip/irq-hip04.c +++ b/drivers/irqchip/irq-hip04.c @@ -307,11 +307,11 @@ static int hip04_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_percpu_devid(irq); irq_set_chip_and_handler(irq, &hip04_irq_chip, handle_percpu_devid_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } else { irq_set_chip_and_handler(irq, &hip04_irq_chip, handle_fasteoi_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } irq_set_chip_data(irq, d->host_data); return 0; diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index 7b784b6..deb89d6 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -127,7 +127,7 @@ static int keystone_irq_map(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, kirq); irq_set_chip_and_handler(virq, &kirq->chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(virq); return 0; } diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index ea6e3a9..013fc96 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -164,7 +164,6 @@ static int mmp_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID); return 0; } @@ -234,7 +233,6 @@ void __init icu_init_irq(void) for (irq = 0; irq < 64; irq++) { icu_mask_irq(irq_get_irq_data(irq)); irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID); } irq_set_default_host(icu_data[0].domain); set_handle_irq(mmp_handle_irq); @@ -337,7 +335,6 @@ void __init mmp2_init_icu(void) irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq); } - set_irq_flags(irq, IRQF_VALID); } irq_set_default_host(icu_data[0].domain); set_handle_irq(mmp2_handle_irq); diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 1faf812..604df63 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -84,7 +84,6 @@ static int icoll_irq_domain_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { irq_set_chip_and_handler(virq, &mxs_icoll_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); return 0; } diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index d3546a6..9525335 100644 --- 
a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -353,7 +353,6 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, h->host_data); irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); /* kill me now */ return 0; } diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c index e47572a..7154b01 100644 --- a/drivers/irqchip/irq-s3c24xx.c +++ b/drivers/irqchip/irq-s3c24xx.c @@ -466,13 +466,11 @@ static int s3c24xx_irq_map(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, irq_data); - set_irq_flags(virq, IRQF_VALID); - if (parent_intc && irq_data->type != S3C_IRQTYPE_NONE) { if (irq_data->parent_irq > 31) { pr_err("irq-s3c24xx: parent irq %lu is out of range\n", irq_data->parent_irq); - goto err; + return -EINVAL; } parent_irq_data = &parent_intc->irqs[irq_data->parent_irq]; @@ -485,18 +483,12 @@ static int s3c24xx_irq_map(struct irq_domain *h, unsigned int virq, if (!irqno) { pr_err("irq-s3c24xx: could not find mapping for parent irq %lu\n", irq_data->parent_irq); - goto err; + return -EINVAL; } irq_set_chained_handler(irqno, s3c_irq_demux); } return 0; - -err: - set_irq_flags(virq, 0); - - /* the only error can result from bad mapping data*/ - return -EINVAL; } static const struct irq_domain_ops s3c24xx_irq_ops = { @@ -1174,8 +1166,6 @@ static int s3c24xx_irq_map_of(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, irq_data); - set_irq_flags(virq, IRQF_VALID); - return 0; } diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 4ad3e7c..0704362 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -83,7 +83,7 @@ static int sun4i_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { irq_set_chip_and_handler(virq, &sun4i_irq_chip, handle_fasteoi_irq); - set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(virq); return 0; } diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 1b1c63e..598ab3f 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -128,7 +128,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, f); irq_set_chip_and_handler(irq, &f->chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); return 0; } diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index cb85504..b956dff 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -201,7 +201,7 @@ static int vic_irqdomain_map(struct irq_domain *d, unsigned int irq, return -EPERM; irq_set_chip_and_handler(irq, &vic_chip, handle_level_irq); irq_set_chip_data(irq, v->base); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); return 0; } diff --git a/drivers/irqchip/irq-vt8500.c b/drivers/irqchip/irq-vt8500.c index 8371d99..f9af0af 100644 --- a/drivers/irqchip/irq-vt8500.c +++ b/drivers/irqchip/irq-vt8500.c @@ -167,7 +167,6 @@ static int vt8500_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { irq_set_chip_and_handler(virq, &vt8500_irq_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); return 0; } diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index c838c92..1ccd2ab 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ 
-211,7 +211,6 @@ static void __init spear_shirq_register(struct spear_shirq *shirq, for (i = 0; i < shirq->nr_irqs; i++) { irq_set_chip_and_handler(shirq->virq_base + i, shirq->irq_chip, handle_simple_irq); - set_irq_flags(shirq->virq_base + i, IRQF_VALID); irq_set_chip_data(shirq->virq_base + i, shirq); } } -- cgit v0.10.2 From 3e15135b98ecaa0228eb5dad42be7516cac38aa0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sat, 29 Aug 2015 18:01:23 -0500 Subject: sh: Kill off set_irq_flags usage set_irq_flags is ARM specific with custom flags which have genirq equivalents. Convert drivers to use the genirq interfaces directly, so we can kill off set_irq_flags. The translation of flags is as follows: IRQF_VALID -> !IRQ_NOREQUEST IRQF_PROBE -> !IRQ_NOPROBE IRQF_NOAUTOEN -> IRQ_NOAUTOEN For IRQs managed by an irqdomain, the irqdomain core code handles clearing and setting IRQ_NOREQUEST already, so there is no need to do this in .map() functions and we can simply remove the set_irq_flags calls. Some users also modify IRQ_NOPROBE and this has been maintained although it is not clear that is really needed. There appears to be a great deal of blind copy and paste of this code. Signed-off-by: Rob Herring Acked-by: Simon Horman Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sh@vger.kernel.org Cc: Russell King Cc: Magnus Damm Link: http://lkml.kernel.org/r/1440889285-5637-4-git-send-email-robh@kernel.org Signed-off-by: Thomas Gleixner diff --git a/drivers/sh/intc/internals.h b/drivers/sh/intc/internals.h index 7dff08e..6ce7f0d 100644 --- a/drivers/sh/intc/internals.h +++ b/drivers/sh/intc/internals.h @@ -99,15 +99,7 @@ static inline struct intc_desc_int *get_intc_desc(unsigned int irq) */ static inline void activate_irq(int irq) { -#ifdef CONFIG_ARM - /* ARM requires an extra step to clear IRQ_NOREQUEST, which it - * sets on behalf of every irq_chip. Also sets IRQ_NOPROBE. - */ - set_irq_flags(irq, IRQF_VALID); -#else - /* same effect on other architectures */ - irq_set_noprobe(irq); -#endif + irq_modify_status(irq, IRQ_NOREQUEST, IRQ_NOPROBE); } static inline int intc_handle_int_cmp(const void *a, const void *b) -- cgit v0.10.2 From eb811129ed9ea50ef2dfe8a83ddde6a16d1eb8d4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sat, 29 Aug 2015 18:01:24 -0500 Subject: ARM: Remove unused set_irq_flags Now that all users of set_irq_flags and custom flags are converted to genirq functions, the ARM specific set_irq_flags can be removed.
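The sh conversion above funnels everything through irq_modify_status(irq, clr, set), whose contract is to clear the bits in clr first and then set the bits in set. A simplified model of the effect (illustration only; the real helper in kernel/irq/chip.c takes the descriptor lock and only honours bits in IRQF_MODIFY_MASK):

/* Illustration only: approximate effect of irq_modify_status(). */
static void irq_status_model(unsigned long *status,
			     unsigned long clr, unsigned long set)
{
	*status &= ~clr;	/* e.g. drop IRQ_NOREQUEST: IRQ becomes requestable */
	*status |= set;		/* e.g. add IRQ_NOPROBE: IRQ skipped by autoprobe */
}

With (IRQ_NOREQUEST, IRQ_NOPROBE) as arguments, activate_irq() therefore yields an IRQ that can be requested but is excluded from autoprobing on every architecture, which is what the removed CONFIG_ARM special case achieved.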
Signed-off-by: Rob Herring Tested-by: Kevin Hilman Cc: linux-arm-kernel@lists.infradead.org Cc: Russell King Signed-off-by: Thomas Gleixner diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h index af79da4..9beb929 100644 --- a/arch/arm/include/asm/hw_irq.h +++ b/arch/arm/include/asm/hw_irq.h @@ -11,12 +11,6 @@ static inline void ack_bad_irq(int irq) pr_crit("unexpected IRQ trap at vector %02x\n", irq); } -void set_irq_flags(unsigned int irq, unsigned int flags); - -#define IRQF_VALID (1 << 0) -#define IRQF_PROBE (1 << 1) -#define IRQF_NOAUTOEN (1 << 2) - #define ARCH_IRQ_INIT_FLAGS (IRQ_NOREQUEST | IRQ_NOPROBE) #endif diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 5ff4826..2766183 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -79,26 +79,6 @@ asm_do_IRQ(unsigned int irq, struct pt_regs *regs) handle_IRQ(irq, regs); } -void set_irq_flags(unsigned int irq, unsigned int iflags) -{ - unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - - if (irq >= nr_irqs) { - pr_err("Trying to set irq flags for IRQ%d\n", irq); - return; - } - - if (iflags & IRQF_VALID) - clr |= IRQ_NOREQUEST; - if (iflags & IRQF_PROBE) - clr |= IRQ_NOPROBE; - if (!(iflags & IRQF_NOAUTOEN)) - clr |= IRQ_NOAUTOEN; - /* Order is clear bits in "clr" then set bits in "set" */ - irq_modify_status(irq, clr, set & ~clr); -} -EXPORT_SYMBOL_GPL(set_irq_flags); - void __init init_IRQ(void) { int ret; -- cgit v0.10.2 From ae80a2f2d142b97ec68434313f0263256916683d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sat, 29 Aug 2015 18:01:25 -0500 Subject: arm64: Remove unused set_irq_flags Now that all users of set_irq_flags and custom flags are converted to genirq functions, the ARM specific set_irq_flags can be removed. Signed-off-by: Rob Herring Acked-by: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Cc: Russell King Cc: Will Deacon Signed-off-by: Thomas Gleixner diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 2bb7009..a57601f 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -43,9 +43,4 @@ static inline void ack_bad_irq(unsigned int irq) irq_err_count++; } -/* - * No arch-specific IRQ flags. - */ -#define set_irq_flags(irq, flags) - #endif /* __ASM_HARDIRQ_H */ -- cgit v0.10.2 From 9bf9fde2c98ba8362ea1d41d8bd8b32a23776e67 Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Wed, 16 Sep 2015 09:13:50 -0400 Subject: KVM: s390: Replace incorrect atomic_or with atomic_andnot The offending commit accidentally replaces an atomic_clear with an atomic_or instead of an atomic_andnot in kvm_s390_vcpu_request_handled. The symptom is that kvm guests on s390 hang on startup. This patch simply replaces the incorrect atomic_or with atomic_andnot. Fixes: 805de8f43c20 (atomic: Replace atomic_{set,clear}_mask() usage) Signed-off-by: Jason J.
Herne Acked-by: Christian Borntraeger Acked-by: Peter Zijlstra (Intel) Acked-by: Cornelia Huck Signed-off-by: Paolo Bonzini diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f807ab..0a67c40 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1575,7 +1575,7 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) { - atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); + atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); } /* -- cgit v0.10.2 From f9f9e7b776142fb1c0782cade004cc8e0147a199 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2015 11:51:12 -0400 Subject: Revert "cgroup: simplify threadgroup locking" This reverts commit b5ba75b5fc0e8404e2c50cb68f39bb6a53fc916f. d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify threadgroup locking") changed how cgroup synchronizes against task fork and exits so that it uses global percpu_rwsem instead of per-process rwsem; unfortunately, the write [un]lock paths of percpu_rwsem always involve synchronize_rcu_expedited() which turned out to be too expensive. Improvements for percpu_rwsem are scheduled to be merged in the coming v4.4-rc1 merge window which alleviates this issue. For now, revert the two commits to restore per-process rwsem. They will be re-applied for the v4.4-rc1 merge window. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com Reported-by: Christian Borntraeger Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Paolo Bonzini Cc: stable@vger.kernel.org # v4.2+ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2cf0f79..115091e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2460,13 +2460,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, if (!cgrp) return -ENODEV; - percpu_down_write(&cgroup_threadgroup_rwsem); +retry_find_task: rcu_read_lock(); if (pid) { tsk = find_task_by_vpid(pid); if (!tsk) { + rcu_read_unlock(); ret = -ESRCH; - goto out_unlock_rcu; + goto out_unlock_cgroup; } } else { tsk = current; @@ -2482,23 +2483,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, */ if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; - goto out_unlock_rcu; + rcu_read_unlock(); + goto out_unlock_cgroup; } get_task_struct(tsk); rcu_read_unlock(); + percpu_down_write(&cgroup_threadgroup_rwsem); + if (threadgroup) { + if (!thread_group_leader(tsk)) { + /* + * a race with de_thread from another thread's exec() + * may strip us of our leadership, if this happens, + * there is no choice but to throw this task away and + * try again; this is + * "double-double-toil-and-trouble-check locking". 
+ */ + percpu_up_write(&cgroup_threadgroup_rwsem); + put_task_struct(tsk); + goto retry_find_task; + } + } + ret = cgroup_procs_write_permission(tsk, cgrp, of); if (!ret) ret = cgroup_attach_task(cgrp, tsk, threadgroup); - put_task_struct(tsk); - goto out_unlock_threadgroup; - -out_unlock_rcu: - rcu_read_unlock(); -out_unlock_threadgroup: percpu_up_write(&cgroup_threadgroup_rwsem); + + put_task_struct(tsk); +out_unlock_cgroup: cgroup_kn_unlock(of->kn); return ret ?: nbytes; } @@ -2643,8 +2658,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); - percpu_down_write(&cgroup_threadgroup_rwsem); - /* look up all csses currently attached to @cgrp's subtree */ down_read(&css_set_rwsem); css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { @@ -2700,8 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; last_task = task; + percpu_down_write(&cgroup_threadgroup_rwsem); + /* raced against de_thread() from another thread? */ + if (!thread_group_leader(task)) { + percpu_up_write(&cgroup_threadgroup_rwsem); + put_task_struct(task); + continue; + } + ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); + percpu_up_write(&cgroup_threadgroup_rwsem); put_task_struct(task); if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) @@ -2711,7 +2733,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) out_finish: cgroup_migrate_finish(&preloaded_csets); - percpu_up_write(&cgroup_threadgroup_rwsem); return ret; } -- cgit v0.10.2 From 0c986253b939cc14c69d4adbe2b4121bdf4aa220 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2015 11:51:12 -0400 Subject: Revert "sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem" This reverts commit d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14. d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify threadgroup locking") changed how cgroup synchronizes against task fork and exits so that it uses global percpu_rwsem instead of per-process rwsem; unfortunately, the write [un]lock paths of percpu_rwsem always involve synchronize_rcu_expedited() which turned out to be too expensive. Improvements for percpu_rwsem are scheduled to be merged in the coming v4.4-rc1 merge window which alleviates this issue. For now, revert the two commits to restore per-process rwsem. They will be re-applied for the v4.4-rc1 merge window. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com Reported-by: Christian Borntraeger Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Paolo Bonzini Cc: stable@vger.kernel.org # v4.2+ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4d8fcf2..8492721 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -473,31 +473,8 @@ struct cgroup_subsys { unsigned int depends_on; }; -extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; - -/** - * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups - * @tsk: target task - * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. 
- */ -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) -{ - percpu_down_read(&cgroup_threadgroup_rwsem); -} - -/** - * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups - * @tsk: target task - * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). - */ -static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) -{ - percpu_up_read(&cgroup_threadgroup_rwsem); -} +void cgroup_threadgroup_change_begin(struct task_struct *tsk); +void cgroup_threadgroup_change_end(struct task_struct *tsk); #else /* CONFIG_CGROUPS */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d0b380e..e38681f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,6 +25,13 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#ifdef CONFIG_CGROUPS +#define INIT_GROUP_RWSEM(sig) \ + .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), +#else +#define INIT_GROUP_RWSEM(sig) +#endif + #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -57,6 +64,7 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ + INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9da..b7b9501 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -762,6 +762,18 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + /* + * group_rwsem prevents new tasks from entering the threadgroup and + * member tasks from exiting, more specifically, setting of + * PF_EXITING. fork and exit paths are protected with this rwsem + * using threadgroup_change_begin/end(). Users which require + * threadgroup to remain stable should use threadgroup_[un]lock() + * which also takes care of exec path. Currently, cgroup is the + * only user. + */ + struct rw_semaphore group_rwsem; +#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 115091e..2c9eae6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); */ static DEFINE_SPINLOCK(release_agent_path_lock); -struct percpu_rw_semaphore cgroup_threadgroup_rwsem; - #define cgroup_assert_mutex_or_rcu_locked() \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&cgroup_mutex), \ @@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset, return cset; } +void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + down_read(&tsk->signal->group_rwsem); +} + +void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + up_read(&tsk->signal->group_rwsem); +} + +/** + * threadgroup_lock - lock threadgroup + * @tsk: member task of the threadgroup to lock + * + * Lock the threadgroup @tsk belongs to. No new task is allowed to enter + * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or + * change ->group_leader/pid. This is useful for cases where the threadgroup + * needs to stay stable across blockable operations. + * + * fork and exit explicitly call threadgroup_change_{begin|end}() for + * synchronization.
While held, no new task will be added to threadgroup + * and no existing live task will have its PF_EXITING set. + * + * de_thread() does threadgroup_change_{begin|end}() when a non-leader + * sub-thread becomes a new leader. + */ +static void threadgroup_lock(struct task_struct *tsk) +{ + down_write(&tsk->signal->group_rwsem); +} + +/** + * threadgroup_unlock - unlock threadgroup + * @tsk: member task of the threadgroup to unlock + * + * Reverse threadgroup_lock(). + */ +static inline void threadgroup_unlock(struct task_struct *tsk) +{ + up_write(&tsk->signal->group_rwsem); +} + static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) { struct cgroup *root_cgrp = kf_root->kn->priv; @@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, lockdep_assert_held(&css_set_rwsem); /* - * We are synchronized through cgroup_threadgroup_rwsem against - * PF_EXITING setting such that we can't race against cgroup_exit() - * changing the css_set to init_css_set and dropping the old one. + * We are synchronized through threadgroup_lock() against PF_EXITING + * setting such that we can't race against cgroup_exit() changing the + * css_set to init_css_set and dropping the old one. */ WARN_ON_ONCE(tsk->flags & PF_EXITING); old_cset = task_css_set(tsk); @@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) * @src_cset and add it to @preloaded_csets, which should later be cleaned * up by cgroup_migrate_finish(). * - * This function may be called without holding cgroup_threadgroup_rwsem - * even if the target is a process. Threads may be created and destroyed - * but as long as cgroup_mutex is not dropped, no new css_set can be put - * into play and the preloaded css_sets are guaranteed to cover all - * migrations. + * This function may be called without holding threadgroup_lock even if the + * target is a process. Threads may be created and destroyed but as long + * as cgroup_mutex is not dropped, no new css_set can be put into play and + * the preloaded css_sets are guaranteed to cover all migrations. */ static void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, @@ -2240,7 +2278,7 @@ err: * @threadgroup: whether @leader points to the whole process or a single task * * Migrate a process or task denoted by @leader to @cgrp. If migrating a - * process, the caller must be holding cgroup_threadgroup_rwsem. The + * process, the caller must be holding threadgroup_lock of @leader. The * caller is also responsible for invoking cgroup_migrate_add_src() and * cgroup_migrate_prepare_dst() on the targets before invoking this * function and following up with cgroup_migrate_finish(). @@ -2368,7 +2406,7 @@ out_release_tset: * @leader: the task or the leader of the threadgroup to be attached * @threadgroup: attach the whole threadgroup? * - * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. + * Call holding cgroup_mutex and threadgroup_lock of @leader. */ static int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup) @@ -2490,7 +2528,7 @@ retry_find_task: get_task_struct(tsk); rcu_read_unlock(); - percpu_down_write(&cgroup_threadgroup_rwsem); + threadgroup_lock(tsk); if (threadgroup) { if (!thread_group_leader(tsk)) { /* @@ -2500,7 +2538,7 @@ retry_find_task: * try again; this is * "double-double-toil-and-trouble-check locking". 
*/ - percpu_up_write(&cgroup_threadgroup_rwsem); + threadgroup_unlock(tsk); put_task_struct(tsk); goto retry_find_task; } @@ -2510,7 +2548,7 @@ retry_find_task: if (!ret) ret = cgroup_attach_task(cgrp, tsk, threadgroup); - percpu_up_write(&cgroup_threadgroup_rwsem); + threadgroup_unlock(tsk); put_task_struct(tsk); out_unlock_cgroup: @@ -2713,17 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; last_task = task; - percpu_down_write(&cgroup_threadgroup_rwsem); + threadgroup_lock(task); /* raced against de_thread() from another thread? */ if (!thread_group_leader(task)) { - percpu_up_write(&cgroup_threadgroup_rwsem); + threadgroup_unlock(task); put_task_struct(task); continue; } ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); - percpu_up_write(&cgroup_threadgroup_rwsem); + threadgroup_unlock(task); put_task_struct(task); if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) @@ -5045,7 +5083,6 @@ int __init cgroup_init(void) unsigned long key; int ssid, err; - BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); diff --git a/kernel/fork.c b/kernel/fork.c index 7d5f0f1..2845623 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1149,6 +1149,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sched_autogroup_fork(sig); +#ifdef CONFIG_CGROUPS + init_rwsem(&sig->group_rwsem); +#endif + sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; -- cgit v0.10.2 From c2f58514cfb374d5368c9da945f1765cd48eb0da Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Wed, 5 Aug 2015 11:53:57 +0100 Subject: arm/arm64: KVM: vgic: Check for !irqchip_in_kernel() when mapping resources Until b26e5fdac43c ("arm/arm64: KVM: introduce per-VM ops"), kvm_vgic_map_resources() used to include a check on irqchip_in_kernel(), and vgic_v2_map_resources() still has it. But now vm_ops are not initialized until we call kvm_vgic_create(). Therefore kvm_vgic_map_resources() can being called without a VGIC, and we die because vm_ops.map_resources is NULL. Fixing this restores QEMU's kernel-irqchip=off option to a working state, allowing to use GIC emulation in userspace. Fixes: b26e5fdac43c ("arm/arm64: KVM: introduce per-VM ops") Cc: stable@vger.kernel.org Signed-off-by: Pavel Fedin [maz: reworked commit message] Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ce404a5..dc017ad 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -446,7 +446,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. 
*/ - if (unlikely(!vgic_ready(kvm))) { + if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { ret = kvm_vgic_map_resources(kvm); if (ret) return ret; -- cgit v0.10.2 From 0243ed44ad4a25dbd2e92ad97e5e12a1a6c72d6c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 15 Sep 2015 04:59:21 -0700 Subject: spi: fix kernel-doc warnings in spi.h Fix the following 'make htmldocs' warnings: .//include/linux/spi/spi.h:71: warning: No description found for parameter 'lock' .//include/linux/spi/spi.h:71: warning: Excess struct/union/enum/typedef member 'clock' description in 'spi_statistics' Signed-off-by: Geliang Tang Signed-off-by: Mark Brown diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8af..6b00f18 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -34,7 +34,7 @@ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @clock: lock protecting this structure + * @lock: lock protecting this structure * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled -- cgit v0.10.2 From e26d15f735f570a1178c3bba4d85e6f58e098fdd Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 15 Sep 2015 14:46:45 +0200 Subject: spi: mediatek: fix wrong error return value on probe Commit adcbcfea15d62 ("spi: mediatek: fix spi clock usage error") added a new sel_clk but introduced bugs in the error paths since the wrong struct clk pointers are passed to PTR_ERR(). Fixes: adcbcfea15d62 ("spi: mediatek: fix spi clock usage error") Signed-off-by: Javier Martinez Canillas Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6fbb5e5..e9839a4 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -585,14 +585,14 @@ static int mtk_spi_probe(struct platform_device *pdev) mdata->sel_clk = devm_clk_get(&pdev->dev, "sel-clk"); if (IS_ERR(mdata->sel_clk)) { - ret = PTR_ERR(mdata->spi_clk); + ret = PTR_ERR(mdata->sel_clk); dev_err(&pdev->dev, "failed to get sel-clk: %d\n", ret); goto err_put_master; } mdata->spi_clk = devm_clk_get(&pdev->dev, "spi-clk"); if (IS_ERR(mdata->spi_clk)) { - ret = PTR_ERR(mdata->parent_clk); + ret = PTR_ERR(mdata->spi_clk); dev_err(&pdev->dev, "failed to get spi-clk: %d\n", ret); goto err_put_master; } -- cgit v0.10.2 From 3c8f7710c1c44fb650bc29b6ef78ed8b60cfaa28 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 15 Sep 2015 20:51:31 +0200 Subject: ASoC: fix broken pxa SoC support The previous fix of pxa library support, which was introduced to fix the library dependency, broke the previous SoC behavior, where a machine code binding pxa2xx-ac97 with a coded relied on : - sound/soc/pxa/pxa2xx-ac97.c - sound/soc/codecs/XXX.c For example, the mioa701_wm9713.c machine code is currently broken. The "select ARM" statement wrongly selects the soc/arm/pxa2xx-ac97 for compilation, as per an unfortunate fate SND_PXA2XX_AC97 is both declared in sound/arm/Kconfig and sound/soc/pxa/Kconfig. Fix this by ensuring that SND_PXA2XX_SOC correctly triggers the correct pxa2xx-ac97 compilation. Fixes: 846172dfe33c ("ASoC: fix SND_PXA2XX_LIB Kconfig warning") Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown diff --git a/sound/arm/Kconfig b/sound/arm/Kconfig index 885683a..e040621 100644 --- a/sound/arm/Kconfig +++ b/sound/arm/Kconfig @@ -9,6 +9,14 @@ menuconfig SND_ARM Drivers that are implemented on ASoC can be found in "ALSA for SoC audio support" section. 
+config SND_PXA2XX_LIB + tristate + select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 + select SND_DMAENGINE_PCM + +config SND_PXA2XX_LIB_AC97 + bool + if SND_ARM config SND_ARMAACI @@ -21,13 +29,6 @@ config SND_PXA2XX_PCM tristate select SND_PCM -config SND_PXA2XX_LIB - tristate - select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 - -config SND_PXA2XX_LIB_AC97 - bool - config SND_PXA2XX_AC97 tristate "AC97 driver for the Intel PXA2xx chip" depends on ARCH_PXA diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 39cea80..f2bf866 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,7 +1,6 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" depends on ARCH_PXA - select SND_ARM select SND_PXA2XX_LIB help Say Y or M if you want to add support for codecs attached to @@ -25,7 +24,6 @@ config SND_PXA2XX_AC97 config SND_PXA2XX_SOC_AC97 tristate select AC97_BUS - select SND_ARM select SND_PXA2XX_LIB_AC97 select SND_SOC_AC97_BUS -- cgit v0.10.2 From 0f1d08dd6aaf7e9dd1039dce910c1f8ff58ee8c1 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 15 Sep 2015 15:34:22 -0500 Subject: regulator: tps65218: Fix missing zero typo Add missing zero to value. This will be needed when range checking is implemented. Signed-off-by: Andrew F. Davis Acked-by: Dan Murphy Signed-off-by: Mark Brown diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c index 7f97223..a02c1b9 100644 --- a/drivers/regulator/tps65218-regulator.c +++ b/drivers/regulator/tps65218-regulator.c @@ -73,7 +73,7 @@ static const struct regulator_linear_range dcdc4_ranges[] = { }; static struct tps_info tps65218_pmic_regs[] = { - TPS65218_INFO(DCDC1, "DCDC1", 850000, 167500), + TPS65218_INFO(DCDC1, "DCDC1", 850000, 1675000), TPS65218_INFO(DCDC2, "DCDC2", 850000, 1675000), TPS65218_INFO(DCDC3, "DCDC3", 900000, 3400000), TPS65218_INFO(DCDC4, "DCDC4", 1175000, 3400000), -- cgit v0.10.2 From e1595d89ae8180e0d3815cc75336ac3484de0aa0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 10 Sep 2015 15:55:21 +0200 Subject: clk: tegra: dfll: Properly protect OPP list The OPP list needs to be protected against concurrent accesses. Using simple RCU read locks does the trick and gets rid of the following lockdep warning: =============================== [ INFO: suspicious RCU usage. ] 4.2.0-next-20150908 #1 Not tainted ------------------------------- drivers/base/power/opp.c:460 Missing rcu_read_lock() or dev_opp_list_lock protection! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by kworker/u8:0/6: #0: ("%s""deferwq"){++++.+}, at: [] process_one_work+0x118/0x4bc #1: (deferred_probe_work){+.+.+.}, at: [] process_one_work+0x118/0x4bc #2: (&dev->mutex){......}, at: [] __device_attach+0x20/0x118 #3: (prepare_lock){+.+...}, at: [] clk_prepare_lock+0x10/0xf8 stack backtrace: CPU: 2 PID: 6 Comm: kworker/u8:0 Not tainted 4.2.0-next-20150908 #1 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: deferwq deferred_probe_work_func [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x94/0xd4) [] (dump_stack) from [] (dev_pm_opp_find_freq_ceil+0x108/0x114) [] (dev_pm_opp_find_freq_ceil) from [] (dfll_calculate_rate_request+0xb8/0x170) [] (dfll_calculate_rate_request) from [] (dfll_clk_round_rate+0x1c/0x2c) [] (dfll_clk_round_rate) from [] (clk_calc_new_rates+0x1b8/0x228) [] (clk_calc_new_rates) from [] (clk_core_set_rate_nolock+0x44/0xac) [] (clk_core_set_rate_nolock) from [] (clk_set_rate+0x24/0x34) [] (clk_set_rate) from [] (tegra124_cpufreq_probe+0x120/0x230) [] (tegra124_cpufreq_probe) from [] (platform_drv_probe+0x44/0xac) [] (platform_drv_probe) from [] (driver_probe_device+0x218/0x304) [] (driver_probe_device) from [] (bus_for_each_drv+0x60/0x94) [] (bus_for_each_drv) from [] (__device_attach+0xb4/0x118) ata1: SATA link down (SStatus 0 SControl 300) [] (__device_attach) from [] (bus_probe_device+0x88/0x90) [] (bus_probe_device) from [] (deferred_probe_work_func+0x58/0x8c) [] (deferred_probe_work_func) from [] (process_one_work+0x188/0x4bc) [] (process_one_work) from [] (worker_thread+0x4c/0x4f4) [] (worker_thread) from [] (kthread+0xe4/0xf8) [] (kthread) from [] (ret_from_fork+0x14/0x24) Signed-off-by: Thierry Reding Fixes: c4fe70ada40f ("clk: tegra: Add closed loop support for the DFLL") [vince.h@nvidia.com: Unlock rcu on error path] Signed-off-by: Vince Hsu [sboyd@codeaurora.org: Dropped second hunk that nested the rcu read lock unnecessarily] Signed-off-by: Stephen Boyd diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c index c2ff859..c4e3a52 100644 --- a/drivers/clk/tegra/clk-dfll.c +++ b/drivers/clk/tegra/clk-dfll.c @@ -682,11 +682,17 @@ static int find_lut_index_for_rate(struct tegra_dfll *td, unsigned long rate) struct dev_pm_opp *opp; int i, uv; + rcu_read_lock(); + opp = dev_pm_opp_find_freq_ceil(td->soc->dev, &rate); - if (IS_ERR(opp)) + if (IS_ERR(opp)) { + rcu_read_unlock(); return PTR_ERR(opp); + } uv = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + for (i = 0; i < td->i2c_lut_size; i++) { if (regulator_list_voltage(td->vdd_reg, td->i2c_lut[i]) == uv) return i; -- cgit v0.10.2 From 9054a31d603ea82c6ed4914170a8708812a16324 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sun, 15 Feb 2015 12:33:49 +0000 Subject: clk: check for invalid parent index of orphans in __clk_init() If a mux clock is initialised (by hardware or firmware) with an invalid parent, its ->get_parent() can return an out of range index. For example, the generic mux clock attempts to return -EINVAL, which due to the u8 return type ends up a rather large number. Using this index with the parent_names[] array results in an invalid pointer and (usually) a crash in the following strcmp(). This patch adds a check for the parent index being in range, ignoring clocks reporting invalid values. 
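The check being added below can be read in isolation: the u8 returned by a mux clock's ->get_parent() is untrusted until range-checked, because an error value such as -EINVAL truncated to u8 becomes a large positive index. A hedged sketch of the same guard (struct clk_core is private to drivers/clk/clk.c; the helper name is made up):

#include <linux/string.h>

/* Sketch: only dereference parent_names[] after validating the index. */
static bool example_orphan_matches(struct clk_core *orphan, const char *name)
{
	int i;

	if (!orphan->num_parents || !orphan->ops->get_parent)
		return false;

	i = orphan->ops->get_parent(orphan->hw);	/* u8, may be garbage */
	if (i < 0 || i >= orphan->num_parents)
		return false;

	return !strcmp(name, orphan->parent_names[i]);
}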
Signed-off-by: Mans Rullgard Tested-by: Rhyland Klein Signed-off-by: Stephen Boyd diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 43e2c3a..0ebcf44 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2437,7 +2437,8 @@ static int __clk_init(struct device *dev, struct clk *clk_user) hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { if (orphan->num_parents && orphan->ops->get_parent) { i = orphan->ops->get_parent(orphan->hw); - if (!strcmp(core->name, orphan->parent_names[i])) + if (i >= 0 && i < orphan->num_parents && + !strcmp(core->name, orphan->parent_names[i])) clk_core_reparent(orphan, core); continue; } -- cgit v0.10.2 From 12fc7306e6ffae4e86680892f2286063d7d6eae7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 16 Sep 2015 11:08:49 +0100 Subject: ARM: get rid of needless #if in signal handling code Remove the #if statement which caused trouble for kernels that support both ARMv6 and ARMv7. Older architectures do not implement these bits, so it should be safe to always clear them. Signed-off-by: Russell King diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 586eef2..29e5dc7 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -343,7 +343,6 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 6 /* * Clear the If-Then Thumb-2 execution state. ARM spec * requires this to be all 000s in ARM mode. Snapdragon @@ -352,11 +351,10 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, * * We must do this whenever we are running on a Thumb-2 * capable CPU, which includes ARMv6T2. However, we elect - * to do this whenever we're on an ARMv6 or later CPU for - * simplicity. + * to always do this to simplify the code; this field is + * marked UNK/SBZP for older architectures. */ cpsr &= ~PSR_IT_MASK; -#endif if (thumb) { cpsr |= PSR_T_BIT; -- cgit v0.10.2 From 90cde5584a34e210850728bebee4c6fb1a7124ba Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 14 Sep 2015 17:49:02 +0100 Subject: ARM: 8437/1: dma-mapping: fix build warning with new DMA_ERROR_CODE definition Commit 96231b2686b5: ("ARM: 8419/1: dma-mapping: harmonize definition of DMA_ERROR_CODE") changed the definition of DMA_ERROR_CODE to use dma_addr_t, which makes the compiler barf on assigning this to an "int" variable on ARM with LPAE enabled: ************* In file included from /src/linux/include/linux/dma-mapping.h:86:0, from /src/linux/arch/arm/mm/dma-mapping.c:21: /src/linux/arch/arm/mm/dma-mapping.c: In function '__iommu_create_mapping': /src/linux/arch/arm/include/asm/dma-mapping.h:16:24: warning: overflow in implicit constant conversion [-Woverflow] #define DMA_ERROR_CODE (~(dma_addr_t)0x0) ^ /src/linux/arch/arm/mm/dma-mapping.c:1252:15: note: in expansion of macro DMA_ERROR_CODE' int i, ret = DMA_ERROR_CODE; ^ ************* Remove the actually unneeded initialization of "ret" in __iommu_create_mapping() and move the variable declaration inside the for-loop to make the scope of this variable more clear. 
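The diagnostic being fixed is plain integer truncation: with LPAE enabled, dma_addr_t is 64 bits wide, so the all-ones DMA_ERROR_CODE cannot be stored in an int. A standalone user-space reproduction of the same warning (illustrative only, not kernel code):

#include <stdint.h>

typedef uint64_t dma_addr_t;	/* 64-bit, as with CONFIG_ARM_LPAE */
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)

int main(void)
{
	int ret = DMA_ERROR_CODE;	/* gcc: overflow in implicit
					 * constant conversion [-Woverflow] */
	return ret != 0;
}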
Signed-off-by: Andre Przywara Signed-off-by: Russell King diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index cba12f3..3594543 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1257,7 +1257,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; dma_addr_t dma_addr, iova; - int i, ret = DMA_ERROR_CODE; + int i; dma_addr = __alloc_iova(mapping, size); if (dma_addr == DMA_ERROR_CODE) @@ -1265,6 +1265,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) iova = dma_addr; for (i = 0; i < count; ) { + int ret; + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; phys_addr_t phys = page_to_phys(pages[i]); unsigned int len, j; -- cgit v0.10.2 From 7ae85dc7687c7e7119053d83d02c560ea217b772 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Wed, 26 Aug 2015 18:26:49 +0100 Subject: ARM: 8425/1: kgdb: Don't try to stop the machine when setting breakpoints In (23a4e40 arm: kgdb: Handle read-only text / modules) we moved to using patch_text() to set breakpoints so that we could handle the case when we had CONFIG_DEBUG_RODATA. That patch used patch_text(). Unfortunately, patch_text() assumes that we're not in atomic context when it runs since it needs to grab a mutex and also wait for other CPUs to stop (which it does with a completion). This would result in a stack crawl if you had CONFIG_DEBUG_ATOMIC_SLEEP and tried to set a breakpoint in kgdb. The crawl looked something like: BUG: scheduling while atomic: swapper/0/0/0x00010007 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-rc7-00133-geb63b34 #1073 Hardware name: Rockchip (Device Tree) (unwind_backtrace) from [] (show_stack+0x20/0x24) (show_stack) from [] (dump_stack+0x84/0xb8) (dump_stack) from [] (__schedule_bug+0x54/0x6c) (__schedule_bug) from [] (__schedule+0x80/0x668) (__schedule) from [] (schedule+0xb8/0xd4) (schedule) from [] (schedule_timeout+0x2c/0x234) (schedule_timeout) from [] (wait_for_common+0xf4/0x188) (wait_for_common) from [] (wait_for_completion+0x20/0x24) (wait_for_completion) from [] (__stop_cpus+0x58/0x70) (__stop_cpus) from [] (stop_cpus+0x3c/0x54) (stop_cpus) from [] (__stop_machine+0xcc/0xe8) (__stop_machine) from [] (stop_machine+0x34/0x44) (stop_machine) from [] (patch_text+0x28/0x34) (patch_text) from [] (kgdb_arch_set_breakpoint+0x40/0x4c) (kgdb_arch_set_breakpoint) from [] (kgdb_validate_break_address+0x2c/0x60) (kgdb_validate_break_address) from [] (dbg_set_sw_break+0x1c/0xdc) (dbg_set_sw_break) from [] (gdb_serial_stub+0x9c4/0xba4) (gdb_serial_stub) from [] (kgdb_cpu_enter+0x1f8/0x60c) (kgdb_cpu_enter) from [] (kgdb_handle_exception+0x19c/0x1d0) (kgdb_handle_exception) from [] (kgdb_compiled_brk_fn+0x30/0x3c) (kgdb_compiled_brk_fn) from [] (do_undefinstr+0x1a4/0x20c) (do_undefinstr) from [] (__und_svc_finish+0x0/0x34) It turns out that when we're in kgdb all the CPUs are stopped anyway so there's no reason we should be calling patch_text(). We can instead directly call __patch_text() which assumes that CPUs have already been stopped. 
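The split that the patch relies on looks roughly like this; a simplified sketch of arch/arm/kernel/patch.c that omits Thumb-2 handling and the remapping of read-only text:

#include <linux/stop_machine.h>
#include <asm/cacheflush.h>

struct patch {
	void *addr;
	unsigned int insn;
};

void __patch_text(void *addr, unsigned int insn)
{
	/* No locking: the caller guarantees all other CPUs are stopped. */
	*(unsigned int *)addr = insn;
	flush_icache_range((unsigned long)addr, (unsigned long)addr + 4);
}

static int patch_text_stop_machine(void *data)
{
	struct patch *patch = data;

	__patch_text(patch->addr, patch->insn);
	return 0;
}

void patch_text(void *addr, unsigned int insn)
{
	struct patch patch = { .addr = addr, .insn = insn };

	/* May sleep: stop_machine() takes a mutex and waits for all CPUs. */
	stop_machine(patch_text_stop_machine, &patch, NULL);
}

Inside kgdb the stop_machine() round trip is both unnecessary and illegal (atomic context), hence the direct __patch_text() calls in the hunks below.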
Fixes: 23a4e4050ba9 ("arm: kgdb: Handle read-only text / modules") Reported-by: Aapo Vienamo Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Acked-by: Kees Cook Signed-off-by: Russell King diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index a6ad93c..fd9eefc 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -259,15 +259,17 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return err; - patch_text((void *)bpt->bpt_addr, - *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); return 0; } -- cgit v0.10.2 From 1cd03890ea64795e53f17a94928cca22495acb2a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 16 Sep 2015 12:04:51 +0200 Subject: powerpc32: memcpy: only use dcbz once cache is enabled memcpy() uses instruction dcbz to speed up copy by not wasting time loading cache line with data that will be overwritten. Some platforms like mpc52xx do not have cache active at startup and can therefore not use memcpy(). Although no part of the code explicitly uses memcpy(), GCC makes calls to it. This patch modifies memcpy() such that at startup, memcpy() unconditionally jumps to generic_memcpy() which doesn't use the dcbz instruction. Once the initial MMU is set up, in machine_init() we patch memcpy() by replacing this unconditional jump by a NOP. Reported-by: Michal Sojka Tested-by: Thomas Gleixner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index bb02e9f..b316ab7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -38,6 +38,7 @@ #include #include #include +#include #define DBG(fmt...) @@ -116,6 +117,8 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); + patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 2ef50c6..da5847d 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -128,6 +128,10 @@ _GLOBAL(memset) * the destination area is cacheable. * We only use this version if the source and dest don't overlap. * -- paulus. + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memmove) cmplw 0,r3,r4 @@ -135,6 +139,7 @@ _GLOBAL(memmove) /* fall through */ _GLOBAL(memcpy) + b generic_memcpy add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 -- cgit v0.10.2 From 400c47d81ca383fc87d5a3937b234e23e26909fb Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 16 Sep 2015 12:04:53 +0200 Subject: powerpc32: memset: only use dcbz once cache is enabled memset() uses instruction dcbz to speed up clearing by not wasting time loading cache line with data that will be overwritten.
Some platforms like mpc52xx do not have cache active at startup and can therefore not use memset(). Although no part of the code explicitly uses memset(), GCC may make calls to it. This patch modifies memset() such that at startup, memset() unconditionally skips the optimised block that uses the dcbz instruction. Once the initial MMU is set up, in machine_init() we patch memset() by replacing this unconditional jump by a NOP. Tested-by: Thomas Gleixner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b316ab7..ad8c9db 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -110,6 +110,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. */ +extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ + notrace void __init machine_init(u64 dt_ptr) { lockdep_init(); @@ -118,6 +120,7 @@ notrace void __init machine_init(u64 dt_ptr) udbg_early_init(); patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + patch_instruction(&memset_nocache_branch, PPC_INST_NOP); /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index da5847d..c44df2d 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -73,6 +73,10 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination * area is cacheable. -- paulus + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore skip the optimised bloc that uses dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memset) rlwimi r4,r4,8,16,23 @@ -88,6 +92,8 @@ _GLOBAL(memset) subf r6,r0,r6 cmplwi 0,r4,0 bne 2f /* Use normal procedure if r4 is not zero */ +_GLOBAL(memset_nocache_branch) + b 2f /* Skip optimised bloc until cache is enabled */ clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 -- cgit v0.10.2 From 93a51aa406f61985d12b60f9860f949b4bf7e43d Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 1 Sep 2015 23:35:07 +0200 Subject: leds: aat1290: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-aat1290.c b/drivers/leds/leds-aat1290.c index fd7c25f..8ceb471 100644 --- a/drivers/leds/leds-aat1290.c +++ b/drivers/leds/leds-aat1290.c @@ -559,6 +559,7 @@ static const struct of_device_id aat1290_led_dt_match[] = { { .compatible = "skyworks,aat1290" }, {}, }; +MODULE_DEVICE_TABLE(of, aat1290_led_dt_match); static struct platform_driver aat1290_led_driver = { .probe = aat1290_led_probe, -- cgit v0.10.2 From 6528ca19c1c8d4756b534a1ce9df4c3849f9dd04 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 1 Sep 2015 23:35:38 +0200 Subject: leds: bcm6328: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work.
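This and the surrounding leds fixes are all instances of one idiom: hooking an of_device_id table up to .of_match_table makes probing work, but module autoloading additionally requires the table to be exported as a modalias with MODULE_DEVICE_TABLE(). A generic sketch with a made-up driver name and compatible string:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int foo_leds_probe(struct platform_device *pdev)
{
	return 0;	/* placeholder */
}

static const struct of_device_id foo_leds_of_match[] = {
	{ .compatible = "vendor,foo-leds" },	/* hypothetical */
	{ /* sentinel */ },
};
/*
 * Without this, the module carries no "of:N*T*Cvendor,foo-leds" alias,
 * so udev cannot modprobe it when the device node is instantiated.
 */
MODULE_DEVICE_TABLE(of, foo_leds_of_match);

static struct platform_driver foo_leds_driver = {
	.probe = foo_leds_probe,
	.driver = {
		.name = "foo-leds",
		.of_match_table = foo_leds_of_match,
	},
};
module_platform_driver(foo_leds_driver);

MODULE_LICENSE("GPL");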
Signed-off-by: Luis de Bethencourt Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c index 986fe1e..1793727 100644 --- a/drivers/leds/leds-bcm6328.c +++ b/drivers/leds/leds-bcm6328.c @@ -395,6 +395,7 @@ static const struct of_device_id bcm6328_leds_of_match[] = { { .compatible = "brcm,bcm6328-leds", }, { }, }; +MODULE_DEVICE_TABLE(of, bcm6328_leds_of_match); static struct platform_driver bcm6328_leds_driver = { .probe = bcm6328_leds_probe, -- cgit v0.10.2 From 01736f07cad613937db13f7483b04e8f2af81883 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 1 Sep 2015 23:35:55 +0200 Subject: leds: bcm6358: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-bcm6358.c b/drivers/leds/leds-bcm6358.c index 21f9693..7ea3526 100644 --- a/drivers/leds/leds-bcm6358.c +++ b/drivers/leds/leds-bcm6358.c @@ -226,6 +226,7 @@ static const struct of_device_id bcm6358_leds_of_match[] = { { .compatible = "brcm,bcm6358-leds", }, { }, }; +MODULE_DEVICE_TABLE(of, bcm6358_leds_of_match); static struct platform_driver bcm6358_leds_driver = { .probe = bcm6358_leds_probe, -- cgit v0.10.2 From 3682c7bdf5065b99638c56adc1be1507ed5c0f0e Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 1 Sep 2015 23:36:15 +0200 Subject: leds: ktd2692: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-ktd2692.c b/drivers/leds/leds-ktd2692.c index 2ae8c4d..feca07b 100644 --- a/drivers/leds/leds-ktd2692.c +++ b/drivers/leds/leds-ktd2692.c @@ -426,6 +426,7 @@ static const struct of_device_id ktd2692_match[] = { { .compatible = "kinetic,ktd2692", }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, ktd2692_match); static struct platform_driver ktd2692_driver = { .driver = { -- cgit v0.10.2 From e3df661ff3243ce4d1719c212ee88521e0e7811b Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 1 Sep 2015 23:36:41 +0200 Subject: leds: max77693: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-max77693.c b/drivers/leds/leds-max77693.c index df348a0..afbb140 100644 --- a/drivers/leds/leds-max77693.c +++ b/drivers/leds/leds-max77693.c @@ -1080,6 +1080,7 @@ static const struct of_device_id max77693_led_dt_match[] = { { .compatible = "maxim,max77693-led" }, {}, }; +MODULE_DEVICE_TABLE(of, max77693_led_dt_match); static struct platform_driver max77693_led_driver = { .probe = max77693_led_probe, -- cgit v0.10.2 From 98f9cc7ffcf2cb5b695208a4cd4927bb41c550f3 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 1 Sep 2015 23:36:59 +0200 Subject: leds: leds-ns2: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work.
Signed-off-by: Luis de Bethencourt Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index b33514d..a95a612 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -337,6 +337,7 @@ static const struct of_device_id of_ns2_leds_match[] = { { .compatible = "lacie,ns2-leds", }, {}, }; +MODULE_DEVICE_TABLE(of, of_ns2_leds_match); #endif /* CONFIG_OF_GPIO */ struct ns2_led_priv { -- cgit v0.10.2 From 305c324f43a2601cbeb894c17b68c8a8f3c13387 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Fri, 4 Sep 2015 12:27:09 +0200 Subject: leds: aat1290: add 'static' modifier to init_mm_current_scale Function init_mm_current_scale is used only locally. Make it static then. Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/leds-aat1290.c b/drivers/leds/leds-aat1290.c index 8ceb471..ac77d36 100644 --- a/drivers/leds/leds-aat1290.c +++ b/drivers/leds/leds-aat1290.c @@ -331,7 +331,7 @@ static void aat1290_led_validate_mm_current(struct aat1290_led *led, cfg->max_brightness = b + 1; } -int init_mm_current_scale(struct aat1290_led *led, +static int init_mm_current_scale(struct aat1290_led *led, struct aat1290_led_config_data *cfg) { int max_mm_current_percent[] = { 20, 22, 25, 28, 32, 36, 40, 45, 50, 56, -- cgit v0.10.2 From 35bfe456be3ae1e11dd63e83096c95d0c6429c51 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Mon, 7 Sep 2015 17:06:05 +0200 Subject: leds: leds-ipaq-micro: Add LEDS_CLASS dependency Fix missing Kconfig LEDS_CLASS dependency. Signed-off-by: Jacek Anaszewski Reviewed-by: Linus Walleij diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 70f4255..9444d93 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -170,6 +170,7 @@ config LEDS_SUNFIRE config LEDS_IPAQ_MICRO tristate "LED Support for the Compaq iPAQ h3xxx" + depends on LEDS_CLASS depends on MFD_IPAQ_MICRO help Choose this option if you want to use the notification LED on -- cgit v0.10.2 From 2338f73d407d5abe2036d92716ba25ef5279c3d2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 7 Sep 2015 14:25:01 +0200 Subject: leds:lp55xx: Correct Kconfig dependency for f/w user helper The commit [b67893206fc0: leds:lp55xx: fix firmware loading error] tries to address the firmware file handling with user helper, but it selects the wrong Kconfig option, CONFIG_FW_LOADER_USER_HELPER_FALLBACK. Since the wrong option was enabled, the system got a regression -- it suffers from unexpectedly long delays for non-present firmware files. This patch corrects the Kconfig dependency to the right one, CONFIG_FW_LOADER_USER_HELPER. This doesn't change the fallback behavior but only enables UMH when needed. Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=944661 Fixes: b67893206fc0 ('leds:lp55xx: fix firmware loading error') Cc: # v4.2+ Signed-off-by: Takashi Iwai Signed-off-by: Jacek Anaszewski diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 9444d93..42990f2 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -230,7 +230,7 @@ config LEDS_LP55XX_COMMON tristate "Common Driver for TI/National LP5521/5523/55231/5562/8501" depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 || LEDS_LP8501 select FW_LOADER - select FW_LOADER_USER_HELPER_FALLBACK + select FW_LOADER_USER_HELPER help This option supports common operations for LP5521/5523/55231/5562/8501 devices.
-- cgit v0.10.2 From b07bb761cf6a333b2108cdbe4ffee66407189bb1 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 16 Sep 2015 08:38:08 -0700 Subject: drm/vmwgfx: Only build on X86 ioremap_cache() is currently not available on some architectures. Reported-by: kbuild test robot Signed-off-by: Thomas Hellstrom diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig index 67720f7..b49445d 100644 --- a/drivers/gpu/drm/vmwgfx/Kconfig +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -1,6 +1,6 @@ config DRM_VMWGFX tristate "DRM driver for VMware Virtual GPU" - depends on DRM && PCI + depends on DRM && PCI && X86 select FB_DEFERRED_IO select FB_CFB_FILLRECT select FB_CFB_COPYAREA -- cgit v0.10.2 From d8949aad3eab5d396f4fefcd581773bf07b9a79e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 4 Sep 2015 12:22:46 +0300 Subject: Bluetooth: Delay check for conn->smp in smp_conn_security() There are several actions that smp_conn_security() might make that do not require a valid SMP context (conn->smp pointer). One of these actions is to encrypt the link with an existing LTK. If the SMP context wasn't initialized properly we should still allow the independent actions to be done, i.e. the check for the context should only be done at the last possible moment. Reported-by: Chuck Ebbert Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 4.0+ diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index ad82324..0510a57 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2311,12 +2311,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (!conn) return 1; - chan = conn->smp; - if (!chan) { - BT_ERR("SMP security requested but not available"); - return 1; - } - if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; @@ -2330,6 +2324,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) return 0; + chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } + l2cap_chan_lock(chan); /* If SMP is already in progress ignore this request */ -- cgit v0.10.2 From e56d82a116176f7af9d642b560abbbd3a2b68013 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Sep 2015 15:31:24 +0100 Subject: arm64: cpu hotplug: ensure we mask out CPU_TASKS_FROZEN in notifiers We have a couple of CPU hotplug notifiers for resetting the CPU debug state to a sane value when a CPU comes online. This patch ensures that we mask out CPU_TASKS_FROZEN so that we don't miss any online events occuring due to suspend/resume. 
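Concretely, the masking idiom being applied looks like this (a sketch with illustrative names; foo_reset_state() stands in for clear_os_lock()/hw_breakpoint_reset()). CPU_ONLINE_FROZEN is simply CPU_ONLINE with the CPU_TASKS_FROZEN bit or'ed in, so stripping that bit lets a single comparison cover both the normal hotplug path and the suspend/resume path, which is exactly what the hunks below do:

    #include <linux/cpu.h>
    #include <linux/notifier.h>
    #include <linux/smp.h>

    static void foo_reset_state(void *unused)
    {
            /* per-CPU debug state reinitialisation would go here */
    }

    static int foo_cpu_notify(struct notifier_block *self,
                              unsigned long action, void *hcpu)
    {
            int cpu = (long)hcpu;

            /* Matches CPU_ONLINE and CPU_ONLINE_FROZEN alike. */
            if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
                    smp_call_function_single(cpu, foo_reset_state, NULL, 1);

            return NOTIFY_OK;
    }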
Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3b62a..cebf786 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, clear_os_lock, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index c97040e..bba85c8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, void *hcpu) { int cpu = (long)hcpu; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } -- cgit v0.10.2 From bdec97a855ef1e239f130f7a11584721c9a1bf04 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2015 12:07:06 +0100 Subject: arm64: compat: fix vfp save/restore across signal handlers in big-endian When saving/restoring the VFP registers from a compat (AArch32) signal frame, we rely on the compat registers forming a prefix of the native register file and therefore make use of copy_{to,from}_user to transfer between the native fpsimd_state and the compat_vfp_sigframe. Unfortunately, this doesn't work so well in a big-endian environment. Our fpsimd save/restore code operates directly on 128-bit quantities (Q registers) whereas the compat_vfp_sigframe represents the registers as an array of 64-bit (D) registers. The architecture packs the compat D registers into the Q registers, with the least significant bytes holding the lower register. Consequently, we need to swap the 64-bit halves when converting between these two representations on a big-endian machine. This patch replaces the __copy_{to,from}_user invocations in our compat VFP signal handling code with explicit __put_user loops that operate on 64-bit values and swap them accordingly. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 948f0ad..71ef6dc 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. 
Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); -- cgit v0.10.2 From df057cc7b4fa59e9b55f07ffdb6c62bf02e99a00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Mar 2015 12:15:02 +0000 Subject: arm64: errata: add module build workaround for erratum #843419 Cortex-A53 processors <= r0p4 are affected by erratum #843419 which can lead to a memory access using an incorrect address in certain sequences headed by an ADRP instruction. There is a linker fix to generate veneers for ADRP instructions, but this doesn't work for kernel modules which are built as unlinked ELF objects. This patch adds a new config option for the erratum which, when enabled, builds kernel modules with the mcmodel=large flag. This uses absolute addressing for all kernel symbols, thereby removing the use of ADRP as a PC-relative form of addressing. The ADRP relocs are removed from the module loader so that we fail to load any potentially affected modules. Cc: Acked-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8b6e378..07d1811 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -332,6 +332,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. 
+ endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15ff5b4..f9914d7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf410..876eb8d 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; -- cgit v0.10.2 From ad5001cc7cdf9aaee5eb213fdee657e4a3c94776 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 17 Sep 2015 13:37:00 +0200 Subject: netfilter: nf_log: wait for rcu grace after logger unregistration The nf_log_unregister() function needs to call synchronize_rcu() to make sure that the objects are not dereferenced anymore on module removal. Fixes: 5962815a6a56 ("netfilter: nf_log: use an array of loggers instead of list") Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index a5ebd7d..a5d41df 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -117,6 +117,7 @@ void nf_log_unregister(struct nf_logger *logger) RCU_INIT_POINTER(loggers[i][logger->type], NULL); } mutex_unlock(&nf_log_mutex); + synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unregister); -- cgit v0.10.2 From 88d64253785936d75323c74e7126d180e26de560 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Sep 2015 13:59:08 +0200 Subject: s390/hibernate: fix save and restore of vector registers The swsusp_arch_suspend()/swsusp_arch_resume() functions currently only save and restore the floating point registers. If the task that started the hibernation process is using vector registers they can get lost. To fix this just call save_fpu_regs in swsusp_arch_suspend(), the restore will happen automatically on return to user space. 
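Rendered in C, the shape of the fix is simply the following (a sketch: the real swsusp_arch_suspend() is assembly, as the diff below shows, and this wrapper name is hypothetical). save_fpu_regs() spills the live FP/VX state into the thread structure, and the normal lazy-restore path reloads it on return to user space, so no explicit restore leg is needed:

    int swsusp_arch_suspend_c(void)         /* hypothetical C rendering */
    {
            save_fpu_regs();        /* FPRs/VRs -> thread structure */
            /* store prefix, access, control and general registers,
             * then create the hibernation image as before ... */
            return swsusp_save();
    }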
Reported-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index ca62946..2d6b6e8 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -30,6 +30,9 @@ ENTRY(swsusp_arch_suspend) aghi %r15,-STACK_FRAME_OVERHEAD stg %r1,__SF_BACKCHAIN(%r15) + /* Store FPU registers */ + brasl %r14,save_fpu_regs + /* Deactivate DAT */ stnsm __SF_EMPTY(%r15),0xfb @@ -47,23 +50,6 @@ ENTRY(swsusp_arch_suspend) /* Store registers */ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ - stfpc 0x31c(%r1) /* store fpu control */ - std 0,0x200(%r1) /* store f0 */ - std 1,0x208(%r1) /* store f1 */ - std 2,0x210(%r1) /* store f2 */ - std 3,0x218(%r1) /* store f3 */ - std 4,0x220(%r1) /* store f4 */ - std 5,0x228(%r1) /* store f5 */ - std 6,0x230(%r1) /* store f6 */ - std 7,0x238(%r1) /* store f7 */ - std 8,0x240(%r1) /* store f8 */ - std 9,0x248(%r1) /* store f9 */ - std 10,0x250(%r1) /* store f10 */ - std 11,0x258(%r1) /* store f11 */ - std 12,0x260(%r1) /* store f12 */ - std 13,0x268(%r1) /* store f13 */ - std 14,0x270(%r1) /* store f14 */ - std 15,0x278(%r1) /* store f15 */ stam %a0,%a15,0x340(%r1) /* store access registers */ stctg %c0,%c15,0x380(%r1) /* store control registers */ stmg %r0,%r15,0x280(%r1) /* store general registers */ @@ -249,24 +235,6 @@ restore_registers: lctlg %c0,%c15,0x380(%r13) /* load control registers */ lam %a0,%a15,0x340(%r13) /* load access registers */ - lfpc 0x31c(%r13) /* load fpu control */ - ld 0,0x200(%r13) /* load f0 */ - ld 1,0x208(%r13) /* load f1 */ - ld 2,0x210(%r13) /* load f2 */ - ld 3,0x218(%r13) /* load f3 */ - ld 4,0x220(%r13) /* load f4 */ - ld 5,0x228(%r13) /* load f5 */ - ld 6,0x230(%r13) /* load f6 */ - ld 7,0x238(%r13) /* load f7 */ - ld 8,0x240(%r13) /* load f8 */ - ld 9,0x248(%r13) /* load f9 */ - ld 10,0x250(%r13) /* load f10 */ - ld 11,0x258(%r13) /* load f11 */ - ld 12,0x260(%r13) /* load f12 */ - ld 13,0x268(%r13) /* load f13 */ - ld 14,0x270(%r13) /* load f14 */ - ld 15,0x278(%r13) /* load f15 */ - /* Load old stack */ lg %r15,0x2f8(%r13) -- cgit v0.10.2 From 9380cf5a884e237fc0e2571d5adf0b43bb4412c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 9 Sep 2015 13:15:00 +0200 Subject: s390: fix floating point register corruption The critical section cleanup code misses to add the offset of the thread_struct to the task address. Therefore, if the critical section code gets executed, it may corrupt the task struct or restore the contents of the floating point registers from the wrong memory location. Fixes d0164ee20d "s390/kernel: remove save_fpu_regs() parameter and use __LC_CURRENT instead". Signed-off-by: Heiko Carstens Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 247b7aa..09b039d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1191,6 +1191,7 @@ cleanup_critical: clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) jhe 1f lg %r2,__LC_CURRENT + aghi %r2,__TASK_thread 0: # Store floating-point controls stfpc __THREAD_FPU_fpc(%r2) 1: # Load register save area and check if VX is active @@ -1252,6 +1253,7 @@ cleanup_critical: clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl) jhe 6f lg %r4,__LC_CURRENT + aghi %r4,__TASK_thread lfpc __THREAD_FPU_fpc(%r4) tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ? 
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area -- cgit v0.10.2 From 8d4bd0ed0439dfc780aab801a085961925ed6838 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Sep 2015 15:25:39 +0200 Subject: s390/compat: correct uc_sigmask of the compat signal frame The uc_sigmask in the ucontext structure is an array of words to keep the 64 signal bits (or 1024 if you ask glibc but the kernel sigset_t only has 64 bits). For 64 bit the sigset_t contains a single 8 byte word, but for 31 bit there are two 4 byte words. The compat signal handler code uses a simple copy of the 64 bit sigset_t to the 31 bit compat_sigset_t. As s390 is a big-endian architecture this is incorrect, the two words in the 31 bit sigset_t array need to be swapped. Cc: Reported-by: Stefan Liebler Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index eb46642..e0f9d27 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -48,6 +48,19 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; +static inline void sigset_to_sigset32(unsigned long *set64, + compat_sigset_word *set32) +{ + set32[0] = (compat_sigset_word) set64[0]; + set32[1] = (compat_sigset_word)(set64[0] >> 32); +} + +static inline void sigset32_to_sigset(compat_sigset_word *set32, + unsigned long *set64) +{ + set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32); +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -281,10 +294,12 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); save_fpu_regs(); if (restore_sigregs32(regs, &frame->sregs)) @@ -302,10 +317,12 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + if (__copy_from_user(&cset, &frame->uc.uc_sigmask, sizeof(cset))) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -377,7 +394,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, return -EFAULT; /* Create struct sigcontext32 on the signal stack */ - memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sigset_to_sigset32(set->sig, sc.oldmask); sc.sregs = (__u32)(unsigned long __force) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; @@ -438,6 +455,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; @@ -485,11 +503,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, store_sigregs(); /* Create ucontext on the signal stack. 
*/ + sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || - __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; -- cgit v0.10.2 From 58f8e9da33eb0104f5bee3e8b3ca44e1583c78b9 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 7 Sep 2015 10:52:42 +0200 Subject: s390/cpum_cf: Corrected return code for unauthorized counter sets Previously, the cpum_cf PMU returned -EPERM if a counter is requested and the counter set to which the counter belongs is not authorized. According to the perf_event_open() system call manual, an error code of EPERM indicates an unsupported exclude setting or CAP_SYS_ADMIN is missing. Use ENOENT to indicate that particular counters are not available when the counter set which contains the counter is not authorized. For generic events, this might trigger a fall back, for example, to a software event. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 56fdad4..a956340 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -157,10 +157,14 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) cpuhw = &get_cpu_var(cpu_hw_events); - /* check authorization for cpu counter sets */ + /* Check authorization for cpu counter sets. + * If the particular CPU counter set is not authorized, + * return with -ENOENT in order to fall back to other + * PMUs that might suffice the event request. + */ ctrs_state = cpumf_state_ctl[hwc->config_base]; if (!(ctrs_state & cpuhw->info.auth_ctl)) - err = -EPERM; + err = -ENOENT; put_cpu_var(cpu_hw_events); return err; @@ -536,7 +540,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) */ if (!(cpuhw->flags & PERF_EVENT_TXN)) if (validate_ctr_auth(&event->hw)) - return -EPERM; + return -ENOENT; ctr_set_enable(&cpuhw->state, event->hw.config_base); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; @@ -611,7 +615,7 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); state >>= CPUMF_LCCTL_ENABLE_SHIFT; if ((state & cpuhw->info.auth_ctl) != state) - return -EPERM; + return -ENOENT; cpuhw->flags &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); -- cgit v0.10.2 From 61cc37906b45534fcc2bea03c17e135ec010b624 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 10 Sep 2015 11:18:20 +0200 Subject: s390/vtime: correct scaled cputime for SMT The scaled cputime is supposed to be derived from the normal per-thread cputime by dividing it with the average thread density in the last interval. The calculation of the scaling values for the average thread density is incorrect. The current, incorrect calculation: Ci = cycle count with i active threads T = unscaled cputime, sT = scaled cputime sT = T * (C1 + C2 + ... + Cn) / (1*C1 + 2*C2 + ... + n*Cn) The calculation happens to yield the correct numbers for the simple cases with only one Ci value not zero. But for cases with multiple Ci values not zero it fails. E.g. on a SMT-2 system with one thread active half the time and two threads active for the other half of the time it fails, the scaling factor should be 3/4 but the formula gives 2/3. 
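A quick check of those numbers (a worked example, not part of the commit): in that scenario the cycle counts with one and with two threads active are equal, say C1 = C2 = C, so the old expression yields

  sT/T = (C1 + C2) / (1*C1 + 2*C2) = 2C / 3C = 2/3

whereas the thread actually runs at full speed for half the time and at roughly half speed for the other half, giving the expected factor

  1/2 * 1 + 1/2 * (1/2) = 3/4

which is what the corrected expression produces: (C/1 + C/2) / (C + C) = 3/4.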
The correct formula is sT = T * (C1/1 + C2/2 + ... + Cn/n) / (C1 + C2 + ... + Cn) Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index b9ce650..c865343 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -89,17 +89,21 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) if (smp_cpu_mtid && time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) { u64 cycles_new[32], *cycles_old; - u64 delta, mult, div; + u64 delta, fac, mult, div; cycles_old = this_cpu_ptr(mt_cycles); if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) { + fac = 1; mult = div = 0; for (i = 0; i <= smp_cpu_mtid; i++) { delta = cycles_new[i] - cycles_old[i]; - mult += delta; - div += (i + 1) * delta; + div += delta; + mult *= i + 1; + mult += delta * fac; + fac *= i + 1; } - if (mult > 0) { + div *= fac; + if (div > 0) { /* Update scaling factor */ __this_cpu_write(mt_scaling_mult, mult); __this_cpu_write(mt_scaling_div, div); -- cgit v0.10.2 From 022435713c0391020b55706c5b80fa1be44ba321 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 9 Sep 2015 09:39:58 +0200 Subject: s390: wire up userfaultfd system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 59d2bb4..1f4acec 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -290,7 +290,8 @@ #define __NR_s390_pci_mmio_write 352 #define __NR_s390_pci_mmio_read 353 #define __NR_execveat 354 -#define NR_syscalls 355 +#define __NR_userfaultfd 355 +#define NR_syscalls 356 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index f3f4a13..0c91f37 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -363,3 +363,4 @@ SYSCALL(sys_bpf,compat_sys_bpf) SYSCALL(sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write) SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read) SYSCALL(sys_execveat,compat_sys_execveat) +SYSCALL(sys_userfaultfd,sys_userfaultfd) /* 355 */ diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 2c7cca6..7c1d958 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -72,6 +72,8 @@ #define __NR_userfaultfd 374 #elif defined(__powewrpc__) #define __NR_userfaultfd 364 +#elif defined(__s390__) +#define __NR_userfaultfd 355 #else #error "missing __NR_userfaultfd definition" #endif -- cgit v0.10.2 From 3dc636b2dafbf05b86decc2a55989c3261ffd116 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Tue, 15 Sep 2015 18:09:20 +0200 Subject: s390/configs//zfcpdump_defconfig: Remove CONFIG_MEMSTICK This config option is completely irrelevant for zfcpdump and unfortunately causes a kernel panic on recent kernels in "mspro_block_init()/driver_register()". 
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 1b0184a..92805d6 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_COMPAT_BRK is not set @@ -54,10 +53,6 @@ CONFIG_RAW_DRIVER=y # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set # CONFIG_HID is not set -CONFIG_MEMSTICK=y -CONFIG_MEMSTICK_DEBUG=y -CONFIG_MEMSTICK_UNSAFE_RESUME=y -CONFIG_MSPRO_BLOCK=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -- cgit v0.10.2 From 96d4ee8aaa3cfbfadf137904f3696a9a84fab2d3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 7 Sep 2015 12:15:56 -0400 Subject: s390/s390x: allocate sys_membarrier system call number Signed-off-by: Mathieu Desnoyers CC: Andrew Morton CC: linux-api@vger.kernel.org CC: Heiko Carstens CC: linux-s390@vger.kernel.org Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 1f4acec..ff74d49 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -291,7 +291,8 @@ #define __NR_s390_pci_mmio_read 353 #define __NR_execveat 354 #define __NR_userfaultfd 355 -#define NR_syscalls 356 +#define __NR_membarrier 356 +#define NR_syscalls 357 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 0c91f37..e4167bb 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -364,3 +364,4 @@ SYSCALL(sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write) SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read) SYSCALL(sys_execveat,compat_sys_execveat) SYSCALL(sys_userfaultfd,sys_userfaultfd) /* 355 */ +SYSCALL(sys_membarrier,sys_membarrier) -- cgit v0.10.2 From c4cbba9fa078f55d9f6d081dbb4aec7cf969e7c7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Sep 2015 16:18:59 +0100 Subject: arm64: KVM: Disable virtual timer even if the guest is not using it When running a guest with the architected timer disabled (with QEMU and the kernel_irqchip=off option, for example), it is important to make sure the timer gets turned off. Otherwise, the guest may try to enable it anyway, leading to a screaming HW interrupt. The fix is to unconditionally turn off the virtual timer on guest exit. 
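Schematically, the guest-exit change is the following (a C sketch of the hyp assembly in the diff below; the save-slot name is illustrative):

    u64 ctl;

    /* Save the guest's CNTV_CTL for later restore ... */
    asm volatile("mrs %0, cntv_ctl_el0" : "=r" (ctl));
    guest_cntv_ctl = ctl;                   /* illustrative save slot */

    /* ... then force the virtual timer fully off -- unconditionally,
     * instead of merely clearing the ENABLE bit on the save path. */
    asm volatile("msr cntv_ctl_el0, xzr");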
Cc: stable@vger.kernel.org Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 39aa322..60a83e2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -562,8 +562,6 @@ mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -571,6 +569,9 @@ str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 -- cgit v0.10.2 From 688bc577ac42ae3d07c889a1f0a72f0b23763d58 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Sep 2015 16:18:59 +0100 Subject: arm: KVM: Disable virtual timer even if the guest is not using it When running a guest with the architected timer disabled (with QEMU and the kernel_irqchip=off option, for example), it is important to make sure the timer gets turned off. Otherwise, the guest may try to enable it anyway, leading to a screaming HW interrupt. The fix is to unconditionally turn off the virtual timer on guest exit. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 702740d..51a5950 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -515,8 +515,7 @@ ARM_BE8(rev r6, r6 ) mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - bic r2, #1 @ Clear ENABLE - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL @@ -529,6 +528,9 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: + mov r2, #0 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) -- cgit v0.10.2 From 34c3faa353db8f5d3ce9966cf854d5643c64c4db Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2015 17:15:33 +0100 Subject: arm64: KVM: Remove all traces of the ThumbEE registers Although the ThumbEE registers and traps were present in earlier versions of the v8 architecture, it was retrospectively removed and so we can do the same. Whilst this breaks migrating a guest started on a previous version of the kernel, it is much better to kill these (non existent) registers as soon as possible. 
Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon [maz: added commend about migration] Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cbc5e1a..9694f26 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -172,7 +172,6 @@ #define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ -#define HSTR_EL2_TTEE (1 << 16) #define HSTR_EL2_T(x) (1 << x) /* Hyp Coproccessor Trap Register Shifts */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 67fa0de..5e37710 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -53,9 +53,7 @@ #define IFSR32_EL2 25 /* Instruction Fault Status Register */ #define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ #define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ -#define TEECR32_EL1 28 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 30 +#define NR_SYS_REGS 28 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 60a83e2..8563477 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -433,20 +433,13 @@ mrs x5, ifsr32_el2 stp x4, x5, [x3] - skip_fpsimd_state x8, 3f + skip_fpsimd_state x8, 2f mrs x6, fpexc32_el2 str x6, [x3, #16] -3: - skip_debug_state x8, 2f +2: + skip_debug_state x8, 1f mrs x7, dbgvcr32_el2 str x7, [x3, #24] -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - mrs x4, teecr32_el1 - mrs x5, teehbr32_el1 - stp x4, x5, [x3] 1: .endm @@ -466,16 +459,9 @@ msr dacr32_el2, x4 msr ifsr32_el2, x5 - skip_debug_state x8, 2f + skip_debug_state x8, 1f ldr x7, [x3, #24] msr dbgvcr32_el2, x7 -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - ldp x4, x5, [x3] - msr teecr32_el1, x4 - msr teehbr32_el1, x5 1: .endm diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d0463e..d03d3af 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -539,13 +539,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), trap_dbgauthstatus_el1 }, - /* TEECR32_EL1 */ - { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), - NULL, reset_val, TEECR32_EL1, 0 }, - /* TEEHBR32_EL1 */ - { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), - NULL, reset_val, TEEHBR32_EL1, 0 }, - /* MDCCSR_EL1 */ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), trap_raz_wi }, -- cgit v0.10.2 From ef748917b529847277f07c98c55e1c0ce416449f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 2 Sep 2015 14:31:21 +0800 Subject: arm/arm64: KVM: Remove 'config KVM_ARM_MAX_VCPUS' This patch removes config option of KVM_ARM_MAX_VCPUS, and like other ARCHs, just choose the maximum allowed value from hardware, and follows the reasons: 1) from distribution view, the option has to be defined as the max allowed value because it need to meet all kinds of virtulization applications and need to support most of SoCs; 2) using a bigger value doesn't introduce extra memory consumption, and the help text in Kconfig isn't accurate because kvm_vpu structure isn't allocated until request of creating VCPU is sent from QEMU; 3) the main effect is that the field of vcpus[] in 'struct kvm' becomes a bit bigger(sizeof(void *) per vcpu) and need more cache lines to 
hold the structure, but 'struct kvm' is one generic struct, and it has worked well on other ARCHs already in this way. Also, the world switch frequecy is often low, for example, it is ~2000 when running kernel building load in VM from APM xgene KVM host, so the effect is very small, and the difference can't be observed in my test at all. Cc: Dann Frazier Signed-off-by: Ming Lei Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dcba0fa..c8c226a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -29,12 +29,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#if defined(CONFIG_KVM_ARM_MAX_VCPUS) -#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS -#else -#define KVM_MAX_VCPUS 0 -#endif - #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -44,6 +38,8 @@ #include +#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS + u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index bfb915d..210ecca 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -45,15 +45,4 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 - help - Static number of max supported virtual CPUs per VM. - - If you choose a high number, the vcpu structures will be quite - large, so only choose a reasonable number that you expect to - actually use. - endif # VIRTUALIZATION diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 415938d..3fb58ea 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,12 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#if defined(CONFIG_KVM_ARM_MAX_VCPUS) -#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS -#else -#define KVM_MAX_VCPUS 0 -#endif - #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -43,6 +37,8 @@ #include #include +#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS + #define KVM_VCPU_MAX_FEATURES 3 int __attribute_const__ kvm_target_cpu(void); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index bfffe8f..5c7e920e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -41,15 +41,4 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 - help - Static number of max supported virtual CPUs per VM. - - If you choose a high number, the vcpu structures will be quite - large, so only choose a reasonable number that you expect to - actually use. - endif # VIRTUALIZATION diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d901f1a..4e14dac 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -35,11 +35,7 @@ #define VGIC_V3_MAX_LRS 16 #define VGIC_MAX_IRQS 1024 #define VGIC_V2_MAX_CPUS 8 - -/* Sanity checks... 
*/ -#if (KVM_MAX_VCPUS > 255) -#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now -#endif +#define VGIC_V3_MAX_CPUS 255 #if (VGIC_NR_IRQS_LEGACY & 31) #error "VGIC_NR_IRQS must be a multiple of 32" diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index afbf925..7dd5d62 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -288,7 +288,7 @@ int vgic_v3_probe(struct device_node *vgic_node, vgic->vctrl_base = NULL; vgic->type = VGIC_V3; - vgic->max_gic_vcpus = KVM_MAX_VCPUS; + vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vcpu_res.start, vgic->maint_irq); -- cgit v0.10.2 From 590f07874e8e3c83729b919312c65aea2533c8cf Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Mon, 14 Sep 2015 07:38:36 +0000 Subject: x86/pci/dma: Fix gfp flags for coherent DMA memory allocation Commit 6894258eda2f reversed the order of gfp_flags adjustment in dma_alloc_attrs() for x86 [arch/x86/kernel/pci-dma.c] As a result, relevant flags set by dma_alloc_coherent_gfp_flags() are just discarded and cause coherent DMA memory allocation failure on some devices. Fixes: 6894258eda2f ("dma-mapping: consolidate dma_{alloc,free}_{attrs,coherent}") Signed-off-by: Jun'ichi Nomura Tested-by: Tony Luck Acked-by: Christoph Hellwig Link: http://lkml.kernel.org/r/20150914073834.GA13077@xzibit.linux.bs1.fc.nec.co.jp Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 84b8ef8..1b55de1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -131,8 +131,8 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { - *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); if (!*dev) *dev = &x86_dma_fallback_dev; -- cgit v0.10.2 From 4be9c1fc3df9c3b03c9bde8aec5e44fc73996a3f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 16 Sep 2015 21:24:47 +0800 Subject: libnvdimm: btt_devs: Fix locking in namespace_store Always take device_lock() before nvdimm_bus_lock() to prevent deadlock. Cc: Signed-off-by: Axel Lin Signed-off-by: Dan Williams diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 59ad54a6..cb47751 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -128,13 +128,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nvdimm_bus_lock(dev); device_lock(dev); + nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - device_unlock(dev); nvdimm_bus_unlock(dev); + device_unlock(dev); return rc; } -- cgit v0.10.2 From 4ca8b57a0af145f4e791f21dbca6ad789da9ee8b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 16 Sep 2015 21:25:38 +0800 Subject: libnvdimm: pfn_devs: Fix locking in namespace_store Always take device_lock() before nvdimm_bus_lock() to prevent deadlock. 
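The rule being enforced is the usual lock-ordering discipline; a self-contained sketch of why it matters, with generic mutexes standing in for device_lock() and nvdimm_bus_lock():

    #include <linux/mutex.h>

    static DEFINE_MUTEX(outer);     /* stands in for device_lock()     */
    static DEFINE_MUTEX(inner);     /* stands in for nvdimm_bus_lock() */

    /* Correct: every path acquires outer before inner. */
    static void path_a(void)
    {
            mutex_lock(&outer);
            mutex_lock(&inner);
            /* ... critical section ... */
            mutex_unlock(&inner);
            mutex_unlock(&outer);
    }

    /* Pre-patch shape: one path reversed the order. If this runs while
     * path_a() holds `outer`, each thread waits forever on the lock the
     * other holds -- the classic ABBA deadlock. */
    static void path_b_buggy(void)
    {
            mutex_lock(&inner);
            mutex_lock(&outer);
            /* ... critical section ... */
            mutex_unlock(&outer);
            mutex_unlock(&inner);
    }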
Signed-off-by: Axel Lin Signed-off-by: Dan Williams diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3fd7d0d..71805a1 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -148,13 +148,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn(dev); ssize_t rc; - nvdimm_bus_lock(dev); device_lock(dev); + nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - device_unlock(dev); nvdimm_bus_unlock(dev); + device_unlock(dev); return rc; } -- cgit v0.10.2 From ba8fe0f85e15d047686caf8a42463b592c63c98c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 16 Sep 2015 14:52:21 -0600 Subject: pmem: add proper fencing to pmem_rw_page() pmem_rw_page() needs to call wmb_pmem() on writes to make sure that the newly written data is durable. This flow was added to pmem_rw_bytes() and pmem_make_request() with this commit: commit 61031952f4c8 ("arch, x86: pmem api for ensuring durability of persistent memory updates") ...the pmem_rw_page() path was missed. Cc: Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b952538..0ba6a97 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,6 +92,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_disk->private_data; pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); + if (rw & WRITE) + wmb_pmem(); page_endio(page, rw & WRITE, 0); return 0; -- cgit v0.10.2 From 52cc6eead9095e2faf2ec7afc013aa3af1f01ac5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 17 Sep 2015 09:58:38 -0600 Subject: block: blk-merge: fast-clone bio when splitting rw bios biovecs has become immutable since v3.13, so it isn't necessary to allocate biovecs for the new cloned bios, then we can save one extra biovecs allocation/copy, and the allocation is often not fixed-length and a bit more expensive. For example, if the 'max_sectors_kb' of null blk's queue is set as 16(32 sectors) via sysfs just for making more splits, this patch can increase throught about ~70% in the sequential read test over null_blk(direct io, bs: 1M). Cc: Christoph Hellwig Cc: Kent Overstreet Cc: Ming Lin Cc: Dongsu Park Signed-off-by: Ming Lei This fixes a performance regression introduced by commit 54efd50bfd, and allows us to take full advantage of the fact that we have immutable bio_vecs. Hand applied, as it rejected violently with commit 5014c311baa2. 
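The saving comes from bio_split()'s fast-clone behaviour; in essence the patch reduces the split path to the single call below (a sketch of the semantics, assuming the stock block-layer helper):

    /* The child returned by bio_split() shares the parent's immutable
     * biovec table: it gets only a private bvec_iter, and the parent's
     * iterator is advanced past `sectors`. Unlike bio_clone_bioset(),
     * no variable-length biovec array is allocated and copied. */
    struct bio *split = bio_split(bio, sectors, GFP_NOIO, bs);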
Signed-off-by: Jens Axboe diff --git a/block/blk-merge.c b/block/blk-merge.c index 574ea7c..c4e9c37 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -66,15 +66,12 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio, struct bio_set *bs) { - struct bio *split; struct bio_vec bv, bvprv, *bvprvp = NULL; struct bvec_iter iter; unsigned seg_size = 0, nsegs = 0, sectors = 0; bio_for_each_segment(bv, bio, iter) { - sectors += bv.bv_len >> 9; - - if (sectors > queue_max_sectors(q)) + if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q)) goto split; /* @@ -95,6 +92,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, seg_size += bv.bv_len; bvprv = bv; bvprvp = &bv; + sectors += bv.bv_len >> 9; continue; } new_segment: @@ -105,21 +103,12 @@ new_segment: bvprv = bv; bvprvp = &bv; seg_size = bv.bv_len; + sectors += bv.bv_len >> 9; } return NULL; split: - split = bio_clone_bioset(bio, GFP_NOIO, bs); - - split->bi_iter.bi_size -= iter.bi_size; - bio->bi_iter = iter; - - if (bio_integrity(bio)) { - bio_integrity_advance(bio, split->bi_iter.bi_size); - bio_integrity_trim(split, 0, bio_sectors(split)); - } - - return split; + return bio_split(bio, sectors, GFP_NOIO, bs); } void blk_queue_split(struct request_queue *q, struct bio **bio, -- cgit v0.10.2 From 994518799930fc363d47cb7cf0d1abed1790bf16 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 18 Sep 2015 00:06:28 +0800 Subject: block: fix bounce_end_io When bio bounce is involved, one new bio and its biovecs are cloned from the comming bio, which can be one fast-cloned bio from upper layer(such as dm). So it is obviously wrong to assume the start index of the coming( original) bio's io vector is zero, which can be any value between 0 and (bi_max_vecs - 1), especially in case of bio split. This patch fixes Fedora's booting oops on i386, often with the following kernel log together: > [ 9.026738] systemd[1]: Switching root. > [ 9.036467] systemd-journald[149]: Received SIGTERM from PID 1 > (systemd). > [ 9.082262] BUG: Bad page state in process kworker/u5:1 pfn:372ac > [ 9.083989] page:f3d32ae0 count:0 mapcount:0 mapping:f2252178 > index:0x16a > [ 9.085755] flags: 0x40020021(locked|lru|mappedtodisk) > [ 9.087284] page dumped because: page still charged to cgroup > [ 9.088772] bad because of flags: > [ 9.089731] flags: 0x21(locked|lru) > [ 9.090818] page->mem_cgroup:f2c3e400 Reported-by: Josh Boyer Tested-by: Adam Williamson Cc: Ming Lin Cc: Mike Snitzer Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/block/bounce.c b/block/bounce.c index 2c310ea..457ebd5 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -128,12 +128,14 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) struct bio *bio_orig = bio->bi_private; struct bio_vec *bvec, *org_vec; int i; + int start = bio_orig->bi_iter.bi_idx; /* * free up bounce indirect pages used */ bio_for_each_segment_all(bvec, bio, i) { - org_vec = bio_orig->bi_io_vec + i; + org_vec = bio_orig->bi_io_vec + i + start; + if (bvec->bv_page == org_vec->bv_page) continue; -- cgit v0.10.2 From 7f61f545657281a3a1b0faf68993165ebdecc51b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 16:01:05 +0300 Subject: libceph: don't access invalid memory in keepalive2 path This struct ceph_timespec ceph_ts; ... con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); wraps ceph_ts into a kvec and adds it to con->out_kvec array, yet ceph_ts becomes invalid on return from prepare_write_keepalive(). 
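The bug pattern in miniature (illustrative userspace C, not the ceph code itself):

    #include <sys/uio.h>
    #include <time.h>

    static void prepare_stamp(struct iovec *out)
    {
            struct timespec ts;             /* stack storage */

            clock_gettime(CLOCK_REALTIME, &ts);
            out->iov_base = &ts;            /* local's address escapes... */
            out->iov_len = sizeof(ts);
    }                                       /* ...but ts dies right here */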
As a result, we send out bogus keepalive2 stamps. Fix this by encoding into a ceph_timespec member, similar to how acks are read and written. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7e1252e..b2371d9 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -238,6 +238,8 @@ struct ceph_connection { bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ /* message in temps */ struct ceph_msg_header in_hdr; @@ -248,7 +250,7 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ - struct timespec last_keepalive_ack; + struct timespec last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 525f454..b9b0e3b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1353,11 +1353,12 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec ts = CURRENT_TIME; - struct ceph_timespec ceph_ts; - ceph_encode_timespec(&ceph_ts, &ts); + struct timespec now = CURRENT_TIME; + con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); - con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); + ceph_encode_timespec(&con->out_temp_keepalive2, &now); + con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), + &con->out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } -- cgit v0.10.2 From 335c25858218e76ef47f92ecb9d22e919d36140d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 12:44:22 +0300 Subject: libceph: advertise support for keepalive2 We are the client, but advertise keepalive2 anyway - for consistency, if nothing else. In the future the server might want to know whether its clients support keepalive2. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4763ad6..f89b31d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -107,6 +107,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ + CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_CRUSH_V4) #define CEPH_FEATURES_REQUIRED_DEFAULT \ -- cgit v0.10.2 From 23c3f310e897837aeb8ffe8700b803cb58e7b35d Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 17 Sep 2015 14:50:20 +0100 Subject: regulator: core: Correct return value check in regulator_resolve_supply The ret pointer passed to regulator_dev_lookup is only filled with a valid error code if regulator_dev_lookup returned NULL. Currently regulator_resolve_supply checks this ret value before it checks if a regulator was returned, this can result in valid regulator lookups being ignored. 
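Reduced to its essentials, the corrected flow is the following (a sketch; the point is that regulator_dev_lookup() stores a meaningful error through its ret out-parameter only when it returns NULL, so the pointer must be tested first):

    r = regulator_dev_lookup(dev, rdev->supply_name, &ret);
    if (!r) {
            if (ret == -ENODEV)
                    return 0;       /* no supply specified, none will appear */
            /* otherwise fall back to dummy/full-constraints handling ... */
    }
    /* r is valid here; resolve the supply with it. */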
Fixes: 6261b06de565 ("regulator: Defer lookup of supply to regulator_get") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7150ff6..d0fbaea 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1394,15 +1394,15 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) return 0; r = regulator_dev_lookup(dev, rdev->supply_name, &ret); - if (ret == -ENODEV) { - /* - * No supply was specified for this regulator and - * there will never be one. - */ - return 0; - } - if (!r) { + if (ret == -ENODEV) { + /* + * No supply was specified for this regulator and + * there will never be one. + */ + return 0; + } + if (have_full_constraints()) { r = dummy_regulator_rdev; } else { -- cgit v0.10.2 From d34e210ed3a28050441f15228fd5ed929028d9cd Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Wed, 16 Sep 2015 09:42:59 +0200 Subject: drivers: clk: st: Rename st_pll3200c32_407_c0_x into st_pll3200c32_cx_x Use a generic name for this kind of PLL Correction in dts files are already done here: commit 5eb26c605909 ("ARM: STi: DT: Rename st_pll3200c32_407_c0_x into st_pll3200c32_cx_x") Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 83ccf14..576cd03 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -307,7 +307,7 @@ static const struct clkgen_quadfs_data st_fs660c32_F_416 = { .get_rate = clk_fs660c32_dig_get_rate, }; -static const struct clkgen_quadfs_data st_fs660c32_C_407 = { +static const struct clkgen_quadfs_data st_fs660c32_C = { .nrst_present = true, .nrst = { CLKGEN_FIELD(0x2f0, 0x1, 0), CLKGEN_FIELD(0x2f0, 0x1, 1), @@ -350,7 +350,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { .get_rate = clk_fs660c32_dig_get_rate, }; -static const struct clkgen_quadfs_data st_fs660c32_D_407 = { +static const struct clkgen_quadfs_data st_fs660c32_D = { .nrst_present = true, .nrst = { CLKGEN_FIELD(0x2a0, 0x1, 0), CLKGEN_FIELD(0x2a0, 0x1, 1), @@ -1077,11 +1077,11 @@ static const struct of_device_id quadfs_of_match[] = { }, { .compatible = "st,stih407-quadfs660-C", - .data = &st_fs660c32_C_407 + .data = &st_fs660c32_C }, { .compatible = "st,stih407-quadfs660-D", - .data = &st_fs660c32_D_407 + .data = &st_fs660c32_D }, {} }; diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c index 47a38a9..b2a332c 100644 --- a/drivers/clk/st/clkgen-pll.c +++ b/drivers/clk/st/clkgen-pll.c @@ -193,7 +193,7 @@ static const struct clkgen_pll_data st_pll3200c32_407_a0 = { .ops = &stm_pll3200c32_ops, }; -static const struct clkgen_pll_data st_pll3200c32_407_c0_0 = { +static const struct clkgen_pll_data st_pll3200c32_cx_0 = { /* 407 C0 PLL0 */ .pdn_status = CLKGEN_FIELD(0x2a0, 0x1, 8), .locked_status = CLKGEN_FIELD(0x2a0, 0x1, 24), @@ -205,7 +205,7 @@ static const struct clkgen_pll_data st_pll3200c32_407_c0_0 = { .ops = &stm_pll3200c32_ops, }; -static const struct clkgen_pll_data st_pll3200c32_407_c0_1 = { +static const struct clkgen_pll_data st_pll3200c32_cx_1 = { /* 407 C0 PLL1 */ .pdn_status = CLKGEN_FIELD(0x2c8, 0x1, 8), .locked_status = CLKGEN_FIELD(0x2c8, 0x1, 24), @@ -624,12 +624,12 @@ static const struct of_device_id c32_pll_of_match[] = { .data = &st_pll3200c32_407_a0, }, { - .compatible = "st,stih407-plls-c32-c0_0", - .data = &st_pll3200c32_407_c0_0, + .compatible = "st,plls-c32-cx_0", + .data = &st_pll3200c32_cx_0, }, { - .compatible = 
"st,stih407-plls-c32-c0_1", - .data = &st_pll3200c32_407_c0_1, + .compatible = "st,plls-c32-cx_1", + .data = &st_pll3200c32_cx_1, }, { .compatible = "st,stih407-plls-c32-a9", -- cgit v0.10.2 From d7ba2a024cf19261b34825f89d30ceef9a5ff864 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 29 Jul 2015 18:45:33 +0900 Subject: of: add vendor prefix for Socionext Inc. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index ac5f0c3..82d2ac9 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -203,6 +203,7 @@ sitronix Sitronix Technology Corporation skyworks Skyworks Solutions, Inc. smsc Standard Microsystems Corporation snps Synopsys, Inc. +socionext Socionext Inc. solidrun SolidRun solomon Solomon Systech Limited sony Sony Corporation -- cgit v0.10.2 From dc4dae00d82fedcd7a632f786f9f76c1f7f929a5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Sep 2015 10:49:03 +0100 Subject: Docs: dt: add #msi-cells to GICv3 ITS binding The GICv3 ITS uses sideband master identification data (known as a DeviceID) to identify which master wrote to a doorbell, and this data is used to determine how to react in response to the write. Commit 1e6db000482fa65a ("irqchip/gicv3-its: Add platform MSI support") added support per this binding, but failed to update the documentation. This patch fixes the documentation. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Cc: Rob Herring Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/arm/gic-v3.txt b/Documentation/devicetree/bindings/arm/gic-v3.txt index ddfade4..7803e77 100644 --- a/Documentation/devicetree/bindings/arm/gic-v3.txt +++ b/Documentation/devicetree/bindings/arm/gic-v3.txt @@ -57,6 +57,8 @@ used to route Message Signalled Interrupts (MSI) to the CPUs. These nodes must have the following properties: - compatible : Should at least contain "arm,gic-v3-its". - msi-controller : Boolean property. Identifies the node as an MSI controller +- #msi-cells: Must be <1>. The single msi-cell is the DeviceID of the device + which will generate the MSI. - reg: Specifies the base physical address and size of the ITS registers. @@ -83,6 +85,7 @@ Examples: gic-its@2c200000 { compatible = "arm,gic-v3-its"; msi-controller; + #msi-cells = <1>; reg = <0x0 0x2c200000 0 0x200000>; }; }; @@ -107,12 +110,14 @@ Examples: gic-its@2c200000 { compatible = "arm,gic-v3-its"; msi-controller; + #msi-cells = <1>; reg = <0x0 0x2c200000 0 0x200000>; }; gic-its@2c400000 { compatible = "arm,gic-v3-its"; msi-controller; + #msi-cells = <1>; reg = <0x0 0x2c400000 0 0x200000>; }; }; -- cgit v0.10.2 From eb168b70dea54578a45119fcdb3b48ad5d75fed9 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 8 Sep 2015 12:20:48 +0100 Subject: of: thermal: Fix inconsitency between cooling-*-state and cooling-*-level The device trees in the kernel as well as the binding description in Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt use the cooling-{min,max}-level property. Fix the inconsistency with the binding description in Documentation/devicetree/bindings/thermal/thermal.txt by changing cooling-*-state properties to cooling-*-level. 
Signed-off-by: Punit Agrawal Cc: Eduardo Valentin Cc: Rob Herring Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 8a49362..8320186 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -55,16 +55,16 @@ of heat dissipation). For example a fan's cooling states correspond to the different fan speeds possible. Cooling states are referred to by single unsigned integers, where larger numbers mean greater heat dissipation. The precise set of cooling states associated with a device -(as referred to be the cooling-min-state and cooling-max-state +(as referred to by the cooling-min-level and cooling-max-level properties) should be defined in a particular device's binding. For more examples of cooling devices, refer to the example sections below. Required properties: -- cooling-min-state: An integer indicating the smallest +- cooling-min-level: An integer indicating the smallest Type: unsigned cooling state accepted. Typically 0. Size: one cell -- cooling-max-state: An integer indicating the largest +- cooling-max-level: An integer indicating the largest Type: unsigned cooling state accepted. Size: one cell @@ -225,8 +225,8 @@ cpus { 396000 950000 198000 850000 >; - cooling-min-state = <0>; - cooling-max-state = <3>; + cooling-min-level = <0>; + cooling-max-level = <3>; #cooling-cells = <2>; /* min followed by max */ }; ... @@ -240,8 +240,8 @@ cpus { */ fan0: fan@0x48 { ... - cooling-min-state = <0>; - cooling-max-state = <9>; + cooling-min-level = <0>; + cooling-max-level = <9>; #cooling-cells = <2>; /* min followed by max */ }; }; -- cgit v0.10.2 From 9fa04fbeb78eab6f3817e444644348c46c7e2370 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 8 Sep 2015 12:20:49 +0100 Subject: of: thermal: Mark cooling-*-level properties optional The cooling-{min,max}-level properties are marked as optional in Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt and the usage in various device tree matches this, i.e., some cooling device in the device trees provide these properties while others do not. Make the bindings in Documentation/devicetree/bindings/thermal/thermal.txt consistent with the cpufreq-dt bindings by marking the cooling-*-level properties as optional. Signed-off-by: Punit Agrawal Cc: Eduardo Valentin Cc: Rob Herring Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 8320186..41b817f 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -60,14 +60,6 @@ properties) should be defined in a particular device's binding. For more examples of cooling devices, refer to the example sections below. Required properties: -- cooling-min-level: An integer indicating the smallest - Type: unsigned cooling state accepted. Typically 0. - Size: one cell - -- cooling-max-level: An integer indicating the largest - Type: unsigned cooling state accepted. - Size: one cell - - #cooling-cells: Used to provide cooling device specific information Type: unsigned while referring to it. 
Must be at least 2, in order Size: one cell to specify minimum and maximum cooling state used @@ -77,6 +69,15 @@ Required properties: See Cooling device maps section below for more details on how consumers refer to cooling devices. +Optional properties: +- cooling-min-level: An integer indicating the smallest + Type: unsigned cooling state accepted. Typically 0. + Size: one cell + +- cooling-max-level: An integer indicating the largest + Type: unsigned cooling state accepted. + Size: one cell + * Trip points The trip node is a node to describe a point in the temperature domain -- cgit v0.10.2 From 31b47ae3f18b31d86f553198e624b3b38f6397a2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 12 Sep 2015 12:52:49 +0200 Subject: devicetree: bindings: Extend the bma180 bindings with bma250 info The bma180 / bma250 accelerometers share a driver (at least under Linux), so it makes sense to also have their bindings info in a single .txt. This commit extends the bma180 bindings with bma250 bindings, specifically it specifies how the 2 separate interrupts the bma250 has must be listed in devicetree. The existing bma180 driver is already fully compatible with the specified bindings. Signed-off-by: Hans de Goede Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/iio/accel/bma180.txt b/Documentation/devicetree/bindings/iio/accel/bma180.txt index c593357..4a3679d 100644 --- a/Documentation/devicetree/bindings/iio/accel/bma180.txt +++ b/Documentation/devicetree/bindings/iio/accel/bma180.txt @@ -1,10 +1,11 @@ -* Bosch BMA180 triaxial acceleration sensor +* Bosch BMA180 / BMA250 triaxial acceleration sensor http://omapworld.com/BMA180_111_1002839.pdf +http://ae-bst.resource.bosch.com/media/products/dokumente/bma250/bst-bma250-ds002-05.pdf Required properties: - - compatible : should be "bosch,bma180" + - compatible : should be "bosch,bma180" or "bosch,bma250" - reg : the I2C address of the sensor Optional properties: @@ -13,6 +14,9 @@ Optional properties: - interrupts : interrupt mapping for GPIO IRQ, it should by configured with flags IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING + For the bma250 the first interrupt listed must be the one + connected to the INT1 pin, the second (optional) interrupt + listed must be the one connected to the INT2 pin. Example: -- cgit v0.10.2 From c8aa33a7114704f3f5a359893a8e1de9ea2940a5 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 9 Sep 2015 15:57:59 -0700 Subject: of_pci_irq: Silence bogus "of_irq_parse_pci() failed ..." messages. It is perfectly legitimate for a PCI device to have a PCI_INTERRUPT_PIN value of zero. This happens if the device doesn't use interrupts, or on PCIe devices, where only MSI/MSI-X are supported. Silence the annoying "of_irq_parse_pci() failed with rc=-19" error messages by moving the printing code into of_irq_parse_pci(), and only emitting the message for cases where PCI_INTERRUPT_PIN == 0 is not the cause for an early exit. Signed-off-by: David Daney Signed-off-by: Rob Herring diff --git a/drivers/of/of_pci_irq.c b/drivers/of/of_pci_irq.c index 1710d9d..2306313 100644 --- a/drivers/of/of_pci_irq.c +++ b/drivers/of/of_pci_irq.c @@ -38,8 +38,8 @@ int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq */ rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); if (rc != 0) - return rc; - /* No pin, exit */ + goto err; + /* No pin, exit with no error message.
*/ if (pin == 0) return -ENODEV; @@ -53,8 +53,10 @@ int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq ppnode = pci_bus_to_OF_node(pdev->bus); /* No node for host bridge ? give up */ - if (ppnode == NULL) - return -EINVAL; + if (ppnode == NULL) { + rc = -EINVAL; + goto err; + } } else { /* We found a P2P bridge, check if it has a node */ ppnode = pci_device_to_OF_node(ppdev); @@ -86,7 +88,13 @@ int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq out_irq->args[0] = pin; laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8)); laddr[1] = laddr[2] = cpu_to_be32(0); - return of_irq_parse_raw(laddr, out_irq); + rc = of_irq_parse_raw(laddr, out_irq); + if (rc) + goto err; + return 0; +err: + dev_err(&pdev->dev, "of_irq_parse_pci() failed with rc=%d\n", rc); + return rc; } EXPORT_SYMBOL_GPL(of_irq_parse_pci); @@ -105,10 +113,8 @@ int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin) int ret; ret = of_irq_parse_pci(dev, &oirq); - if (ret) { - dev_err(&dev->dev, "of_irq_parse_pci() failed with rc=%d\n", ret); + if (ret) return 0; /* Proper return code 0 == NO_IRQ */ - } return irq_create_of_mapping(&oirq); } -- cgit v0.10.2 From a41cbe86df3afbc82311a1640e20858c0cd7e065 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 14 Sep 2015 19:54:36 -0400 Subject: Failing to send a CLOSE if file is opened WRONLY and server reboots on a 4.x mount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A test case is as the description says: open(foobar, O_WRONLY); sleep() --> reboot the server close(foobar) The bug is because in nfs4state.c in nfs4_reclaim_open_state() a few lines before going to restart, there is clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &state->flags). NFS4CLNT_RECLAIM_NOGRACE is a flag for the client states, not open owner states. Value of NFS4CLNT_RECLAIM_NOGRACE is 4 which is the value of NFS_O_WRONLY_STATE in nfs4_state->flags. So clearing it wipes out state and when we go to close it, “call_close” doesn’t get set as state flag is not set and CLOSE doesn’t go on the wire. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index da73bc4..5db3246 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1481,7 +1481,7 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); - clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); spin_lock(&sp->so_lock); goto restart; -- cgit v0.10.2 From 048883e0b934d9a5103d40e209cb14b7f33d2933 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 11 Sep 2015 11:14:06 +0800 Subject: nfs: fix pg_test page count calculation We really want sizeof(struct page *) instead. Otherwise we limit maximum IO size to 64 pages rather than 512 pages on a 64bit system. Fixes 2e11f829(nfs: cap request size to fit a kmalloced page array). Cc: Christoph Hellwig Signed-off-by: Peng Tao Fixes: 2e11f8296d22 ("nfs: cap request size to fit a kmalloced page array") Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7c5718b..fe3ddd2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * for it without upsetting the slab allocator.
*/ if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * - sizeof(struct page) > PAGE_SIZE) + sizeof(struct page *) > PAGE_SIZE) return 0; return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); -- cgit v0.10.2 From 03c78827db35be20ffa71cb5ccd2cedb192f95d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Sep 2015 10:42:27 -0400 Subject: SUNRPC: Fix races between socket connection and destroy code When we're destroying the socket transport, we need to ensure that we cancel any existing delayed connection attempts, and order them w.r.t. the call to xs_close(). Reported-by: "Suzuki K. Poulose" Acked-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7be90bc..d2dfbd0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -881,8 +881,11 @@ static void xs_xprt_free(struct rpc_xprt *xprt) */ static void xs_destroy(struct rpc_xprt *xprt) { + struct sock_xprt *transport = container_of(xprt, + struct sock_xprt, xprt); dprintk("RPC: xs_destroy xprt %p\n", xprt); + cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); xs_xprt_free(xprt); module_put(THIS_MODULE); -- cgit v0.10.2 From 306a5549355966e480e0dcacdc6b9321d153e0c0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 16 Sep 2015 17:21:27 -0400 Subject: nfs: fix v4.2 SEEK on files over 2 gigs We're incorrectly assigning a loff_t return to an int. If SEEK_HOLE or SEEK_DATA returns an offset over 2^31 then the application will see a weird lseek() result (usually -EIO). Cc: stable@vger.kernel.org Fixes: bdcc2cd14e4e "NFSv4.2: handle NFS-specific llseek errors" Signed-off-by: J. Bruce Fields Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index d731bbf..0f020e4 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -175,10 +175,12 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct nfs_server *server = NFS_SERVER(file_inode(filep)); struct nfs4_exception exception = { }; - int err; + loff_t err; do { err = _nfs42_proc_llseek(filep, offset, whence); + if (err >= 0) + break; if (err == -ENOTSUPP) return -EOPNOTSUPP; err = nfs4_handle_exception(server, err, &exception); -- cgit v0.10.2 From 17a9618e984234fda167ea5e07eae6f4f2ea2186 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:07 +0200 Subject: SUNRPC: drop null test before destroy functions Remove unneeded NULL test.
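As a hedged aside before the semantic patch below: the cleanup is safe because kmem_cache_destroy() and mempool_destroy() already tolerate a NULL argument. A minimal standalone C sketch of that contract (an illustrative stand-in, not the slab/mempool code):

#include <stdio.h>
#include <stdlib.h>

/* NULL-safe destroy, mirroring the kernel helpers' behaviour. */
static void pool_destroy(void *pool)
{
	if (!pool)
		return;		/* callers need no NULL test of their own */
	free(pool);
}

int main(void)
{
	pool_destroy(NULL);	/* safe: the helper simply returns */
	printf("destroying a NULL pool is a no-op\n");
	return 0;
}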
The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); // Signed-off-by: Julia Lawall Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b140c09..425ca2f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1092,14 +1092,10 @@ void rpc_destroy_mempool(void) { rpciod_stop(); - if (rpc_buffer_mempool) - mempool_destroy(rpc_buffer_mempool); - if (rpc_task_mempool) - mempool_destroy(rpc_task_mempool); - if (rpc_task_slabp) - kmem_cache_destroy(rpc_task_slabp); - if (rpc_buffer_slabp) - kmem_cache_destroy(rpc_buffer_slabp); + mempool_destroy(rpc_buffer_mempool); + mempool_destroy(rpc_task_mempool); + kmem_cache_destroy(rpc_task_slabp); + kmem_cache_destroy(rpc_buffer_slabp); rpc_destroy_wait_queue(&delay_queue); } -- cgit v0.10.2 From 37a1d3611c126fd9782ce5235791f898f053e763 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sun, 13 Sep 2015 10:18:33 -0700 Subject: ipv6: include NLM_F_REPLACE in route replace notifications This patch adds NLM_F_REPLACE flag to ipv6 route replace notifications. This makes nlm_flags in ipv6 replace notifications consistent with ipv4. Signed-off-by: Roopa Prabhu Acked-by: Nicolas Dichtel Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 063d304..aaf9700 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -275,7 +275,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, + unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 8a9ec01..7d2e002 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -851,7 +851,7 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); + inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { @@ -877,7 +877,7 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); + inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; @@ -1422,7 +1422,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fib6_purge_rt(rt, fn, net); - inet6_rt_notify(RTM_DELROUTE, rt, info); + inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3d3c1b2..d5fa502 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3304,7 +3304,8 @@ errout: return err; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, + unsigned int nlm_flags) { struct sk_buff *skb; struct net *net = info->nl_net; @@ -3319,7 +3320,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, nlm_flags); if (err < 0) { /* 
-EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); -- cgit v0.10.2 From 0fdea1e8a2853f79d39b8555cc9de16a7e0ab26f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Sep 2015 23:43:17 -0400 Subject: SUNRPC: Ensure that we wait for connections to complete before retrying Commit 718ba5b87343 moved the responsibility for unlocking the socket to xs_tcp_setup_socket, meaning that the socket will be unlocked before we know that it has finished trying to connect. The following patch is based on an initial patch by Russell King to ensure that we delay clearing the XPRT_CONNECTING flag until we either know that we failed to initiate a connection attempt, or the connection attempt itself failed. Fixes: 718ba5b87343 ("SUNRPC: Add helpers to prevent socket create from racing") Reported-by: Russell King Reported-by: Russell King Tested-by: Russell King Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7591788..357e44c 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,7 @@ struct sock_xprt { /* * Connection of transports */ + unsigned long sock_state; struct delayed_work connect_worker; struct sockaddr_storage srcaddr; unsigned short srcport; @@ -76,6 +77,8 @@ struct sock_xprt { */ #define TCP_RPC_REPLY (1UL << 6) +#define XPRT_SOCK_CONNECTING 1U + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d2dfbd0..c350385 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1438,6 +1438,7 @@ out: static void xs_tcp_state_change(struct sock *sk) { struct rpc_xprt *xprt; + struct sock_xprt *transport; read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) @@ -1449,13 +1450,12 @@ static void xs_tcp_state_change(struct sock *sk) sock_flag(sk, SOCK_ZAPPED), sk->sk_shutdown); + transport = container_of(xprt, struct sock_xprt, xprt); trace_rpc_socket_state_change(xprt, sk->sk_socket); switch (sk->sk_state) { case TCP_ESTABLISHED: spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { - struct sock_xprt *transport = container_of(xprt, - struct sock_xprt, xprt); /* Reset TCP record info */ transport->tcp_offset = 0; @@ -1464,6 +1464,8 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); + xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, -EAGAIN); } @@ -1499,6 +1501,9 @@ static void xs_tcp_state_change(struct sock *sk) smp_mb__after_atomic(); break; case TCP_CLOSE: + if (test_and_clear_bit(XPRT_SOCK_CONNECTING, + &transport->sock_state)) + xprt_clear_connecting(xprt); xs_sock_mark_closed(xprt); } out: @@ -2182,6 +2187,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* Tell the socket layer to start connecting...
*/ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; + set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { case 0: @@ -2243,7 +2249,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_unlock_connect(xprt, transport); - xprt_clear_connecting(xprt); return; case -EINVAL: /* Happens, for instance, if the user specified a link -- cgit v0.10.2 From 3ec0c97959abff33a42db9081c22132bcff5b4f2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 14 Sep 2015 20:12:21 +0800 Subject: nfs/filelayout: Fix NULL reference caused by double freeing of fh_array If filelayout_decode_layout fails, _filelayout_free_lseg will cause a double freeing of fh_array. [ 1179.279800] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1179.280198] IP: [] filelayout_free_fh_array.isra.11+0x1d/0x70 [nfs_layout_nfsv41_files] [ 1179.281010] PGD 0 [ 1179.281443] Oops: 0000 [#1] [ 1179.281831] Modules linked in: nfs_layout_nfsv41_files(OE) nfsv4(OE) nfs(OE) fscache(E) xfs libcrc32c coretemp nfsd crct10dif_pclmul ppdev crc32_pclmul crc32c_intel auth_rpcgss ghash_clmulni_intel nfs_acl lockd vmw_balloon grace sunrpc parport_pc vmw_vmci parport shpchp i2c_piix4 vmwgfx drm_kms_helper ttm drm serio_raw mptspi scsi_transport_spi mptscsih e1000 mptbase ata_generic pata_acpi [last unloaded: fscache] [ 1179.283891] CPU: 0 PID: 13336 Comm: cat Tainted: G OE 4.3.0-rc1-pnfs+ #244 [ 1179.284323] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/20/2014 [ 1179.285206] task: ffff8800501d48c0 ti: ffff88003e3c4000 task.ti: ffff88003e3c4000 [ 1179.285668] RIP: 0010:[] [] filelayout_free_fh_array.isra.11+0x1d/0x70 [nfs_layout_nfsv41_files] [ 1179.286612] RSP: 0018:ffff88003e3c77f8 EFLAGS: 00010202 [ 1179.287092] RAX: 0000000000000000 RBX: ffff88001fe78900 RCX: 0000000000000000 [ 1179.287731] RDX: ffffea0000f40760 RSI: ffff88001fe789c8 RDI: ffff88001fe789c0 [ 1179.288383] RBP: ffff88003e3c7810 R08: ffffea0000f40760 R09: 0000000000000000 [ 1179.289170] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88001fe789c8 [ 1179.289959] R13: ffff88001fe789c0 R14: ffff88004ec05a80 R15: ffff88004f935b88 [ 1179.290791] FS: 00007f4e66bb5700(0000) GS:ffffffff81c29000(0000) knlGS:0000000000000000 [ 1179.291580] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1179.292209] CR2: 0000000000000000 CR3: 00000000203f8000 CR4: 00000000001406f0 [ 1179.292731] Stack: [ 1179.293195] ffff88001fe78900 00000000000000d0 ffff88001fe78178 ffff88003e3c7868 [ 1179.293676] ffffffffa0272737 0000000000000001 0000000000000001 ffff88001fe78800 [ 1179.294151] 00000000614fffce ffffffff81727671 ffff88001fe78100 ffff88001fe78100 [ 1179.294623] Call Trace: [ 1179.295092] [] filelayout_alloc_lseg+0xa7/0x2d0 [nfs_layout_nfsv41_files] [ 1179.295625] [] ? out_of_line_wait_on_bit+0x81/0xb0 [ 1179.296133] [] pnfs_layout_process+0xae/0x320 [nfsv4] [ 1179.296632] [] nfs4_proc_layoutget+0x2b1/0x360 [nfsv4] [ 1179.297134] [] pnfs_update_layout+0x853/0xb30 [nfsv4] [ 1179.297632] [] ? nfs_get_lock_context+0x74/0x170 [nfs] [ 1179.298158] [] filelayout_pg_init_read+0x37/0x50 [nfs_layout_nfsv41_files] [ 1179.298834] [] __nfs_pageio_add_request+0x119/0x460 [nfs] [ 1179.299385] [] ?
nfs_create_request.part.9+0x37/0x2e0 [nfs] [ 1179.299872] [] nfs_pageio_add_request+0xa3/0x1b0 [nfs] [ 1179.300362] [] readpage_async_filler+0x85/0x260 [nfs] [ 1179.300907] [] read_cache_pages+0x91/0xd0 [ 1179.301391] [] ? nfs_read_completion+0x220/0x220 [nfs] [ 1179.301867] [] nfs_readpages+0x128/0x200 [nfs] [ 1179.302330] [] __do_page_cache_readahead+0x203/0x280 [ 1179.302784] [] ? __do_page_cache_readahead+0xd8/0x280 [ 1179.303413] [] ondemand_readahead+0x1a6/0x2f0 [ 1179.303855] [] page_cache_sync_readahead+0x31/0x50 [ 1179.304286] [] generic_file_read_iter+0x4a6/0x5c0 [ 1179.304711] [] ? __nfs_revalidate_mapping+0x1f6/0x240 [nfs] [ 1179.305132] [] nfs_file_read+0x52/0xa0 [nfs] [ 1179.305540] [] __vfs_read+0xcc/0x100 [ 1179.305936] [] vfs_read+0x85/0x130 [ 1179.306326] [] SyS_read+0x58/0xd0 [ 1179.306708] [] entry_SYSCALL_64_fastpath+0x12/0x76 [ 1179.307094] Code: c4 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 8b 07 49 89 f4 85 c0 74 47 48 8b 06 49 89 fd <48> 8b 38 48 85 ff 74 22 31 db eb 0c 48 63 d3 48 8b 3c d0 48 85 [ 1179.308357] RIP [] filelayout_free_fh_array.isra.11+0x1d/0x70 [nfs_layout_nfsv41_files] [ 1179.309177] RSP [ 1179.309582] CR2: 0000000000000000 Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index b34f2e2..02ec079 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -629,23 +629,18 @@ out_put: goto out; } -static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) { int i; - for (i = 0; i < fl->num_fh; i++) { - if (!fl->fh_array[i]) - break; - kfree(fl->fh_array[i]); + if (fl->fh_array) { + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) + break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); } - kfree(fl->fh_array); - fl->fh_array = NULL; -} - -static void -_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) -{ - filelayout_free_fh_array(fl); kfree(fl); } @@ -716,21 +711,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, /* Do we want to use a mempool here? */ fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) - goto out_err_free; + goto out_err; p = xdr_inline_decode(&stream, 4); if (unlikely(!p)) - goto out_err_free; + goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); - goto out_err_free; + goto out_err; } p = xdr_inline_decode(&stream, fl->fh_array[i]->size); if (unlikely(!p)) - goto out_err_free; + goto out_err; memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); dprintk("DEBUG: %s: fh len %d\n", __func__, fl->fh_array[i]->size); @@ -739,8 +734,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __free_page(scratch); return 0; -out_err_free: - filelayout_free_fh_array(fl); out_err: __free_page(scratch); return -EIO; -- cgit v0.10.2 From cc5706056baa3002b844ff240a1cc2199a978795 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 14 Sep 2015 11:14:50 -0700 Subject: openvswitch: Fix IPv6 exthdr handling with ct helpers. Static code analysis reveals the following bug: net/openvswitch/conntrack.c:281 ovs_ct_helper() warn: unsigned 'protoff' is never less than zero. This signedness bug breaks error handling for IPv6 extension headers when using conntrack helpers. Fix the error by using a local signed variable. 
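A minimal standalone sketch of the signedness pitfall described above (illustrative values only, not the OVS code itself):

#include <stdio.h>

static int skip_exthdr(void)
{
	return -1;			/* pretend the extension-header walk failed */
}

int main(void)
{
	unsigned int protoff = skip_exthdr();	/* -1 wraps to a huge value */
	int ofs = skip_exthdr();		/* signed temporary, as in the fix */

	if (protoff < 0)			/* always false: dead error path */
		printf("unsigned check caught the error\n");
	if (ofs < 0)
		printf("signed check caught the error, ofs=%d\n", ofs);
	return 0;
}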
Fixes: cae3a2627520: "openvswitch: Allow attaching helpers to ct action" Reported-by: Dan Carpenter Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e8e524a..002a755 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -275,13 +275,15 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) case NFPROTO_IPV6: { u8 nexthdr = ipv6_hdr(skb)->nexthdr; __be16 frag_off; + int ofs; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); return NF_ACCEPT; } + protoff = ofs; break; } default: -- cgit v0.10.2 From 562d897d15a6e2bab3cc9b4c172286b612834fe8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 15 Sep 2015 10:50:14 -0600 Subject: net: Add documentation for VRF device Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt new file mode 100644 index 0000000..031ef4a --- /dev/null +++ b/Documentation/networking/vrf.txt @@ -0,0 +1,96 @@ +Virtual Routing and Forwarding (VRF) +==================================== +The VRF device combined with ip rules provides the ability to create virtual +routing and forwarding domains (aka VRFs, VRF-lite to be specific) in the +Linux network stack. One use case is the multi-tenancy problem where each +tenant has their own unique routing tables and in the very least need +different default gateways. + +Processes can be "VRF aware" by binding a socket to the VRF device. Packets +through the socket then use the routing table associated with the VRF +device. An important feature of the VRF device implementation is that it +impacts only Layer 3 and above so L2 tools (e.g., LLDP) are not affected +(ie., they do not need to be run in each VRF). The design also allows +the use of higher priority ip rules (Policy Based Routing, PBR) to take +precedence over the VRF device rules directing specific traffic as desired. + +In addition, VRF devices allow VRFs to be nested within namespaces. For +example network namespaces provide separation of network interfaces at L1 +(Layer 1 separation), VLANs on the interfaces within a namespace provide +L2 separation and then VRF devices provide L3 separation. + +Design +------ +A VRF device is created with an associated route table. Network interfaces +are then enslaved to a VRF device: + + +-----------------------------+ + | vrf-blue | ===> route table 10 + +-----------------------------+ + | | | + +------+ +------+ +-------------+ + | eth1 | | eth2 | ... | bond1 | + +------+ +------+ +-------------+ + | | + +------+ +------+ + | eth8 | | eth9 | + +------+ +------+ + +Packets received on an enslaved device and are switched to the VRF device +using an rx_handler which gives the impression that packets flow through +the VRF device. Similarly on egress routing rules are used to send packets +to the VRF device driver before getting sent out the actual interface. This +allows tcpdump on a VRF device to capture all packets into and out of the +VRF as a whole.[1] Similiarly, netfilter [2] and tc rules can be applied +using the VRF device to specify rules that apply to the VRF domain as a whole. 
+ +[1] Packets in the forwarded state do not flow through the device, so those + packets are not seen by tcpdump. Will revisit this limitation in a + future release. + +[2] Iptables on ingress is limited to NF_INET_PRE_ROUTING only with skb->dev + set to real ingress device and egress is limited to NF_INET_POST_ROUTING. + Will revisit this limitation in a future release. + + +Setup +----- +1. VRF device is created with an association to a FIB table. + e.g, ip link add vrf-blue type vrf table 10 + ip link set dev vrf-blue up + +2. Rules are added that send lookups to the associated FIB table when the + iif or oif is the VRF device. e.g., + ip ru add oif vrf-blue table 10 + ip ru add iif vrf-blue table 10 + + Set the default route for the table (and hence default route for the VRF). + e.g, ip route add table 10 prohibit default + +3. Enslave L3 interfaces to a VRF device. + e.g, ip link set dev eth1 master vrf-blue + + Local and connected routes for enslaved devices are automatically moved to + the table associated with VRF device. Any additional routes depending on + the enslaved device will need to be reinserted following the enslavement. + +4. Additional VRF routes are added to associated table. + e.g., ip route add table 10 ... + + +Applications +------------ +Applications that are to work within a VRF need to bind their socket to the +VRF device: + + setsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, dev, strlen(dev)+1); + +or to specify the output device using cmsg and IP_PKTINFO. + + +Limitations +----------- +VRF device currently only works for IPv4. Support for IPv6 is under development. + +Index of original ingress interface is not available via cmsg. Will address +soon. diff --git a/MAINTAINERS b/MAINTAINERS index 310da42..d4d9d4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11243,6 +11243,7 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/vrf.c F: include/net/vrf.h +F: Documentation/networking/vrf.txt VT1211 HARDWARE MONITOR DRIVER M: Juerg Haefliger -- cgit v0.10.2 From 2e64126bb0fb581b55e52e51ec451ebb0d6769c8 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 15 Sep 2015 10:33:07 +0200 Subject: net: qdisc: enhance default_qdisc documentation Aside from some lingual cleanup, point out which interfaces are not or partly covered by this setting. Signed-off-by: Phil Sutter Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 6294b51..809ab6e 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -54,13 +54,15 @@ default_qdisc -------------- The default queuing discipline to use for network devices. This allows -overriding the default queue discipline of pfifo_fast with an -alternative. Since the default queuing discipline is created with the -no additional parameters so is best suited to queuing disciplines that -work well without configuration like stochastic fair queue (sfq), -CoDel (codel) or fair queue CoDel (fq_codel). Don't use queuing disciplines -like Hierarchical Token Bucket or Deficit Round Robin which require setting -up classes and bandwidths. +overriding the default of pfifo_fast with an alternative. Since the default +queuing discipline is created without additional parameters so is best suited +to queuing disciplines that work well without configuration like stochastic +fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). 
Don't use +queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin +which require setting up classes and bandwidths. Note that physical multiqueue +interfaces still use mq as root qdisc, which in turn uses this default for its +leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead +default to noqueue. Default: pfifo_fast busy_read -- cgit v0.10.2 From d828755eae637c6ca39e30702e98abaf34cac146 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 15 Sep 2015 17:20:09 +0530 Subject: cxgb4: add device ID for few T5 adapters Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 8353a6c..03ed00c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -157,6 +157,11 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5090), /* Custom T540-CR */ CH_PCI_ID_TABLE_FENTRY(0x5091), /* Custom T522-CR */ CH_PCI_ID_TABLE_FENTRY(0x5092), /* Custom T520-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5093), /* Custom T580-LP-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5094), /* Custom T540-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5095), /* Custom T540-CR-SO */ + CH_PCI_ID_TABLE_FENTRY(0x5096), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5097), /* Custom T520-KR */ /* T6 adapters: */ -- cgit v0.10.2 From 58189ca7b27411c3dc9a5cb9eeee0906da684c59 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 15 Sep 2015 15:10:50 -0700 Subject: net: Fix vti use case with oif in dst lookups Steffen reported that the recent change to add oif to dst lookups breaks the VTI use case. The problem is that with the oif set in the flow struct the comparison to the nh_oif is triggered. Fix by splitting the FLOWI_FLAG_VRFSRC into 2 flags -- one that triggers the vrf device cache bypass (FLOWI_FLAG_VRFSRC) and another telling the lookup to not compare nh oif (FLOWI_FLAG_SKIP_NH_OIF). Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups") Signed-off-by: David Ahern Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e7094fb..488c6f5 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -193,7 +193,8 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC | + FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; diff --git a/include/net/flow.h b/include/net/flow.h index acd6a09..9b85db8 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -35,6 +35,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_VRFSRC 0x04 +#define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; diff --git a/include/net/route.h b/include/net/route.h index cc61cb9..f46af25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -255,7 +255,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; if (netif_index_is_vrf(sock_net(sk), oif)) - flow_flags |= FLOWI_FLAG_VRFSRC; + flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 26d6ffb..6c2af79 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1426,7 +1426,7 @@ found: nh->nh_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; - if (!(flp->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c0a15e7..f7d1d5e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1024,7 +1024,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (netif_index_is_vrf(net, ipc.oif)) { flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - (flow_flags | FLOWI_FLAG_VRFSRC), + (flow_flags | FLOWI_FLAG_VRFSRC | + FLOWI_FLAG_SKIP_NH_OIF), faddr, saddr, dport, inet->inet_sport); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index bb919b2..c10a9ee 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -33,6 +33,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, if (saddr) fl4->saddr = saddr->a4; + fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; + rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; -- cgit v0.10.2 From 8a1513b49321e503fd6c8b6793e3b1f9a8a3285b Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 11 Sep 2015 10:40:17 -0500 Subject: hp-wmi: limit hotkey enable Do not write initialize magic on systems that do not have feature query 0xb. Fixes Bug #82451. Redefine FEATURE_QUERY to align with 0xb and FEATURE2 with 0xd for code clarity. Add a new test function, hp_wmi_bios_2008_later() & simplify hp_wmi_bios_2009_later(), which fixes a bug in cases where an improper value is returned. Probably also fixes Bug #69131. Add missing __init tag.
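A hedged sketch of the tri-state convention the rewritten query helpers follow; the constant value and the stubbed query are assumptions for illustration, not the real HPWMI interface:

#include <errno.h>
#include <stdio.h>

#define RET_UNKNOWN_CMDTYPE 5	/* illustrative value only */

static int firmware_query(void)
{
	return RET_UNKNOWN_CMDTYPE;	/* stub: pretend an older BIOS answered */
}

/* 1 = feature present, 0 = query not implemented by this BIOS,
 * -ENXIO = unexpected failure. */
static int feature_present(void)
{
	int ret = firmware_query();

	if (!ret)
		return 1;
	return (ret == RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO;
}

int main(void)
{
	printf("feature_present() = %d\n", feature_present());
	return 0;
}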
Signed-off-by: Kyle Evans Cc: stable@vger.kernel.org Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0669731..fb4dd7b 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -54,8 +54,9 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HARDWARE_QUERY 0x4 #define HPWMI_WIRELESS_QUERY 0x5 #define HPWMI_BIOS_QUERY 0x9 +#define HPWMI_FEATURE_QUERY 0xb #define HPWMI_HOTKEY_QUERY 0xc -#define HPWMI_FEATURE_QUERY 0xd +#define HPWMI_FEATURE2_QUERY 0xd #define HPWMI_WIRELESS2_QUERY 0x1b #define HPWMI_POSTCODEERROR_QUERY 0x2a @@ -295,25 +296,33 @@ static int hp_wmi_tablet_state(void) return (state & 0x4) ? 1 : 0; } -static int __init hp_wmi_bios_2009_later(void) +static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, 0, &state, sizeof(state), sizeof(state)); - if (ret) - return ret; + if (!ret) + return 1; - return (state & 0x10) ? 1 : 0; + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; } -static int hp_wmi_enable_hotkeys(void) +static int __init hp_wmi_bios_2009_later(void) { - int ret; - int query = 0x6e; + int state = 0; + int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, 0, &state, + sizeof(state), sizeof(state)); + if (!ret) + return 1; - ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query), - 0); + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; +} +static int __init hp_wmi_enable_hotkeys(void) +{ + int value = 0x6e; + int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value, + sizeof(value), 0); if (ret) return -EINVAL; return 0; @@ -663,7 +672,7 @@ static int __init hp_wmi_input_setup(void) hp_wmi_tablet_state()); input_sync(hp_wmi_input_dev); - if (hp_wmi_bios_2009_later() == 4) + if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later()) hp_wmi_enable_hotkeys(); status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); -- cgit v0.10.2 From 4671fc6d47e0a0108fe24a4d830347d6a6ef4aa7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Sep 2015 18:29:47 -0700 Subject: net/mlx4_en: really allow to change RSS key When changing rss key, we do not want to overwrite user provided key by the one provided by netdev_rss_key_fill(), which is the host random key generated at boot time. Fixes: 947cbb0ac242 ("net/mlx4_en: Support for configurable RSS hash function") Signed-off-by: Eric Dumazet Cc: Eyal Perry CC: Amir Vadai Acked-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4402a1e..0ce6ffe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1268,8 +1268,6 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->hash_fn = MLX4_RSS_HASH_TOP; memcpy(rss_context->rss_key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); - netdev_rss_key_fill(rss_context->rss_key, - MLX4_EN_RSS_KEY_SIZE); } else { en_err(priv, "Unknown RSS hash function requested\n"); err = -EINVAL; -- cgit v0.10.2 From 3aaf14da807a4e9931a37f21e4251abb8a67021b Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 17 Sep 2015 16:01:40 -0700 Subject: zram: fix possible use after free in zcomp_create() zcomp_create() verifies the success of zcomp_strm_{multi,single}_create() through comp->stream, which can potentially be pointing to memory that was freed if these functions returned an error. 
While at it, replace an 'ERR_PTR(-ENOMEM)' by a more generic 'ERR_PTR(error)' as in the future zcomp_strm_{multi,single}_create() could return other error codes. Function documentation updated accordingly. Fixes: beca3ec71fe5 ("zram: add multi stream functionality") Signed-off-by: Luis Henriques Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 965d1af..5cb13ca 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -330,12 +330,14 @@ void zcomp_destroy(struct zcomp *comp) * allocate new zcomp and initialize it. return compressing * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in - * case of allocation error. + * case of allocation error, or any other error potentially + * returned by functions zcomp_strm_{multi,single}_create. */ struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; + int error; backend = find_backend(compress); if (!backend) @@ -347,12 +349,12 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) comp->backend = backend; if (max_strm > 1) - zcomp_strm_multi_create(comp, max_strm); + error = zcomp_strm_multi_create(comp, max_strm); else - zcomp_strm_single_create(comp); - if (!comp->stream) { + error = zcomp_strm_single_create(comp); + if (error) { kfree(comp); - return ERR_PTR(-ENOMEM); + return ERR_PTR(error); } return comp; } -- cgit v0.10.2 From 8d77a6d18ae9ccfd5eee1cc551ee4ac27fd41464 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Thu, 17 Sep 2015 16:01:43 -0700 Subject: kasan: fix last shadow judgement in memory_is_poisoned_16() The shadow which corresponds to 16 bytes of memory may span 2 or 3 bytes. If the memory is aligned on 8, then the shadow takes only 2 bytes. So checking "shadow_first_bytes" is enough, and we need not call "memory_is_poisoned_1(addr + 15);". But the code "if (likely(!last_byte))" is a wrong judgement: e.g. for addr=0, last_byte = 15 & KASAN_SHADOW_MASK = 7, so the code will still continue to call "memory_is_poisoned_1(addr + 15);" Signed-off-by: Xishi Qiu Acked-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Rusty Russell Cc: Michal Marek Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 7b28e9c..8da2114 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr) if (unlikely(*shadow_addr)) { u16 shadow_first_bytes = *(u16 *)shadow_addr; - s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; if (unlikely(shadow_first_bytes)) return true; - if (likely(!last_byte)) + if (likely(IS_ALIGNED(addr, 8))) return false; return memory_is_poisoned_1(addr + 15); -- cgit v0.10.2 From 969560d2afca76823cf97ec4f5c0fb7833e18553 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 17 Sep 2015 16:01:46 -0700 Subject: alpha: io: define ioremap_uc ioremap_uc was not defined, and as a result builds with allmodconfig were getting the build error: implicit declaration of function 'ioremap_uc'.
Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index f05bdb4..ff40491 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -297,7 +297,9 @@ static inline void __iomem * ioremap_nocache(unsigned long offset, unsigned long size) { return ioremap(offset, size); -} +} + +#define ioremap_uc ioremap_nocache static inline void iounmap(volatile void __iomem *addr) { -- cgit v0.10.2 From 14b97deddf8ddecce9f35165b667c55c73e14638 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 17 Sep 2015 16:01:49 -0700 Subject: alpha: lib: export __delay __delay was not exported; as a result, while building with allmodconfig we were getting a build error for an undefined symbol. __delay is being used by: drivers/net/phy/mdio-octeon.c Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c index 69d52aa..f2d81ff 100644 --- a/arch/alpha/lib/udelay.c +++ b/arch/alpha/lib/udelay.c @@ -30,6 +30,7 @@ __delay(int loops) " bgt %0,1b" : "=&r" (tmp), "=r" (loops) : "1"(loops)); } +EXPORT_SYMBOL(__delay); #ifdef CONFIG_SMP #define LPJ cpu_data[smp_processor_id()].loops_per_jiffy -- cgit v0.10.2 From 62bef58a55dfa8ada2a22b2496c6340468ecd98a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 17 Sep 2015 16:01:51 -0700 Subject: lib/string_helpers.c: fix infinite loop in string_get_size() Some string_get_size() calls (e.g.: string_get_size(1, 512, STRING_UNITS_10, ..., ...) string_get_size(15, 64, STRING_UNITS_10, ..., ...) ) result in an infinite loop. The problem is that if size is equal to divisor[units]/blk_size and is smaller than divisor[units] we'll end up with size == 0 when we start doing sf_cap calculations: For string_get_size(1, 512, STRING_UNITS_10, ..., ...) case: ... remainder = do_div(size, divisor[units]); -> size is 0, remainder is 1 remainder *= blk_size; -> remainder is 512 ... size *= blk_size; -> size is still 0 size += remainder / divisor[units]; -> size is still 0 The caller causing the issue is sd_read_capacity(); the problem was noticed on Hyper-V, where such a weird size was reported by the host when scanning collides with device removal. This is probably a separate issue worth fixing; this patch is intended to prevent the library routine from infinite looping. Signed-off-by: Vitaly Kuznetsov Acked-by: James Bottomley Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "K. Y. Srinivasan" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 54036ce..5939f63 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -59,7 +59,11 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, } exp = divisor[units] / (u32)blk_size; - if (size >= exp) { + /* + * size must be strictly greater than exp here to ensure that remainder + * is greater than divisor[units] coming out of the if below. + */ + if (size > exp) { remainder = do_div(size, divisor[units]); remainder *= blk_size; i++; -- cgit v0.10.2 From c03e946fdd653c4a23e242aca83da7e9838f5b00 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 17 Sep 2015 16:01:54 -0700 Subject: userfaultfd: add missing mmput() in error path This fixes a memleak if anon_inode_getfile() fails in userfaultfd().
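A minimal standalone sketch of the error-path rule the fix restores, namely that every reference taken before a failure point must be dropped on that path; the names are hypothetical, not the mm API:

#include <stdio.h>
#include <stdlib.h>

struct res {
	int refs;
};

static void res_get(struct res *r) { r->refs++; }
static void res_put(struct res *r) { r->refs--; }

static int create_object(struct res *r, int simulate_failure)
{
	void *file;

	res_get(r);			/* analogous to holding the mm reference */
	file = simulate_failure ? NULL : malloc(1);
	if (!file) {
		res_put(r);		/* the missing mmput() analogue */
		return -1;
	}
	free(file);
	res_put(r);
	return 0;
}

int main(void)
{
	struct res r = { 0 };

	create_object(&r, 1);
	printf("refs after failed create: %d (must be 0)\n", r.refs);
	return 0;
}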
Signed-off-by: Eric Biggers Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 634e676..f9aeb40 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags) file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); - if (IS_ERR(file)) + if (IS_ERR(file)) { + mmput(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); + } out: return file; } -- cgit v0.10.2 From 0526109a24eb07984f9e79852767300c8b8144de Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Thu, 17 Sep 2015 16:01:57 -0700 Subject: MAINTAINERS: update LTP mailing list [akpm@linux-foundation.org: Wanlong Gao has moved] Signed-off-by: Cyril Hrubis Cc: Jan Stancek Cc: Stanislav Kholmanskikh Cc: Alexey Kodanev Cc: Wanlong Gao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7ab7..274f854 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6452,11 +6452,11 @@ F: drivers/hwmon/ltc4261.c LTP (Linux Test Project) M: Mike Frysinger M: Cyril Hrubis -M: Wanlong Gao +M: Wanlong Gao M: Jan Stancek M: Stanislav Kholmanskikh M: Alexey Kodanev -L: ltp-list@lists.sourceforge.net (subscribers-only) +L: ltp@lists.linux.it (subscribers-only) W: http://linux-test-project.github.io/ T: git git://github.com/linux-test-project/ltp.git S: Maintained -- cgit v0.10.2 From 28c553d0aa0acf02e18f9e008661491a4b996595 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 17 Sep 2015 16:02:00 -0700 Subject: revert "mm: make sure all file VMAs have ->vm_ops set" Revert commit 6dc296e7df4c "mm: make sure all file VMAs have ->vm_ops set". Will Deacon reports that it "causes some mmap regressions in LTP, which appears to use a MAP_PRIVATE mmap of /dev/zero as a way to get anonymous pages in some of its tests (specifically mmap10 [1])". William Shuman reports Oracle crashes. So revert the patch while we work out what to do. Reported-by: William Shuman Reported-by: Will Deacon Cc: Kirill A. Shutemov Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index 971dd2c..c739d6d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { - WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops"); - /* Update tracking information for the gap following the new vma. */ if (vma->vm_next) vma_gap_update(vma->vm_next); @@ -1638,12 +1636,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ WARN_ON_ONCE(addr != vma->vm_start); - /* All file mapping must have ->vm_ops set */ - if (!vma->vm_ops) { - static const struct vm_operations_struct dummy_ops = {}; - vma->vm_ops = &dummy_ops; - } - addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { -- cgit v0.10.2 From 88c796640eac36209efabe5e42b7b47dee58603e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 16 Sep 2015 11:11:22 +0200 Subject: net: ks8851: Export OF module alias information Drivers need to export the OF id table and have it built into the module, or udev won't have the necessary information to autoload the driver module when the device is registered via OF. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 66d4ab7..60f43ec 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1601,6 +1601,7 @@ static const struct of_device_id ks8851_match_table[] = { { .compatible = "micrel,ks8851" }, { } }; +MODULE_DEVICE_TABLE(of, ks8851_match_table); static struct spi_driver ks8851_driver = { .driver = { -- cgit v0.10.2 From ce816eb064c82ab96276969971a561db78e66164 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Sep 2015 12:35:00 +0100 Subject: solos-pci: Increase headroom on received packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A comment in include/linux/skbuff.h says that: * Various parts of the networking layer expect at least 32 bytes of * headroom, you should not reduce this. This was demonstrated by a panic when handling fragmented IPv6 packets: http://marc.info/?l=linux-netdev&m=144236093519172&w=2 It's not entirely clear if that comment is still valid — and if it is, perhaps netif_rx() ought to be enforcing it with a warning. But either way, it is rather stupid from a performance point of view for us to be receiving packets into a buffer which doesn't have enough room to prepend an Ethernet header — it means that *every* incoming packet is going to need to be reallocated. So let's fix that. Signed-off-by: David Woodhouse Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 74e18b0..3d7fb65 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -805,7 +805,12 @@ static void solos_bh(unsigned long card_arg) continue; } - skb = alloc_skb(size + 1, GFP_ATOMIC); + /* Use netdev_alloc_skb() because it adds NET_SKB_PAD of + * headroom, and ensures we can route packets back out an + * Ethernet interface (for example) without having to + * reallocate. Adding NET_IP_ALIGN also ensures that both + * PPPoATM and PPPoEoBR2684 packets end up aligned. */ + skb = netdev_alloc_skb_ip_align(NULL, size + 1); if (!skb) { if (net_ratelimit()) dev_warn(&card->dev->dev, "Failed to allocate sk_buff for RX\n"); @@ -869,7 +874,10 @@ static void solos_bh(unsigned long card_arg) /* Allocate RX skbs for any ports which need them */ if (card->using_dma && card->atmdev[port] && !card->rx_skb[port]) { - struct sk_buff *skb = alloc_skb(RX_DMA_SIZE, GFP_ATOMIC); + /* Unlike the MMIO case (qv) we can't add NET_IP_ALIGN + * here; the FPGA can only DMA to addresses which are + * aligned to 4 bytes. */ + struct sk_buff *skb = dev_alloc_skb(RX_DMA_SIZE); if (skb) { SKB_CB(skb)->dma_addr = dma_map_single(&card->dev->dev, skb->data, -- cgit v0.10.2 From 1d325d217c7f190a42fb620ead20bb240fc16af0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 16 Sep 2015 17:26:14 +0200 Subject: ipv6: ip6_fragment: fix headroom tests and skb leak David Woodhouse reports skb_under_panic when we try to push ethernet header to fragmented ipv6 skbs: skbuff: skb_under_panic: text:c1277f1e len:1294 put:14 head:dec98000 data:dec97ffc tail:0xdec9850a end:0xdec98f40 dev:br-lan [..] ip6_finish_output2+0x196/0x4da David further debugged this: [..] offending fragments were arriving here with skb_headroom(skb)==10. Which is reasonable, being the Solos ADSL card's header of 8 bytes followed by 2 bytes of PPP frame type. The problem is that if netfilter ipv6 defragmentation is used, skb_cow() in ip6_forward will only see reassembled skb.
Therefore, headroom is overestimated by 8 bytes (we pulled fragment header) and we don't check the skbs in the frag_list either. We can't do these checks in netfilter defrag since outdev isn't known yet. Furthermore, existing tests in ip6_fragment did not consider the fragment or ipv6 header size when checking headroom of the fraglist skbs. While at it, also fix a skb leak on memory allocation -- ip6_fragment must consume the skb. I tested this e1000 driver hacked to not allocate additional headroom (we end up in slowpath, since LL_RESERVED_SPACE is 16). If 2 bytes of headroom are allocated, fastpath is taken (14 byte ethernet header was pulled, so 16 byte headroom available in all fragments). Reported-by: David Woodhouse Diagnosed-by: David Woodhouse Signed-off-by: Florian Westphal Tested-by: David Woodhouse Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 26ea479..92b1aa3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -586,20 +586,22 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); + hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || - skb_cloned(skb)) + skb_cloned(skb) || + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || - skb_headroom(frag) < hlen) + skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path_clean; /* Partially cloned skb? */ @@ -616,8 +618,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, err = 0; offset = 0; - frag = skb_shinfo(skb)->frag_list; - skb_frag_list_init(skb); /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; if (!tmp_hdr) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); - return -ENOMEM; + err = -ENOMEM; + goto fail; } + frag = skb_shinfo(skb)->frag_list; + skb_frag_list_init(skb); __skb_pull(skb, hlen); fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); @@ -723,7 +726,6 @@ slow_path: */ *prevhdr = NEXTHDR_FRAGMENT; - hroom = LL_RESERVED_SPACE(rt->dst.dev); troom = rt->dst.dev->needed_tailroom; /* -- cgit v0.10.2 From 34f5b0066435ffb793049b84fafd29fa195bcf90 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 16 Sep 2015 15:30:21 -0400 Subject: atm: deal with setting entry before mkip was called If we didn't call ATMARP_MKIP before ATMARP_ENCAP the VCC descriptor is non-existent and we'll end up dereferencing a NULL ptr: [1033173.491930] kasan: GPF could be caused by NULL-ptr deref or user memory accessirq event stamp: 123386 [1033173.493678] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN [1033173.493689] Modules linked in: [1033173.493697] CPU: 9 PID: 23815 Comm: trinity-c64 Not tainted 4.2.0-next-20150911-sasha-00043-g353d875-dirty #2545 [1033173.493706] task: ffff8800630c4000 ti: ffff880063110000 task.ti: ffff880063110000 [1033173.493823] RIP: clip_ioctl (net/atm/clip.c:320 net/atm/clip.c:689) [1033173.493826] RSP: 0018:ffff880063117a88 EFLAGS: 00010203 [1033173.493828] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 000000000000000c [1033173.493830] RDX: 0000000000000002 RSI: ffffffffb3f10720 RDI: 0000000000000014 [1033173.493832] RBP: ffff880063117b80 R08:
ffff88047574d9a4 R09: 0000000000000000 [1033173.493834] R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff1000c622f53 [1033173.493836] R13: ffff8800cb905500 R14: ffff8808d6da2000 R15: 00000000fffffdfd [1033173.493840] FS: 00007fa56b92d700(0000) GS:ffff880478000000(0000) knlGS:0000000000000000 [1033173.493843] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [1033173.493845] CR2: 0000000000000000 CR3: 00000000630e8000 CR4: 00000000000006a0 [1033173.493855] Stack: [1033173.493862] ffffffffb0b60444 000000000000eaea 0000000041b58ab3 ffffffffb3c3ce32 [1033173.493867] ffffffffb0b6f3e0 ffffffffb0b60444 ffffffffb5ea2e50 1ffff1000c622f5e [1033173.493873] ffff8800630c4cd8 00000000000ee09a ffffffffb3ec4888 ffffffffb5ea2de8 [1033173.493874] Call Trace: [1033173.494108] do_vcc_ioctl (net/atm/ioctl.c:170) [1033173.494113] vcc_ioctl (net/atm/ioctl.c:189) [1033173.494116] svc_ioctl (net/atm/svc.c:605) [1033173.494200] sock_do_ioctl (net/socket.c:874) [1033173.494204] sock_ioctl (net/socket.c:958) [1033173.494244] do_vfs_ioctl (fs/ioctl.c:43 fs/ioctl.c:607) [1033173.494290] SyS_ioctl (fs/ioctl.c:622 fs/ioctl.c:613) [1033173.494295] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:186) [1033173.494362] Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 50 09 00 00 49 8b 9e 60 06 00 00 48 b8 00 00 00 00 00 fc ff df 48 8d 7b 14 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 14 09 00 All code ======== 0: fa cli 1: 48 c1 ea 03 shr $0x3,%rdx 5: 80 3c 02 00 cmpb $0x0,(%rdx,%rax,1) 9: 0f 85 50 09 00 00 jne 0x95f f: 49 8b 9e 60 06 00 00 mov 0x660(%r14),%rbx 16: 48 b8 00 00 00 00 00 movabs $0xdffffc0000000000,%rax 1d: fc ff df 20: 48 8d 7b 14 lea 0x14(%rbx),%rdi 24: 48 89 fa mov %rdi,%rdx 27: 48 c1 ea 03 shr $0x3,%rdx 2b:* 0f b6 04 02 movzbl (%rdx,%rax,1),%eax <-- trapping instruction 2f: 48 89 fa mov %rdi,%rdx 32: 83 e2 07 and $0x7,%edx 35: 38 d0 cmp %dl,%al 37: 7f 08 jg 0x41 39: 84 c0 test %al,%al 3b: 0f 85 14 09 00 00 jne 0x955 Code starting with the faulting instruction =========================================== 0: 0f b6 04 02 movzbl (%rdx,%rax,1),%eax 4: 48 89 fa mov %rdi,%rdx 7: 83 e2 07 and $0x7,%edx a: 38 d0 cmp %dl,%al c: 7f 08 jg 0x16 e: 84 c0 test %al,%al 10: 0f 85 14 09 00 00 jne 0x92a [1033173.494366] RIP clip_ioctl (net/atm/clip.c:320 net/atm/clip.c:689) [1033173.494368] RSP Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/net/atm/clip.c b/net/atm/clip.c index 17e55df..e07f551 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -317,6 +317,9 @@ static int clip_constructor(struct neighbour *neigh) static int clip_encap(struct atm_vcc *vcc, int mode) { + if (!CLIP_VCC(vcc)) + return -EBADFD; + CLIP_VCC(vcc)->encap = mode; return 0; } -- cgit v0.10.2 From 980137a20317055451a73547cf07be4ddea039ee Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 17 Sep 2015 15:18:34 +0200 Subject: ARCNET: fix hard_header_len limit For arcnet the bare minimum header only contains the 4 bytes to specify source, dest and offset (1, 1 and 2 bytes respectively). The corresponding struct is struct arc_hardware. The struct archdr contains additionally a union of possible soft headers. When doing $insertusecasehere packets might well include short (or even no?) soft headers. For this reason only use arc_hardware instead of archdr to determine the hard_header_len for an arcnet device. Signed-off-by: Michael Grzeschik Signed-off-by: David S. 
Miller diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 10f71c73..816d0e9 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -326,7 +326,7 @@ static void arcdev_setup(struct net_device *dev) dev->type = ARPHRD_ARCNET; dev->netdev_ops = &arcnet_netdev_ops; dev->header_ops = &arcnet_header_ops; - dev->hard_header_len = sizeof(struct archdr); + dev->hard_header_len = sizeof(struct arc_hardware); dev->mtu = choose_mtu(); dev->addr_len = ARCNET_ALEN; -- cgit v0.10.2 From c38f6ac74c99801360705d97244ff222ae18dc97 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 17 Sep 2015 15:26:16 +0200 Subject: MAINTAINERS: add arcnet and take maintainership Add an entry for arcnet to the MAINTAINERS file and add myself as the maintainer of the subsystem. Signed-off-by: Michael Grzeschik Cc: davem@davemloft.net Cc: joe@perches.com Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index d4d9d4f..c6aca87 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -808,6 +808,13 @@ S: Maintained F: drivers/video/fbdev/arcfb.c F: drivers/video/fbdev/core/fb_defio.c +ARCNET NETWORK LAYER +M: Michael Grzeschik +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/arcnet/ +F: include/uapi/linux/if_arcnet.h + ARM MFM AND FLOPPY DRIVERS M: Ian Molton S: Maintained -- cgit v0.10.2 From 9dc2ad1008c9f91f55ec6c89ec0f8639dfc91596 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 17 Sep 2015 16:11:10 +0200 Subject: vxlan: set needed headroom correctly vxlan_setup is called when allocating the net_device, i.e. way before vxlan_newlink (or vxlan_dev_configure) is called. This means vxlan->default_dst is actually unset in vxlan_setup and the condition that sets needed_headroom always takes the else branch. Set the needed_headroom at the point when we have the information about the address family available. Fixes: e4c7ed415387c ("vxlan: add ipv6 support") Fixes: 2853af6a2ea1a ("vxlan: use dev->needed_headroom instead of dev->hard_header_len") CC: Cong Wang Signed-off-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index cf8b7f0..6ebe562 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2392,10 +2392,6 @@ static void vxlan_setup(struct net_device *dev) eth_hw_addr_random(dev); ether_setup(dev); - if (vxlan->default_dst.remote_ip.sa.sa_family == AF_INET6) - dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM; - else - dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM; dev->netdev_ops = &vxlan_netdev_ops; dev->destructor = free_netdev; @@ -2670,8 +2666,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dev->needed_headroom = lowerdev->hard_header_len + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); - } else if (use_ipv6) + } else if (use_ipv6) { vxlan->flags |= VXLAN_F_IPV6; + dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM; + } else { + dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM; + } memcpy(&vxlan->cfg, conf, sizeof(*conf)); if (!vxlan->cfg.dst_port) -- cgit v0.10.2 From 057ba29bbe85e9587635e3128b26fa30fe849af9 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 17 Sep 2015 16:11:11 +0200 Subject: vxlan: reject IPv6 addresses if IPv6 is not configured When an IPv6 address is set without IPv6 configured, the vxlan socket is mostly treated as an IPv4 one, but various lookups in the fdb etc. still take the AF_INET6 into account. This creates inconsistencies with weird consequences. Just reject IPv6 addresses in such a case. Signed-off-by: Jiri Benc Signed-off-by: David S.
Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6ebe562..bbac1d3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2636,8 +2636,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dst->remote_ip.sa.sa_family = AF_INET; if (dst->remote_ip.sa.sa_family == AF_INET6 || - vxlan->cfg.saddr.sa.sa_family == AF_INET6) + vxlan->cfg.saddr.sa.sa_family == AF_INET6) { + if (!IS_ENABLED(CONFIG_IPV6)) + return -EPFNOSUPPORT; use_ipv6 = true; + } if (conf->remote_ifindex) { struct net_device *lowerdev -- cgit v0.10.2 From 378fddc281072a10b621341b9f78c902303a8fe7 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 17 Sep 2015 16:11:12 +0200 Subject: qlcnic: track vxlan port count The callback for adding vxlan port can be called with the same port for both IPv4 and IPv6. Do not disable the offloading when the same port for both protocols is added and later one of them removed. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 06bcc73..d6696cf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -536,6 +536,7 @@ struct qlcnic_hardware_context { u8 extend_lb_time; u8 phys_port_id[ETH_ALEN]; u8 lb_mode; + u8 vxlan_port_count; u16 vxlan_port; struct device *hwmon_dev; u32 post_mode; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 8b08b20..d448145 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -483,11 +483,17 @@ static void qlcnic_add_vxlan_port(struct net_device *netdev, /* Adapter supports only one VXLAN port. Use very first port * for enabling offload */ - if (!qlcnic_encap_rx_offload(adapter) || ahw->vxlan_port) + if (!qlcnic_encap_rx_offload(adapter)) return; + if (!ahw->vxlan_port_count) { + ahw->vxlan_port_count = 1; + ahw->vxlan_port = ntohs(port); + adapter->flags |= QLCNIC_ADD_VXLAN_PORT; + return; + } + if (ahw->vxlan_port == ntohs(port)) + ahw->vxlan_port_count++; - ahw->vxlan_port = ntohs(port); - adapter->flags |= QLCNIC_ADD_VXLAN_PORT; } static void qlcnic_del_vxlan_port(struct net_device *netdev, @@ -496,11 +502,13 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev, struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_hardware_context *ahw = adapter->ahw; - if (!qlcnic_encap_rx_offload(adapter) || !ahw->vxlan_port || + if (!qlcnic_encap_rx_offload(adapter) || !ahw->vxlan_port_count || (ahw->vxlan_port != ntohs(port))) return; - adapter->flags |= QLCNIC_DEL_VXLAN_PORT; + ahw->vxlan_port_count--; + if (!ahw->vxlan_port_count) + adapter->flags |= QLCNIC_DEL_VXLAN_PORT; } static netdev_features_t qlcnic_features_check(struct sk_buff *skb, -- cgit v0.10.2 From 1e5b311ab2cc3baf73cdff066ab39c7466bf166b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 17 Sep 2015 16:11:13 +0200 Subject: be2net: allow offloading with the same port for IPv4 and IPv6 The callback for adding vxlan port can be called with the same port for both IPv4 and IPv6. Do not disable the offloading if this occurs. Signed-off-by: Jiri Benc Acked-by: Sathya Perla Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 0a27805..8215409 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -582,6 +582,7 @@ struct be_adapter { u16 pvid; __be16 vxlan_port; int vxlan_port_count; + int vxlan_port_aliases; struct phy_info phy; u8 wol_cap; bool wol_en; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 12687bf..7bf51a1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5176,6 +5176,11 @@ static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) return; + if (adapter->vxlan_port == port && adapter->vxlan_port_count) { + adapter->vxlan_port_aliases++; + return; + } + if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) { dev_info(dev, "Only one UDP port supported for VxLAN offloads\n"); @@ -5226,6 +5231,11 @@ static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family, if (adapter->vxlan_port != port) goto done; + if (adapter->vxlan_port_aliases) { + adapter->vxlan_port_aliases--; + return; + } + be_disable_vxlan_offloads(adapter); dev_info(&adapter->pdev->dev, -- cgit v0.10.2 From ac7eccd4d48fcc70d9fd6e4d10657bcde0a73f9f Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 17 Sep 2015 16:11:14 +0200 Subject: bnx2x: track vxlan port count The callback for adding vxlan port can be called with the same port for both IPv4 and IPv6. Do not disable the offloading when the same port for both protocols is added and later one of them removed. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index ba93663..b5e64b0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1946,6 +1946,7 @@ struct bnx2x { u16 vlan_cnt; u16 vlan_credit; u16 vxlan_dst_port; + u8 vxlan_dst_port_count; bool accept_any_vlan; }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 89a174f..f1d62d5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10108,12 +10108,18 @@ static void __bnx2x_add_vxlan_port(struct bnx2x *bp, u16 port) if (!netif_running(bp->dev)) return; - if (bp->vxlan_dst_port || !IS_PF(bp)) { + if (bp->vxlan_dst_port_count && bp->vxlan_dst_port == port) { + bp->vxlan_dst_port_count++; + return; + } + + if (bp->vxlan_dst_port_count || !IS_PF(bp)) { DP(BNX2X_MSG_SP, "Vxlan destination port limit reached\n"); return; } bp->vxlan_dst_port = port; + bp->vxlan_dst_port_count = 1; bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_ADD_VXLAN_PORT, 0); } @@ -10128,10 +10134,14 @@ static void bnx2x_add_vxlan_port(struct net_device *netdev, static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port) { - if (!bp->vxlan_dst_port || bp->vxlan_dst_port != port || !IS_PF(bp)) { + if (!bp->vxlan_dst_port_count || bp->vxlan_dst_port != port || + !IS_PF(bp)) { DP(BNX2X_MSG_SP, "Invalid vxlan port\n"); return; } + bp->vxlan_dst_port_count--; + if (bp->vxlan_dst_port_count) + return; if (netif_running(bp->dev)) { bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_DEL_VXLAN_PORT, 0); -- cgit v0.10.2 From 6cf35642147103195f126d13bdc1d4c32c7c6666 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 17 Sep 2015 16:28:31 +0200
Subject: MAINTAINERS: remove bouncing email address for qlcnic I got this automated message when submitting a qlcnic patch: > Shahed Shaikh is no longer with QLogic. If you require assistance please > contact Ariel Elior Ariel.Elior@qlogic.com There's no point in having a bouncing address in MAINTAINERS. CC: Dept-GELinuxNICDev@qlogic.com CC: Ariel Elior Signed-off-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index c6aca87..e1c9fbb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8497,7 +8497,6 @@ F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Shahed Shaikh M: Dept-GELinuxNICDev@qlogic.com L: netdev@vger.kernel.org S: Supported -- cgit v0.10.2 From adb094e5e7285385770eb7a7c122bfc663c5e174 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 17 Sep 2015 23:21:21 +0900 Subject: fjes: fix off-by-one error at fjes_hw_update_zone_task() Dan Carpenter reported an off-by-one error in fjes at http://www.mail-archive.com/netdev@vger.kernel.org/msg77520.html Actually this is a bug. ep_shm_info[epidx].{es_status, zone} should be updated inside the for loop. This patch fixes this bug. Reported-by: Dan Carpenter Signed-off-by: Taku Izumi Signed-off-by: David S. Miller diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index b5f4a78..2d3848c 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -1011,11 +1011,11 @@ static void fjes_hw_update_zone_task(struct work_struct *work) set_bit(epidx, &irq_bit); break; } - } - - hw->ep_shm_info[epidx].es_status = info[epidx].es_status; - hw->ep_shm_info[epidx].zone = info[epidx].zone; + hw->ep_shm_info[epidx].es_status = + info[epidx].es_status; + hw->ep_shm_info[epidx].zone = info[epidx].zone; + } break; } -- cgit v0.10.2 From c2e7204d180f8efc80f27959ca9cf16fa17f67db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Sep 2015 08:38:00 -0700 Subject: tcp_cubic: do not set epoch_start in the future Tracking idle time in bictcp_cwnd_event() is imprecise, as epoch_start is normally set at ACK processing time, not at send time. Doing a proper fix would need to add an additional state variable, and does not seem worth the trouble, given the CUBIC bug has been there forever before Jana noticed it. Let's simply not set epoch_start in the future, otherwise bictcp_update() could overflow and CUBIC would again grow cwnd too fast. This was detected thanks to a packetdrill test Neal wrote that was flaky before applying this fix. Fixes: 30927520dbae ("tcp_cubic: better follow cubic curve after idle period") Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Cc: Jana Iyengar Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index c6ded6b..448c261 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -154,14 +154,20 @@ static void bictcp_init(struct sock *sk) static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { - s32 delta = tcp_time_stamp - tcp_sk(sk)->lsndtime; struct bictcp *ca = inet_csk_ca(sk); + u32 now = tcp_time_stamp; + s32 delta; + + delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. * Shift epoch_start to keep cwnd growth to cubic curve.
*/ - if (ca->epoch_start && delta > 0) + if (ca->epoch_start && delta > 0) { ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; + } return; } } -- cgit v0.10.2 From a55b2ae78cfecaa5ce0384c05211606372b9496c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Sep 2015 18:30:34 +0200 Subject: s390/compat: do not trace compat wrapper functions Add notrace to the compat wrapper define to disable tracing of compat wrapper functions. These are supposed to be very small and more or less just a trampoline to the real system call. Also fix indentation. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index f8498dd..994a22a 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -52,12 +52,12 @@ * the regular system call wrappers. */ #define COMPAT_SYSCALL_WRAPx(x, name, ...) \ - asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ - asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ - { \ - return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ - } +asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ +asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ +asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ +{ \ + return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ +} COMPAT_SYSCALL_WRAP1(exit, int, error_code); COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); -- cgit v0.10.2 From 7681df456f97f21fb70f184702696d644e273d73 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Sep 2015 18:30:35 +0200 Subject: s390/compat: remove superfluous compat wrappers A couple of compat wrapper functions are simply trampolines to the real system call. This happened because the compat wrapper defines will only sign and zero extend system call parameters which are of different size on s390/s390x (longs and pointers). All other parameters will be correctly sign and zero extended by the normal system call wrappers. 
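As an editor's illustration of the extension rule described above, here is a minimal userspace sketch (the names sys_demo and compat_sys_demo are hypothetical, not the kernel's macros): a "long" argument from 32-bit userspace arrives in a 64-bit register whose upper half is undefined and must be sign extended by the compat wrapper, while parameters that have the same size on both ABIs are already sign and zero extended by the regular system call wrappers, which is exactly why those compat wrappers can be dropped.

	#include <stdio.h>
	#include <stdint.h>

	/* stand-in for the 64-bit system call body, which takes a long */
	static long sys_demo(long arg)
	{
		return arg;
	}

	/* what a generated compat wrapper does for a 32-bit "long" argument */
	static long compat_sys_demo(uint64_t reg)
	{
		/* only the low 32 bits are meaningful; sign-extend them */
		return sys_demo((long)(int32_t)reg);
	}

	int main(void)
	{
		/* 32-bit userspace passed -1; the upper register half is junk */
		uint64_t reg = 0xdeadbeef00000000ULL | 0xffffffffULL;

		printf("without wrapper: %ld\n", sys_demo((long)reg));  /* garbage */
		printf("with wrapper:    %ld\n", compat_sys_demo(reg)); /* -1 */
		return 0;
	}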
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 994a22a..ada6301 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -59,8 +59,6 @@ asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ } -COMPAT_SYSCALL_WRAP1(exit, int, error_code); -COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); @@ -68,23 +66,16 @@ COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); -COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); -COMPAT_SYSCALL_WRAP1(nice, int, increment); -COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); -COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); -COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); -COMPAT_SYSCALL_WRAP1(umask, int, mask); COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); -COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); @@ -93,37 +84,23 @@ COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); -COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); -COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); -COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); -COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); -COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); 
-COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); -COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); -COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); -COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); -COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); -COMPAT_SYSCALL_WRAP1(mlockall, int, flags); COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); -COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); -COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); @@ -131,20 +108,11 @@ COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); -COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); -COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); -COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); -COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); -COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); @@ -161,23 +129,16 @@ COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); 
-COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); -COMPAT_SYSCALL_WRAP1(epoll_create, int, size); COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); -COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); -COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); -COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); -COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); -COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); @@ -192,23 +153,11 @@ COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); -COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); -COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); -COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); -COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); -COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); -COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); -COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); -COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls); -COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); -COMPAT_SYSCALL_WRAP1(syncfs, int, fd); -COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); -COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); 
COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index e4167bb..996f95a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -9,12 +9,12 @@ #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall) NI_SYSCALL /* 0 */ -SYSCALL(sys_exit,compat_sys_exit) +SYSCALL(sys_exit,sys_exit) SYSCALL(sys_fork,sys_fork) SYSCALL(sys_read,compat_sys_s390_read) SYSCALL(sys_write,compat_sys_s390_write) SYSCALL(sys_open,compat_sys_open) /* 5 */ -SYSCALL(sys_close,compat_sys_close) +SYSCALL(sys_close,sys_close) SYSCALL(sys_restart_syscall,sys_restart_syscall) SYSCALL(sys_creat,compat_sys_creat) SYSCALL(sys_link,compat_sys_link) @@ -35,21 +35,21 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ SYSCALL(sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ SYSCALL(sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */ SYSCALL(sys_ptrace,compat_sys_ptrace) -SYSCALL(sys_alarm,compat_sys_alarm) +SYSCALL(sys_alarm,sys_alarm) NI_SYSCALL /* old fstat syscall */ SYSCALL(sys_pause,sys_pause) SYSCALL(sys_utime,compat_sys_utime) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ SYSCALL(sys_access,compat_sys_access) -SYSCALL(sys_nice,compat_sys_nice) +SYSCALL(sys_nice,sys_nice) NI_SYSCALL /* 35 old ftime syscall */ SYSCALL(sys_sync,sys_sync) -SYSCALL(sys_kill,compat_sys_kill) +SYSCALL(sys_kill,sys_kill) SYSCALL(sys_rename,compat_sys_rename) SYSCALL(sys_mkdir,compat_sys_mkdir) SYSCALL(sys_rmdir,compat_sys_rmdir) /* 40 */ -SYSCALL(sys_dup,compat_sys_dup) +SYSCALL(sys_dup,sys_dup) SYSCALL(sys_pipe,compat_sys_pipe) SYSCALL(sys_times,compat_sys_times) NI_SYSCALL /* old prof syscall */ @@ -65,13 +65,13 @@ NI_SYSCALL /* old lock syscall */ SYSCALL(sys_ioctl,compat_sys_ioctl) SYSCALL(sys_fcntl,compat_sys_fcntl) /* 55 */ NI_SYSCALL /* intel mpx syscall */ -SYSCALL(sys_setpgid,compat_sys_setpgid) +SYSCALL(sys_setpgid,sys_setpgid) NI_SYSCALL /* old ulimit syscall */ NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_umask,compat_sys_umask) /* 60 */ +SYSCALL(sys_umask,sys_umask) /* 60 */ SYSCALL(sys_chroot,compat_sys_chroot) SYSCALL(sys_ustat,compat_sys_ustat) -SYSCALL(sys_dup2,compat_sys_dup2) +SYSCALL(sys_dup2,sys_dup2) SYSCALL(sys_getppid,sys_getppid) SYSCALL(sys_getpgrp,sys_getpgrp) /* 65 */ SYSCALL(sys_setsid,sys_setsid) @@ -102,10 +102,10 @@ SYSCALL(sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ SYSCALL(sys_munmap,compat_sys_munmap) SYSCALL(sys_truncate,compat_sys_truncate) SYSCALL(sys_ftruncate,compat_sys_ftruncate) -SYSCALL(sys_fchmod,compat_sys_fchmod) +SYSCALL(sys_fchmod,sys_fchmod) SYSCALL(sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/ -SYSCALL(sys_getpriority,compat_sys_getpriority) -SYSCALL(sys_setpriority,compat_sys_setpriority) +SYSCALL(sys_getpriority,sys_getpriority) +SYSCALL(sys_setpriority,sys_setpriority) NI_SYSCALL /* old profil syscall */ SYSCALL(sys_statfs,compat_sys_statfs) SYSCALL(sys_fstatfs,compat_sys_fstatfs) /* 100 */ @@ -126,7 +126,7 @@ SYSCALL(sys_wait4,compat_sys_wait4) SYSCALL(sys_swapoff,compat_sys_swapoff) /* 115 */ SYSCALL(sys_sysinfo,compat_sys_sysinfo) SYSCALL(sys_s390_ipc,compat_sys_s390_ipc) -SYSCALL(sys_fsync,compat_sys_fsync) +SYSCALL(sys_fsync,sys_fsync) 
SYSCALL(sys_sigreturn,compat_sys_sigreturn) SYSCALL(sys_clone,compat_sys_clone) /* 120 */ SYSCALL(sys_setdomainname,compat_sys_setdomainname) @@ -140,35 +140,35 @@ SYSCALL(sys_init_module,compat_sys_init_module) SYSCALL(sys_delete_module,compat_sys_delete_module) NI_SYSCALL /* 130: old get_kernel_syms */ SYSCALL(sys_quotactl,compat_sys_quotactl) -SYSCALL(sys_getpgid,compat_sys_getpgid) -SYSCALL(sys_fchdir,compat_sys_fchdir) +SYSCALL(sys_getpgid,sys_getpgid) +SYSCALL(sys_fchdir,sys_fchdir) SYSCALL(sys_bdflush,compat_sys_bdflush) SYSCALL(sys_sysfs,compat_sys_sysfs) /* 135 */ -SYSCALL(sys_s390_personality,compat_sys_s390_personality) +SYSCALL(sys_s390_personality,sys_s390_personality) NI_SYSCALL /* for afs_syscall */ SYSCALL(sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ SYSCALL(sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ SYSCALL(sys_llseek,compat_sys_llseek) /* 140 */ SYSCALL(sys_getdents,compat_sys_getdents) SYSCALL(sys_select,compat_sys_select) -SYSCALL(sys_flock,compat_sys_flock) +SYSCALL(sys_flock,sys_flock) SYSCALL(sys_msync,compat_sys_msync) SYSCALL(sys_readv,compat_sys_readv) /* 145 */ SYSCALL(sys_writev,compat_sys_writev) -SYSCALL(sys_getsid,compat_sys_getsid) -SYSCALL(sys_fdatasync,compat_sys_fdatasync) +SYSCALL(sys_getsid,sys_getsid) +SYSCALL(sys_fdatasync,sys_fdatasync) SYSCALL(sys_sysctl,compat_sys_sysctl) SYSCALL(sys_mlock,compat_sys_mlock) /* 150 */ SYSCALL(sys_munlock,compat_sys_munlock) -SYSCALL(sys_mlockall,compat_sys_mlockall) +SYSCALL(sys_mlockall,sys_mlockall) SYSCALL(sys_munlockall,sys_munlockall) SYSCALL(sys_sched_setparam,compat_sys_sched_setparam) SYSCALL(sys_sched_getparam,compat_sys_sched_getparam) /* 155 */ SYSCALL(sys_sched_setscheduler,compat_sys_sched_setscheduler) -SYSCALL(sys_sched_getscheduler,compat_sys_sched_getscheduler) +SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler) SYSCALL(sys_sched_yield,sys_sched_yield) -SYSCALL(sys_sched_get_priority_max,compat_sys_sched_get_priority_max) -SYSCALL(sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */ +SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max) +SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min) /* 160 */ SYSCALL(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) SYSCALL(sys_nanosleep,compat_sys_nanosleep) SYSCALL(sys_mremap,compat_sys_mremap) @@ -211,20 +211,20 @@ SYSCALL(sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid) /* 200 */ SYSCALL(sys_geteuid,sys_geteuid) SYSCALL(sys_getegid,sys_getegid) -SYSCALL(sys_setreuid,compat_sys_setreuid) -SYSCALL(sys_setregid,compat_sys_setregid) +SYSCALL(sys_setreuid,sys_setreuid) +SYSCALL(sys_setregid,sys_setregid) SYSCALL(sys_getgroups,compat_sys_getgroups) /* 205 */ SYSCALL(sys_setgroups,compat_sys_setgroups) -SYSCALL(sys_fchown,compat_sys_fchown) -SYSCALL(sys_setresuid,compat_sys_setresuid) +SYSCALL(sys_fchown,sys_fchown) +SYSCALL(sys_setresuid,sys_setresuid) SYSCALL(sys_getresuid,compat_sys_getresuid) -SYSCALL(sys_setresgid,compat_sys_setresgid) /* 210 */ +SYSCALL(sys_setresgid,sys_setresgid) /* 210 */ SYSCALL(sys_getresgid,compat_sys_getresgid) SYSCALL(sys_chown,compat_sys_chown) -SYSCALL(sys_setuid,compat_sys_setuid) -SYSCALL(sys_setgid,compat_sys_setgid) -SYSCALL(sys_setfsuid,compat_sys_setfsuid) /* 215 */ -SYSCALL(sys_setfsgid,compat_sys_setfsgid) +SYSCALL(sys_setuid,sys_setuid) +SYSCALL(sys_setgid,sys_setgid) +SYSCALL(sys_setfsuid,sys_setfsuid) /* 215 */ +SYSCALL(sys_setfsgid,sys_setfsgid) SYSCALL(sys_pivot_root,compat_sys_pivot_root) 
SYSCALL(sys_mincore,compat_sys_mincore) SYSCALL(sys_madvise,compat_sys_madvise) @@ -245,19 +245,19 @@ SYSCALL(sys_removexattr,compat_sys_removexattr) SYSCALL(sys_lremovexattr,compat_sys_lremovexattr) SYSCALL(sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid) -SYSCALL(sys_tkill,compat_sys_tkill) +SYSCALL(sys_tkill,sys_tkill) SYSCALL(sys_futex,compat_sys_futex) SYSCALL(sys_sched_setaffinity,compat_sys_sched_setaffinity) SYSCALL(sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */ -SYSCALL(sys_tgkill,compat_sys_tgkill) +SYSCALL(sys_tgkill,sys_tgkill) NI_SYSCALL /* reserved for TUX */ SYSCALL(sys_io_setup,compat_sys_io_setup) SYSCALL(sys_io_destroy,compat_sys_io_destroy) SYSCALL(sys_io_getevents,compat_sys_io_getevents) /* 245 */ SYSCALL(sys_io_submit,compat_sys_io_submit) SYSCALL(sys_io_cancel,compat_sys_io_cancel) -SYSCALL(sys_exit_group,compat_sys_exit_group) -SYSCALL(sys_epoll_create,compat_sys_epoll_create) +SYSCALL(sys_exit_group,sys_exit_group) +SYSCALL(sys_epoll_create,sys_epoll_create) SYSCALL(sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */ SYSCALL(sys_epoll_wait,compat_sys_epoll_wait) SYSCALL(sys_set_tid_address,compat_sys_set_tid_address) @@ -265,8 +265,8 @@ SYSCALL(sys_fadvise64_64,compat_sys_s390_fadvise64) SYSCALL(sys_timer_create,compat_sys_timer_create) SYSCALL(sys_timer_settime,compat_sys_timer_settime) /* 255 */ SYSCALL(sys_timer_gettime,compat_sys_timer_gettime) -SYSCALL(sys_timer_getoverrun,compat_sys_timer_getoverrun) -SYSCALL(sys_timer_delete,compat_sys_timer_delete) +SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun) +SYSCALL(sys_timer_delete,sys_timer_delete) SYSCALL(sys_clock_settime,compat_sys_clock_settime) SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */ SYSCALL(sys_clock_getres,compat_sys_clock_getres) @@ -290,11 +290,11 @@ SYSCALL(sys_add_key,compat_sys_add_key) SYSCALL(sys_request_key,compat_sys_request_key) SYSCALL(sys_keyctl,compat_sys_keyctl) /* 280 */ SYSCALL(sys_waitid,compat_sys_waitid) -SYSCALL(sys_ioprio_set,compat_sys_ioprio_set) -SYSCALL(sys_ioprio_get,compat_sys_ioprio_get) +SYSCALL(sys_ioprio_set,sys_ioprio_set) +SYSCALL(sys_ioprio_get,sys_ioprio_get) SYSCALL(sys_inotify_init,sys_inotify_init) SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */ -SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch) +SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch) SYSCALL(sys_migrate_pages,compat_sys_migrate_pages) SYSCALL(sys_openat,compat_sys_openat) SYSCALL(sys_mkdirat,compat_sys_mkdirat) @@ -326,31 +326,31 @@ SYSCALL(sys_fallocate,compat_sys_s390_fallocate) SYSCALL(sys_utimensat,compat_sys_utimensat) /* 315 */ SYSCALL(sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ -SYSCALL(sys_eventfd,compat_sys_eventfd) -SYSCALL(sys_timerfd_create,compat_sys_timerfd_create) +SYSCALL(sys_eventfd,sys_eventfd) +SYSCALL(sys_timerfd_create,sys_timerfd_create) SYSCALL(sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ SYSCALL(sys_timerfd_gettime,compat_sys_timerfd_gettime) SYSCALL(sys_signalfd4,compat_sys_signalfd4) -SYSCALL(sys_eventfd2,compat_sys_eventfd2) -SYSCALL(sys_inotify_init1,compat_sys_inotify_init1) +SYSCALL(sys_eventfd2,sys_eventfd2) +SYSCALL(sys_inotify_init1,sys_inotify_init1) SYSCALL(sys_pipe2,compat_sys_pipe2) /* 325 */ -SYSCALL(sys_dup3,compat_sys_dup3) -SYSCALL(sys_epoll_create1,compat_sys_epoll_create1) +SYSCALL(sys_dup3,sys_dup3) +SYSCALL(sys_epoll_create1,sys_epoll_create1) SYSCALL(sys_preadv,compat_sys_preadv) SYSCALL(sys_pwritev,compat_sys_pwritev) 
SYSCALL(sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ SYSCALL(sys_perf_event_open,compat_sys_perf_event_open) -SYSCALL(sys_fanotify_init,compat_sys_fanotify_init) +SYSCALL(sys_fanotify_init,sys_fanotify_init) SYSCALL(sys_fanotify_mark,compat_sys_fanotify_mark) SYSCALL(sys_prlimit64,compat_sys_prlimit64) SYSCALL(sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */ SYSCALL(sys_open_by_handle_at,compat_sys_open_by_handle_at) SYSCALL(sys_clock_adjtime,compat_sys_clock_adjtime) -SYSCALL(sys_syncfs,compat_sys_syncfs) -SYSCALL(sys_setns,compat_sys_setns) +SYSCALL(sys_syncfs,sys_syncfs) +SYSCALL(sys_setns,sys_setns) SYSCALL(sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */ SYSCALL(sys_process_vm_writev,compat_sys_process_vm_writev) -SYSCALL(sys_s390_runtime_instr,compat_sys_s390_runtime_instr) +SYSCALL(sys_s390_runtime_instr,sys_s390_runtime_instr) SYSCALL(sys_kcmp,compat_sys_kcmp) SYSCALL(sys_finit_module,compat_sys_finit_module) SYSCALL(sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ -- cgit v0.10.2 From 977108f89c989b1eeb5c8d938e1e71913391eb5f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Sep 2015 18:30:36 +0200 Subject: s390: wire up separate socketcalls system calls As discussed on linux-arch all architectures should wire up the separate system calls that are hidden behind the socketcall multiplexer system call. It's just a couple more system calls and gives us a very small performance improvement. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 525cef7..02613ba 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -8,28 +8,8 @@ #include - #define __IGNORE_time -/* Ignore system calls that are also reachable via sys_socketcall */ -#define __IGNORE_recvmmsg -#define __IGNORE_sendmmsg -#define __IGNORE_socket -#define __IGNORE_socketpair -#define __IGNORE_bind -#define __IGNORE_connect -#define __IGNORE_listen -#define __IGNORE_accept4 -#define __IGNORE_getsockopt -#define __IGNORE_setsockopt -#define __IGNORE_getsockname -#define __IGNORE_getpeername -#define __IGNORE_sendto -#define __IGNORE_sendmsg -#define __IGNORE_recvfrom -#define __IGNORE_recvmsg -#define __IGNORE_shutdown - #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index ff74d49..a848adb 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -292,7 +292,24 @@ #define __NR_execveat 354 #define __NR_userfaultfd 355 #define __NR_membarrier 356 -#define NR_syscalls 357 +#define __NR_recvmmsg 357 +#define __NR_sendmmsg 358 +#define __NR_socket 359 +#define __NR_socketpair 360 +#define __NR_bind 361 +#define __NR_connect 362 +#define __NR_listen 363 +#define __NR_accept4 364 +#define __NR_getsockopt 365 +#define __NR_setsockopt 366 +#define __NR_getsockname 367 +#define __NR_getpeername 368 +#define __NR_sendto 369 +#define __NR_sendmsg 370 +#define __NR_recvfrom 371 +#define __NR_recvmsg 372 +#define __NR_shutdown 373 +#define NR_syscalls 374 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index ada6301..09f1940 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -169,3 +169,10 @@ COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, 
unsigned int, flags); COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size); COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length); COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length); +COMPAT_SYSCALL_WRAP4(socketpair, int, family, int, type, int, protocol, int __user *, usockvec); +COMPAT_SYSCALL_WRAP3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen); +COMPAT_SYSCALL_WRAP3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen); +COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, int __user *, upeer_addrlen, int, flags); +COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); +COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); +COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 996f95a..8c56929 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -365,3 +365,20 @@ SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read) SYSCALL(sys_execveat,compat_sys_execveat) SYSCALL(sys_userfaultfd,sys_userfaultfd) /* 355 */ SYSCALL(sys_membarrier,sys_membarrier) +SYSCALL(sys_recvmmsg,compat_sys_recvmmsg) +SYSCALL(sys_sendmmsg,compat_sys_sendmmsg) +SYSCALL(sys_socket,sys_socket) +SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */ +SYSCALL(sys_bind,sys_bind) +SYSCALL(sys_connect,sys_connect) +SYSCALL(sys_listen,sys_listen) +SYSCALL(sys_accept4,sys_accept4) +SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */ +SYSCALL(sys_setsockopt,compat_sys_setsockopt) +SYSCALL(sys_getsockname,compat_sys_getsockname) +SYSCALL(sys_getpeername,compat_sys_getpeername) +SYSCALL(sys_sendto,compat_sys_sendto) +SYSCALL(sys_sendmsg,compat_sys_sendmsg) /* 370 */ +SYSCALL(sys_recvfrom,compat_sys_recvfrom) +SYSCALL(sys_recvmsg,compat_sys_recvmsg) +SYSCALL(sys_shutdown,sys_shutdown) -- cgit v0.10.2 From 00cc1633816de8c95f337608a1ea64e228faf771 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 18 Sep 2015 11:27:45 +0200 Subject: sched: access local runqueue directly in single_task_running Commit 2ee507c47293 ("sched: Add function single_task_running to let a task check if it is the only task running on a cpu") referenced the current runqueue via smp_processor_id(). When CONFIG_DEBUG_PREEMPT is enabled, that is only allowed if preemption is disabled or the current task is bound to the local cpu (e.g. kernel worker). With commit f78195129963 ("kvm: add halt_poll_ns module parameter") KVM calls single_task_running. If CONFIG_DEBUG_PREEMPT is enabled, that generates a lot of kernel messages. To avoid disabling preemption in those cases, as it would limit the usefulness, we change single_task_running to access the cpu-local runqueue directly. Cc: Tim Chen Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Cc: Fixes: 2ee507c472939db4b146d545352b8a7c79ef47f8 Signed-off-by: Dominik Dingel Signed-off-by: Paolo Bonzini diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3595403..4064f79 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2666,13 +2666,20 @@ unsigned long nr_running(void) /* * Check if only the current task is running on the cpu.
+ * + * Caution: this function does not check that the caller has disabled + * preemption, thus the result might have a time-of-check-to-time-of-use + * race. The caller is responsible to use it correctly, for example: + * + * - from a non-preemptable section (of course) + * + * - from a thread that is bound to a single CPU + * + * - in a loop with very short iterations (e.g. a polling loop) */ bool single_task_running(void) { - if (cpu_rq(smp_processor_id())->nr_running == 1) - return true; - else - return false; + return raw_rq()->nr_running == 1; } EXPORT_SYMBOL(single_task_running); -- cgit v0.10.2 From ebae871a509d3c24b32ff67af2671dadffc58770 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 18 Sep 2015 15:39:05 +0200 Subject: kvm: svm: reset mmu on VCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an INIT/SIPI sequence is sent to a VCPU which before that was in use by the OS, VMRUN might fail with: KVM: entry failed, hardware error 0xffffffff EAX=00000000 EBX=00000000 ECX=00000000 EDX=000006d3 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=00000000 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =9a00 0009a000 0000ffff 00009a00 [...] CR0=60000010 CR2=b6f3e000 CR3=01942000 CR4=000007e0 [...] EFER=0000000000000000 with corresponding SVM error: KVM: FAILED VMRUN WITH VMCB: [...] cpl: 0 efer: 0000000000001000 cr0: 0000000080010010 cr2: 00007fd7fe85bf90 cr3: 0000000187d0c000 cr4: 0000000000000020 [...] What happens is that the VCPU state right after offlining is: CR0: 0x80050033 EFER: 0xd01 CR4: 0x7e0 -> long mode with CR3 pointing to longmode page tables and when the VCPU gets INIT/SIPI the following transition happens: CR0: 0 -> 0x60000010 EFER: 0x0 CR4: 0x7e0 -> paging disabled with stale CR3 However SVM under the hood puts the VCPU in Paged Real Mode* which effectively translates CR0 0x60000010 -> 80010010 after svm_vcpu_reset() -> init_vmcb() -> kvm_set_cr0() -> svm_set_cr0() but from kvm_set_cr0() perspective CR0: 0 -> 0x60000010 only caching bits are changed and commit d81135a57aa6 ("KVM: x86: do not reset mmu if CR0.CD and CR0.NW are changed") regressed svm_vcpu_reset() which relied on the MMU being reset. As a result, VMRUN after svm_vcpu_reset() tries to run the VCPU in Paged Real Mode with a stale MMU context (longmode page tables), which causes some AMD CPUs** to bail out with VMEXIT_INVALID. Fix the issue by unconditionally resetting the MMU context at init_vmcb() time. * AMD64 Architecture Programmer’s Manual, Volume 2: System Programming, rev: 3.25 15.19 Paged Real Mode ** Opteron 1216 Signed-off-by: Igor Mammedov Fixes: d81135a57aa6 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fdb8cb6..89173af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1264,6 +1264,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) * It also updates the guest-visible cr0 value. */ (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + kvm_mmu_reset_context(&svm->vcpu); save->cr4 = X86_CR4_PAE; /* rdx = ?? */ -- cgit v0.10.2 From ab1fffe3a73c694d698645451ba61255ec4ba5e6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 18 Sep 2015 15:02:50 +0300 Subject: ASoC: davinci-mcasp: Fix devm_kasprintf format string The '\n' at the end of the format string is not needed.
It adds an extra line break when doing cat /proc/interrupts Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 2c5a2c5..7d45d98 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1685,7 +1685,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) irq = platform_get_irq_byname(pdev, "common"); if (irq >= 0) { - irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_common\n", + irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_common", dev_name(&pdev->dev)); ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_common_irq_handler, @@ -1702,7 +1702,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) irq = platform_get_irq_byname(pdev, "rx"); if (irq >= 0) { - irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_rx\n", + irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_rx", dev_name(&pdev->dev)); ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_rx_irq_handler, @@ -1717,7 +1717,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) irq = platform_get_irq_byname(pdev, "tx"); if (irq >= 0) { - irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_tx\n", + irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_tx", dev_name(&pdev->dev)); ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_tx_irq_handler, -- cgit v0.10.2 From aadfc3b2042d69a6b4b8d719d4221b988d7f31a5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 9 Sep 2015 01:28:21 -0400 Subject: IB/hfi1: fix pstateinfo from returning improperly byteswapped value Byteswap link_width_downgrade_*_active values before sending on the wire. In addition properly define the Port State Info structure. Reviewed-by: Dennis Dalessandro Reviewed-by: Christian Gomez Signed-off-by: Rimmer, Todd Signed-off-by: Ira Weiny Acked-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 37269eb..b2c1b72 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1717,9 +1717,9 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, psi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); psi->link_width_downgrade_tx_active = - ppd->link_width_downgrade_tx_active; + cpu_to_be16(ppd->link_width_downgrade_tx_active); psi->link_width_downgrade_rx_active = - ppd->link_width_downgrade_rx_active; + cpu_to_be16(ppd->link_width_downgrade_rx_active); if (resp_len) *resp_len += sizeof(struct opa_port_state_info); diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 391dae1..a0fa975 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -294,8 +294,8 @@ struct opa_port_states { struct opa_port_state_info { struct opa_port_states port_states; - u16 link_width_downgrade_tx_active; - u16 link_width_downgrade_rx_active; + __be16 link_width_downgrade_tx_active; + __be16 link_width_downgrade_rx_active; }; struct opa_port_info { -- cgit v0.10.2 From e1df0068a24ba56673183cc3bd392d8bc301d423 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Sep 2015 13:35:25 +0300 Subject: IB/hfi1: fix copy_to/from_user() error handling copy_to/from_user() returns the number of bytes which we were not able to copy. It doesn't return an error code. 
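To make that bug class concrete, here is a toy model of the contract (a sketch only; the copy_from_user() below merely imitates the real function's return convention, it is not the kernel implementation):

	#include <stdio.h>
	#include <string.h>
	#include <errno.h>

	/* toy: pretend the last 3 bytes of the source are unreadable */
	static unsigned long copy_from_user(void *to, const void *from, unsigned long n)
	{
		unsigned long uncopied = n < 3 ? n : 3;

		memcpy(to, from, n - uncopied);
		return uncopied;	/* bytes NOT copied; never negative */
	}

	static int broken_ioctl(const void *arg)
	{
		char buf[16];
		long ret = copy_from_user(buf, arg, sizeof(buf));

		if (ret < 0)		/* can never be true: the bug */
			return -EFAULT;
		return 0;		/* silently proceeds with a partial buffer */
	}

	static int fixed_ioctl(const void *arg)
	{
		char buf[16];

		if (copy_from_user(buf, arg, sizeof(buf)))
			return -EFAULT;	/* any shortfall is a fault */
		return 0;
	}

	int main(void)
	{
		char user[16] = "hello";

		printf("broken: %d\n", broken_ioctl(user));	/* 0 - fault missed */
		printf("fixed:  %d\n", fixed_ioctl(user));	/* -14, i.e. -EFAULT */
		return 0;
	}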
Also a couple places had a printk() on error and I removed that because people can take advantage of it to fill /var/log/messages with spam. Signed-off-by: Dan Carpenter Acked-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index 6777d6b..ce01dee 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -1012,11 +1012,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA: memset(&link_info, 0, sizeof(link_info)); - ret = copy_from_user(&link_info, + if (copy_from_user(&link_info, (struct hfi1_link_info __user *)arg, - sizeof(link_info)); - if (ret) - break; + sizeof(link_info))) + ret = -EFAULT; value = link_info.port_state; index = link_info.port_number; @@ -1080,9 +1079,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA: if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) { memset(&link_info, 0, sizeof(link_info)); - ret = copy_from_user(&link_info, + if (copy_from_user(&link_info, (struct hfi1_link_info __user *)arg, - sizeof(link_info)); + sizeof(link_info))) + ret = -EFAULT; index = link_info.port_number; } else { ret = __get_user(index, (int __user *) arg); @@ -1114,9 +1114,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) ppd->link_speed_active; link_info.link_width_active = ppd->link_width_active; - ret = copy_to_user( + if (copy_to_user( (struct hfi1_link_info __user *)arg, - &link_info, sizeof(link_info)); + &link_info, sizeof(link_info))) + ret = -EFAULT; } else { ret = __put_user(value, (int __user *)arg); } @@ -1142,10 +1143,9 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) snoop_dbg("Setting filter"); /* just copy command structure */ argp = (unsigned long *)arg; - ret = copy_from_user(&filter_cmd, (void __user *)argp, - sizeof(filter_cmd)); - if (ret < 0) { - pr_alert("Error copying filter command\n"); + if (copy_from_user(&filter_cmd, (void __user *)argp, + sizeof(filter_cmd))) { + ret = -EFAULT; break; } if (filter_cmd.opcode >= HFI1_MAX_FILTERS) { @@ -1167,12 +1167,11 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) break; } /* copy remaining data from userspace */ - ret = copy_from_user((u8 *)filter_value, + if (copy_from_user((u8 *)filter_value, (void __user *)filter_cmd.value_ptr, - filter_cmd.length); - if (ret < 0) { + filter_cmd.length)) { kfree(filter_value); - pr_alert("Error copying filter data\n"); + ret = -EFAULT; break; } /* Drain packets first */ -- cgit v0.10.2 From aeef010a0f63ad0a6f993d3da30753e9a8a39ec5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 15 Sep 2015 10:19:27 -0400 Subject: IB/hfi1: fix sdma_descq_cnt parameter parsing The boolean tests should have been or-ed. 
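For illustration, a standalone sketch of the broken and fixed range checks (the SDMA_DESCQ_CNT fallback value is assumed here purely for the demo, and the real function also checks that the count is a power of two first):

	#include <stdio.h>

	#define SDMA_DESCQ_CNT 2048	/* fallback default; value assumed for illustration */

	/* before the fix: no count can be both below 64 and above 32768 */
	static unsigned int descq_cnt_broken(unsigned int count)
	{
		if (count < 64 && count > 32768)	/* unsatisfiable */
			return SDMA_DESCQ_CNT;
		return count;
	}

	/* after the fix: out of range on either side falls back to the default */
	static unsigned int descq_cnt_fixed(unsigned int count)
	{
		if (count < 64 || count > 32768)
			return SDMA_DESCQ_CNT;
		return count;
	}

	int main(void)
	{
		printf("broken(16)  = %u\n", descq_cnt_broken(16));	/* 16 slips through */
		printf("fixed(16)   = %u\n", descq_cnt_fixed(16));	/* clamped to 2048 */
		printf("fixed(1024) = %u\n", descq_cnt_fixed(1024));	/* valid, kept */
		return 0;
	}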
Reported-by: David Binderman Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index a8c903c..7e01f30 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -737,7 +737,7 @@ u16 sdma_get_descq_cnt(void) */ if (!is_power_of_2(count)) return SDMA_DESCQ_CNT; - if (count < 64 && count > 32768) + if (count < 64 || count > 32768) return SDMA_DESCQ_CNT; return count; } -- cgit v0.10.2 From 50b19729ced72cfa8bb1c44fed9203f395f13991 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Sep 2015 09:22:20 +0300 Subject: IB/hfi1: checking for NULL instead of IS_ERR __get_txreq() returns an ERR_PTR() but this checks for NULL so it would oops on failure. Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 53ac214..41bb59e 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -749,11 +749,13 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct verbs_txreq *tx; tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); - if (!tx) + if (!tx) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); - if (tx) - tx->qp = qp; + if (IS_ERR(tx)) + return tx; + } + tx->qp = qp; return tx; } -- cgit v0.10.2 From 951842b0540d2ed49ae29ba968adc496baf46556 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Sep 2015 09:22:51 +0300 Subject: IB/hfi1: fix a locking bug mutex_trylock() returns zero on failure, not EBUSY. Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 654eafe..aa58e59 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -2710,7 +2710,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (sleep_ok) { mutex_lock(&ppd->hls_lock); } else { - while (mutex_trylock(&ppd->hls_lock) == EBUSY) + while (!mutex_trylock(&ppd->hls_lock)) udelay(1); } @@ -2758,7 +2758,7 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (sleep_ok) { mutex_lock(&dd->pport->hls_lock); } else { - while (mutex_trylock(&dd->pport->hls_lock) == EBUSY) + while (!mutex_trylock(&dd->pport->hls_lock)) udelay(1); } -- cgit v0.10.2 From ebe6b2e8bc2cd06a330b3f9be8a4fa3ff44ab026 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Sep 2015 09:42:25 +0300 Subject: IB/hfi1: info leak in get_ctxt_info() The cinfo struct has a hole after the last struct member so we need to zero it out. Otherwise we disclose some uninitialized stack data. Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 4698617..2c43ca5 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1181,6 +1181,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) struct hfi1_filedata *fd = fp->private_data; int ret = 0; + memset(&cinfo, 0, sizeof(cinfo)); ret = hfi1_get_base_kinfo(uctxt, &cinfo); if (ret < 0) goto done; -- cgit v0.10.2 From 3f2686a2665b4d06753b602fe394b5d87bc7f279 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Sep 2015 19:02:54 +0300 Subject: IB/hfi1: clean up some defines I added spaces around operators so it matches kernel style because normally "-1ULL" is a number and " - 1" is a subtract operation. 
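A compilable sketch of the point being made (the macro names are illustrative, not the hfi1 ones): both spellings expand to the same mask, so the cleanup is purely about reading " - 1" as arithmetic instead of mistaking "-1ULL" for a literal.

	#include <stdio.h>

	/* old style: "-1ULL" reads like a negative literal */
	#define WIDTH_MASK_OLD(w)	((1ULL<<(w))-1ULL)
	/* cleaned-up style: spaced operators, plain 1 */
	#define WIDTH_MASK_NEW(w)	((1ULL << (w)) - 1)

	int main(void)
	{
		/* identical values; only the readability changes */
		printf("old: %#llx\n", WIDTH_MASK_OLD(14));	/* 0x3fff */
		printf("new: %#llx\n", WIDTH_MASK_NEW(14));	/* 0x3fff */
		return 0;
	}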
Also removed some superflous "ULL" types so "1ULL" becomes "1". Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 1e613fc..4960869 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -109,53 +109,53 @@ /* * Bits defined in the send DMA descriptor. */ -#define SDMA_DESC0_FIRST_DESC_FLAG (1ULL<<63) -#define SDMA_DESC0_LAST_DESC_FLAG (1ULL<<62) +#define SDMA_DESC0_FIRST_DESC_FLAG (1ULL << 63) +#define SDMA_DESC0_LAST_DESC_FLAG (1ULL << 62) #define SDMA_DESC0_BYTE_COUNT_SHIFT 48 #define SDMA_DESC0_BYTE_COUNT_WIDTH 14 #define SDMA_DESC0_BYTE_COUNT_MASK \ - ((1ULL< Date: Wed, 16 Sep 2015 19:03:45 +0300 Subject: IB/hfi1: mask vs shift confusion We are shifting by the _MASK macros instead of the _SHIFT ones. Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 7e01f30..aecd1a7 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -1848,7 +1848,7 @@ static void dump_sdma_state(struct sdma_engine *sde) dd_dev_err(sde->dd, "\taidx: %u amode: %u alen: %u\n", (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK) - >> SDMA_DESC1_HEADER_INDEX_MASK), + >> SDMA_DESC1_HEADER_INDEX_SHIFT), (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK) >> SDMA_DESC1_HEADER_MODE_SHIFT), (u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK) @@ -1926,7 +1926,7 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde) if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n", (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK) - >> SDMA_DESC1_HEADER_INDEX_MASK), + >> SDMA_DESC1_HEADER_INDEX_SHIFT), (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK) >> SDMA_DESC1_HEADER_MODE_SHIFT)); head = (head + 1) & sde->sdma_mask; -- cgit v0.10.2 From e116a64fab650aed3d7b9b4db0b59c07f361bc9f Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 17 Sep 2015 13:47:49 -0400 Subject: IB/hfi: Properly set permissions for user device files Some of the device files are required to be user accessible for PSM while most should remain accessible only by root. Add a parameter to hfi1_cdev_init which controls if the user should have access to this device which places it in a different class with the appropriate devnode callback. In addition set the devnode call back for the existing class to be a bit more explicit for those permissions. 
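A side note on the mask-vs-shift confusion fixed above: the usual pattern for a packed descriptor field is mask-then-shift, and only the SHIFT constant is a legal shift count. The field layout below is hypothetical but mirrors the SMASK/SHIFT pairs in sdma.h:

#include <linux/types.h>

#define DESC_FIELD_SHIFT	16
#define DESC_FIELD_WIDTH	4
#define DESC_FIELD_MASK		((1ULL << DESC_FIELD_WIDTH) - 1)
#define DESC_FIELD_SMASK	(DESC_FIELD_MASK << DESC_FIELD_SHIFT)

static inline u8 desc_field_get(u64 desc)
{
	/*
	 * Mask with the shifted mask, then shift right by the SHIFT
	 * constant. Shifting by the MASK (a field value, not a bit
	 * position) compiles fine and silently extracts garbage, which
	 * is exactly the bug fixed above.
	 */
	return (u8)((desc & DESC_FIELD_SMASK) >> DESC_FIELD_SHIFT);
}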
Finally remove the unnecessary null check before class_destroy Tested-by: Donald Dutile Signed-off-by: Haralanov, Mitko (mitko.haralanov@intel.com) Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c index 07c87a87..bc26a53 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/staging/rdma/hfi1/device.c @@ -57,11 +57,13 @@ #include "device.h" static struct class *class; +static struct class *user_class; static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, - struct cdev *cdev, struct device **devp) + struct cdev *cdev, struct device **devp, + bool user_accessible) { const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); struct device *device = NULL; @@ -78,7 +80,11 @@ int hfi1_cdev_init(int minor, const char *name, goto done; } - device = device_create(class, NULL, dev, NULL, "%s", name); + if (user_accessible) + device = device_create(user_class, NULL, dev, NULL, "%s", name); + else + device = device_create(class, NULL, dev, NULL, "%s", name); + if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); @@ -110,6 +116,26 @@ const char *class_name(void) return hfi1_class_name; } +static char *hfi1_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0600; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static const char *hfi1_class_name_user = "hfi1_user"; +const char *class_name_user(void) +{ + return hfi1_class_name_user; +} + +static char *hfi1_user_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + int __init dev_init(void) { int ret; @@ -125,7 +151,22 @@ int __init dev_init(void) ret = PTR_ERR(class); pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); + goto done; } + class->devnode = hfi1_devnode; + + user_class = class_create(THIS_MODULE, class_name_user()); + if (IS_ERR(user_class)) { + ret = PTR_ERR(user_class); + pr_err("Could not create device class for user accessible files (err %d)\n", + -ret); + class_destroy(class); + class = NULL; + user_class = NULL; + unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); + goto done; + } + user_class->devnode = hfi1_user_devnode; done: return ret; @@ -133,10 +174,11 @@ done: void dev_cleanup(void) { - if (class) { - class_destroy(class); - class = NULL; - } + class_destroy(class); + class = NULL; + + class_destroy(user_class); + user_class = NULL; unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); } diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h index 98caecd..2850ff7 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/staging/rdma/hfi1/device.h @@ -52,7 +52,8 @@ int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, - struct cdev *cdev, struct device **devp); + struct cdev *cdev, struct device **devp, + bool user_accessible); void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); const char *class_name(void); int __init dev_init(void); diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index ce01dee..3e8d5ac 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -292,7 +292,7 @@ int hfi1_diag_add(struct hfi1_devdata *dd) if (atomic_inc_return(&diagpkt_count) == 1) { ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name, &diagpkt_file_ops, &diagpkt_cdev, - &diagpkt_device); + &diagpkt_device, false); } return 
ret; @@ -592,7 +592,8 @@ static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name, &snoop_file_ops, - &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev); + &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev, + false); if (ret) { dd_dev_err(dd, "Couldn't create %s device: %d", name, ret); diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 2c43ca5..72d3850 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -2090,14 +2090,16 @@ static int user_add(struct hfi1_devdata *dd) if (atomic_inc_return(&user_count) == 1) { ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops, - &wildcard_cdev, &wildcard_device); + &wildcard_cdev, &wildcard_device, + true); if (ret) goto done; } snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, - &dd->user_cdev, &dd->user_device); + &dd->user_cdev, &dd->user_device, + true); if (ret) goto done; @@ -2105,7 +2107,8 @@ static int user_add(struct hfi1_devdata *dd) snprintf(name, sizeof(name), "%s_ui%d", class_name(), dd->unit); ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops, - &dd->ui_cdev, &dd->ui_device); + &dd->ui_cdev, &dd->ui_device, + false); if (ret) goto done; } -- cgit v0.10.2 From ae4f976968896f8f41b3a7aa21be6146492211e5 Mon Sep 17 00:00:00 2001 From: Tyler Baker Date: Sat, 19 Sep 2015 03:58:10 -0400 Subject: mm: fix type cast in __pfn_to_phys() The various definitions of __pfn_to_phys() have been consolidated to use a generic macro in include/asm-generic/memory_model.h. This hit mainline in the form of 012dcef3f058 "mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h". When the generic macro was implemented the type cast to phys_addr_t was dropped which caused boot regressions on ARM platforms with more than 4GB of memory and LPAE enabled. It was suggested to use PFN_PHYS() defined in include/linux/pfn.h as provides the correct logic and avoids further duplication. Reported-by: kernelci.org bot Suggested-by: Dan Williams Signed-off-by: Tyler Baker Signed-off-by: Dan Williams diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index f20f407..4b4b056 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -73,7 +73,7 @@ * Convert a physical address to a Page Frame Number and back */ #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page -- cgit v0.10.2 From ee92cfb030c16ddb01f6543968f13bcb61ed9da5 Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Fri, 18 Sep 2015 17:19:25 +0800 Subject: ASoC: wm8962: remove 64k sample rate support wm8962 can't support 64k sample rate. When playing a 64KHz wave file, 'Unsupported rate 64000Hz' will be prompted. 
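On the __pfn_to_phys() change above: the cast matters on 32-bit LPAE systems, where unsigned long is 32 bits but phys_addr_t is 64 bits. A reduced sketch; the PFN_PHYS() definition quoted in the comment is the one from include/linux/pfn.h, and the example PFN is illustrative:

#include <linux/types.h>

#define EX_PAGE_SHIFT	12

/* Broken: a 32-bit unsigned long pfn shifted left 12 drops the top
 * bits of the physical address. */
#define ex_pfn_to_phys_bad(pfn)	((pfn) << EX_PAGE_SHIFT)

/* Fixed: widen to phys_addr_t (64-bit under LPAE) before shifting,
 * which is what PFN_PHYS() does:
 *   #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)            */
#define ex_pfn_to_phys_ok(pfn)	((phys_addr_t)(pfn) << EX_PAGE_SHIFT)

/*
 * Example: pfn 0x140000 (the page at 5GB) on 32-bit ARM with LPAE:
 *   ex_pfn_to_phys_bad() -> 0x40000000  (1GB, wrong)
 *   ex_pfn_to_phys_ok()  -> 0x140000000 (5GB, right)
 */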
Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b4eb975..293e47a 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2944,7 +2944,8 @@ static int wm8962_mute(struct snd_soc_dai *dai, int mute) WM8962_DAC_MUTE, val); } -#define WM8962_RATES SNDRV_PCM_RATE_8000_96000 +#define WM8962_RATES (SNDRV_PCM_RATE_8000_48000 |\ + SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000) #define WM8962_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\ SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) -- cgit v0.10.2 From 8524bb0c7ac688a3cd6ba12dae6104c54d0566b9 Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Fri, 18 Sep 2015 17:19:43 +0800 Subject: ASoC: wm8960: correct the max register value of mic boost pga the max register value of mic boost pga should be 3. Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 70816f0..d9b43b0 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -234,9 +234,9 @@ SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT3 Volume", SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT2 Volume", WM8960_INBMIX2, 1, 7, 0, lineinboost_tlv), SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT1 Volume", - WM8960_RINPATH, 4, 4, 0, micboost_tlv), + WM8960_RINPATH, 4, 3, 0, micboost_tlv), SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT1 Volume", - WM8960_LINPATH, 4, 4, 0, micboost_tlv), + WM8960_LINPATH, 4, 3, 0, micboost_tlv), SOC_DOUBLE_R_TLV("Playback Volume", WM8960_LDAC, WM8960_RDAC, 0, 255, 0, dac_tlv), -- cgit v0.10.2 From d702ffd4d1df73b9c620af1654af42ff5b8d5c09 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 19:09:07 +0200 Subject: regulator: anatop: Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c index 738adfa..52ea605 100644 --- a/drivers/regulator/anatop-regulator.c +++ b/drivers/regulator/anatop-regulator.c @@ -318,6 +318,7 @@ static const struct of_device_id of_anatop_regulator_match_tbl[] = { { .compatible = "fsl,anatop-regulator", }, { /* end */ } }; +MODULE_DEVICE_TABLE(of, of_anatop_regulator_match_tbl); static struct platform_driver anatop_regulator_driver = { .driver = { -- cgit v0.10.2 From 2f9481e7dc0d3aacbaa07701f3ee2527f5d48301 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 19:09:24 +0200 Subject: regulator: gpio: Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. 
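The autoload fixes in this series all follow one recipe: an OF match table only triggers automatic module loading once MODULE_DEVICE_TABLE() exports it as a modalias. A minimal sketch of the pattern, with a hypothetical driver and compatible string:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static const struct of_device_id example_of_match[] = {
	{ .compatible = "vendor,example-device", },	/* hypothetical */
	{ /* sentinel */ }
};
/*
 * Without this line the table still drives probing once the module is
 * loaded, but no "of:...Cvendor,example-device" alias is emitted into
 * the module image, so udev/kmod cannot load it automatically when the
 * matching device tree node appears.
 */
MODULE_DEVICE_TABLE(of, example_of_match);

static int example_probe(struct platform_device *pdev)
{
	return 0;
}

static struct platform_driver example_driver = {
	.probe	= example_probe,
	.driver	= {
		.name		= "example",
		.of_match_table	= example_of_match,
	},
};
module_platform_driver(example_driver);
MODULE_LICENSE("GPL");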
Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 464018d..7bba8b7 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -394,6 +394,7 @@ static const struct of_device_id regulator_gpio_of_match[] = { { .compatible = "regulator-gpio", }, {}, }; +MODULE_DEVICE_TABLE(of, regulator_gpio_of_match); #endif static struct platform_driver gpio_regulator_driver = { -- cgit v0.10.2 From 7209fee89f435b69051bb6bffe7f191336ac2a5e Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 19:09:51 +0200 Subject: regulator: vexpress: Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown diff --git a/drivers/regulator/vexpress.c b/drivers/regulator/vexpress.c index bed9d3e..c810cbb 100644 --- a/drivers/regulator/vexpress.c +++ b/drivers/regulator/vexpress.c @@ -103,6 +103,7 @@ static const struct of_device_id vexpress_regulator_of_match[] = { { .compatible = "arm,vexpress-volt", }, { } }; +MODULE_DEVICE_TABLE(of, vexpress_regulator_of_match); static struct platform_driver vexpress_regulator_driver = { .probe = vexpress_regulator_probe, -- cgit v0.10.2 From c9e97b3cb2b80deb94c092a2022a6d385b838d84 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 19:42:17 +0200 Subject: spi: meson: Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c index 5468fc7..2465259 100644 --- a/drivers/spi/spi-meson-spifc.c +++ b/drivers/spi/spi-meson-spifc.c @@ -444,6 +444,7 @@ static const struct of_device_id meson_spifc_dt_match[] = { { .compatible = "amlogic,meson6-spifc", }, { }, }; +MODULE_DEVICE_TABLE(of, meson_spifc_dt_match); static struct platform_driver meson_spifc_driver = { .probe = meson_spifc_probe, -- cgit v0.10.2 From 2ace47be5a315def8f493ca77aa59c077ade30a1 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Fri, 18 Sep 2015 16:02:20 +0530 Subject: ASoC: wm0010: fix memory leak We have requested for the firmware but we have missed releasing it both on success and on error path. While checking the code it turned out that the requested firmware is not even used. More over the same firmware is being loaded by wm0010_stage2_load(). 
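The leak fixed above is the classic request_firmware() pairing bug: every successful request must be matched by release_firmware(), whether or not the blob is actually used. A sketch of the correct shape, with placeholder device and firmware names:

#include <linux/firmware.h>
#include <linux/device.h>

static int load_blob(struct device *dev)
{
	const struct firmware *fw;
	int ret;

	ret = request_firmware(&fw, "example_fw.bin", dev);	/* placeholder name */
	if (ret)
		return ret;

	/* ... consume fw->data / fw->size ... */

	release_firmware(fw);	/* required on every successful request */
	return 0;
}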
Signed-off-by: Sudip Mukherjee Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index 9d370a4..7a1bc40 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -577,7 +577,6 @@ static int wm0010_boot(struct snd_soc_codec *codec) struct wm0010_priv *wm0010 = snd_soc_codec_get_drvdata(codec); unsigned long flags; int ret; - const struct firmware *fw; struct spi_message m; struct spi_transfer t; struct dfw_pllrec pll_rec; @@ -623,14 +622,6 @@ static int wm0010_boot(struct snd_soc_codec *codec) wm0010->state = WM0010_OUT_OF_RESET; spin_unlock_irqrestore(&wm0010->irq_lock, flags); - /* First the bootloader */ - ret = request_firmware(&fw, "wm0010_stage2.bin", codec->dev); - if (ret != 0) { - dev_err(codec->dev, "Failed to request stage2 loader: %d\n", - ret); - goto abort; - } - if (!wait_for_completion_timeout(&wm0010->boot_completion, msecs_to_jiffies(20))) dev_err(codec->dev, "Failed to get interrupt from DSP\n"); -- cgit v0.10.2 From f072f91aa7517386344476813ca0799e08fd0c35 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Fri, 18 Sep 2015 16:02:21 +0530 Subject: ASoC: wm0010: fix error path Fix the error path so that we can free the allocated memory on the error path instead of releasing them individually on each error. Signed-off-by: Sudip Mukherjee Acked-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index 7a1bc40..76b2f88 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -663,10 +663,8 @@ static int wm0010_boot(struct snd_soc_codec *codec) } img_swap = kzalloc(len, GFP_KERNEL | GFP_DMA); - if (!img_swap) { - kfree(out); - goto abort; - } + if (!img_swap) + goto abort_out; /* We need to re-order for 0010 */ byte_swap_64((u64 *)&pll_rec, img_swap, len); @@ -681,20 +679,16 @@ static int wm0010_boot(struct snd_soc_codec *codec) spi_message_add_tail(&t, &m); ret = spi_sync(spi, &m); - if (ret != 0) { + if (ret) { dev_err(codec->dev, "First PLL write failed: %d\n", ret); - kfree(img_swap); - kfree(out); - goto abort; + goto abort_swap; } /* Use a second send of the message to get the return status */ ret = spi_sync(spi, &m); - if (ret != 0) { + if (ret) { dev_err(codec->dev, "Second PLL write failed: %d\n", ret); - kfree(img_swap); - kfree(out); - goto abort; + goto abort_swap; } p = (u32 *)out; @@ -727,6 +721,10 @@ static int wm0010_boot(struct snd_soc_codec *codec) return 0; +abort_swap: + kfree(img_swap); +abort_out: + kfree(out); abort: /* Put the chip back into reset */ wm0010_halt(codec); -- cgit v0.10.2 From 5b64c173cdea21105eb4794487b3d593f0a2e6c3 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Wed, 16 Sep 2015 10:13:19 +0100 Subject: ASoC: fsl_ssi: Fix checking of dai format for AC97 mode Current code incorrectly treats dai format for AC97 as bit mask whereas it's actually an integer value. This causes DAI formats other than AC97 (e.g. DSP_B) to trigger AC97 related code, which is incorrect and breaks functionality. This patch fixes the code to correctly compare values to determine AC97 or not. 
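The fsl_ssi bug above is worth spelling out: SND_SOC_DAIFMT_AC97 is an integer format value stored in the low bits of dai_fmt, not a flag bit, so testing it with "&" also matches unrelated formats. With the values from include/sound/soc-dai.h (AC97 is 6, DSP_B is 5), DSP_B & AC97 is non-zero, which is exactly the reported breakage; the fix masks first, then compares for equality:

#include <linux/types.h>

/* Low-nibble format values from include/sound/soc-dai.h: */
#define EX_DAIFMT_DSP_B		5	/* 0b101 */
#define EX_DAIFMT_AC97		6	/* 0b110 */
#define EX_DAIFMT_FORMAT_MASK	0x000f

static bool is_ac97_broken(unsigned int fmt)
{
	/* DSP_B & AC97 == 4, so DSP_B wrongly tests as AC97 here. */
	return !!(fmt & EX_DAIFMT_AC97);
}

static bool is_ac97_fixed(unsigned int fmt)
{
	/* Mask out the format field, then compare the whole value. */
	return (fmt & EX_DAIFMT_FORMAT_MASK) == EX_DAIFMT_AC97;
}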
Signed-off-by: Adam Thomson Acked-by: Timur Tabi Signed-off-by: Mark Brown diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 8ec6fb2..37c5cd4 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -249,7 +249,8 @@ MODULE_DEVICE_TABLE(of, fsl_ssi_ids); static bool fsl_ssi_is_ac97(struct fsl_ssi_private *ssi_private) { - return !!(ssi_private->dai_fmt & SND_SOC_DAIFMT_AC97); + return (ssi_private->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) == + SND_SOC_DAIFMT_AC97; } static bool fsl_ssi_is_i2s_master(struct fsl_ssi_private *ssi_private) @@ -947,7 +948,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, CCSR_SSI_SCR_TCH_EN); } - if (fmt & SND_SOC_DAIFMT_AC97) + if ((fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_AC97) fsl_ssi_setup_ac97(ssi_private); return 0; -- cgit v0.10.2 From 79234c3db6842a3de03817211d891e0c2878f756 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2015 15:53:24 -0400 Subject: SUNRPC: Lock the transport layer on shutdown Avoid all races with the connect/disconnect handlers by taking the transport lock. Reported-by:"Suzuki K. Poulose" Acked-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ab5dd62..2e98f4a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -614,6 +614,7 @@ static void xprt_autoclose(struct work_struct *work) clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); + wake_up_bit(&xprt->state, XPRT_LOCKED); } /** @@ -723,6 +724,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) xprt->ops->release_xprt(xprt, NULL); out: spin_unlock_bh(&xprt->transport_lock); + wake_up_bit(&xprt->state, XPRT_LOCKED); } /** @@ -1394,6 +1396,10 @@ out: static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); + + /* Exclude transport connect/disconnect handlers */ + wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + del_timer_sync(&xprt->timer); rpc_xprt_debugfs_unregister(xprt); -- cgit v0.10.2 From 4b0ab51db32eba0f48b7618254742f143364a28d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2015 09:52:07 -0400 Subject: SUNRPC: xs_sock_mark_closed() does not need to trigger socket autoclose Under all conditions, it should be quite sufficient just to mark the socket as disconnected. It will then be closed by the transport shutdown or reconnect code. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c350385..1a85e0e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -777,7 +777,6 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) xs_sock_reset_connection_flags(xprt); /* Mark transport as closed and wake up all pending tasks */ xprt_disconnect_done(xprt); - xprt_force_disconnect(xprt); } /** -- cgit v0.10.2 From 590dca3a71875461e8fea3013af74386945191b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Sep 2015 13:35:08 -0400 Subject: fs-writeback: unplug before cond_resched in writeback_sb_inodes Commit 505a666ee3fc ("writeback: plug writeback in wb_writeback() and writeback_inodes_wb()") has us holding a plug during writeback_sb_inodes, which increases the merge rate when relatively contiguous small files are written by the filesystem. It helps both on flash and spindles. For an fs_mark workload creating 4K files in parallel across 8 drives, this commit improves performance ~9% more by unplugging before calling cond_resched(). 
cond_resched() doesn't trigger an implicit unplug, so explicitly getting the IO down to the device before scheduling reduces latencies for anyone waiting on clean pages. It also cuts down on how often we use kblockd to unplug, which means less work bouncing from one workqueue to another. Many more details about how we got here: https://lkml.org/lkml/2015/9/11/570 Signed-off-by: Chris Mason Signed-off-by: Linus Torvalds diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 587ac08..091a364 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1481,6 +1481,21 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; + + if (need_resched()) { + /* + * We're trying to balance between building up a nice + * long list of IOs to improve our merge rate, and + * getting those IOs out quickly for anyone throttling + * in balance_dirty_pages(). cond_resched() doesn't + * unplug, so get our IOs out the door before we + * give up the CPU. + */ + blk_flush_plug(current); + cond_resched(); + } + + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) @@ -1488,7 +1503,7 @@ static long writeback_sb_inodes(struct super_block *sb, requeue_inode(inode, wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); - cond_resched_lock(&wb->list_lock); + /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. -- cgit v0.10.2 From 97584d1838b7e2545c3b10aacef3327fcaa9531b Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:54 +0100 Subject: thermal: power_allocator: exit early if there are no cooling devices Don't waste cycles in the power allocator governor's throttle function if there are no cooling devices and exit early. This commit doesn't change any functionality, but should provide better performance for the odd case of a thermal zone with trip points but without cooling devices. Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 718363f..7ff9627 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -346,6 +346,11 @@ static int allocate_power(struct thermal_zone_device *tz, } } + if (!num_actors) { + ret = -ENODEV; + goto unlock; + } + /* * We need to allocate five arrays of the same size: * req_power, max_power, granted_power, extra_actor_power and -- cgit v0.10.2 From 6f29b9bba7b08c6b1d6f2cc4cf750b342fc1946c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 20 Sep 2015 23:03:28 +0800 Subject: NFS: Do cleanup before resetting pageio read/write to mds There is a reference leak of layout segment after resetting pageio read/write to mds. 
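A reduced sketch of the plugging pattern the fs-writeback change above is tuning: IO submitted under a plug is held back so adjacent requests can merge, and blk_flush_plug() pushes the batch to the device before the task yields. The work loop and its helpers here are hypothetical:

#include <linux/blkdev.h>
#include <linux/sched.h>

bool more_work(void);		/* hypothetical */
void submit_one_bio(void);	/* hypothetical */

static void submit_batch(void)
{
	struct blk_plug plug;

	blk_start_plug(&plug);		/* hold bios back so they can merge */
	while (more_work()) {
		submit_one_bio();
		if (need_resched()) {
			/*
			 * cond_resched() does not unplug implicitly, so
			 * flush the batched IO to the device before
			 * yielding; otherwise anyone waiting on those
			 * pages sees extra latency.
			 */
			blk_flush_plug(current);
			cond_resched();
		}
	}
	blk_finish_plug(&plug);		/* flushes whatever remains */
}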
Signed-off-by: Kinglong Mee Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ae0ff7a..01b8cc8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; /* read path should never have more than one mirror */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 388f480..72624dc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1351,6 +1351,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; nfs_pageio_stop_mirroring(pgio); -- cgit v0.10.2 From 8714d46dc5b95a6a898ec8f1e67237c7995adfc6 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 20 Sep 2015 23:04:22 +0800 Subject: NFS: Fix an infinite loop when layoutget fail with BAD_STATEID If layouget fail with BAD_STATEID, restart should not using the old stateid. But, nfs client choose the layout stateid at first, and then the open stateid. To avoid the infinite loop of using bad stateid for layoutget, this patch sets the layout flag'ss NFS_LAYOUT_INVALID_STID bit to skip choosing the bad layout stateid. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 693b903..10ba6c1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7820,6 +7820,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) * Mark the bad layout state as invalid, then retry * with the current stateid. */ + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); -- cgit v0.10.2 From 1f93e4a96c9109378204c147b3eec0d0e8100fde Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Sep 2015 14:32:34 -0700 Subject: Linux 4.3-rc2 diff --git a/Makefile b/Makefile index 1a132ea..84f4b31 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* -- cgit v0.10.2 From 5fc3e64f941197d5b77abccbe0144b25c2c36f2f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Sep 2015 13:13:44 +1000 Subject: KVM: PPC: Book3S HV: Fix handling of interrupted VCPUs This fixes a bug which results in stale vcore pointers being left in the per-cpu preempted vcore lists when a VM is destroyed. The result of the stale vcore pointers is usually either a crash or a lockup inside collect_piggybacks() when another VM is run. A typical lockup message looks like: [ 472.161074] NMI watchdog: BUG: soft lockup - CPU#24 stuck for 22s! 
[qemu-system-ppc:7039] [ 472.161204] Modules linked in: kvm_hv kvm_pr kvm xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ses enclosure shpchp rtc_opal i2c_opal powernv_rng binfmt_misc dm_service_time scsi_dh_alua radeon i2c_algo_bit drm_kms_helper ttm drm tg3 ptp pps_core cxgb3 ipr i2c_core mdio dm_multipath [last unloaded: kvm_hv] [ 472.162111] CPU: 24 PID: 7039 Comm: qemu-system-ppc Not tainted 4.2.0-kvm+ #49 [ 472.162187] task: c000001e38512750 ti: c000001e41bfc000 task.ti: c000001e41bfc000 [ 472.162262] NIP: c00000000096b094 LR: c00000000096b08c CTR: c000000000111130 [ 472.162337] REGS: c000001e41bff520 TRAP: 0901 Not tainted (4.2.0-kvm+) [ 472.162399] MSR: 9000000100009033 CR: 24848844 XER: 00000000 [ 472.162588] CFAR: c00000000096b0ac SOFTE: 1 GPR00: c000000000111170 c000001e41bff7a0 c00000000127df00 0000000000000001 GPR04: 0000000000000003 0000000000000001 0000000000000000 0000000000874821 GPR08: c000001e41bff8e0 0000000000000001 0000000000000000 d00000000efde740 GPR12: c000000000111130 c00000000fdae400 [ 472.163053] NIP [c00000000096b094] _raw_spin_lock_irqsave+0xa4/0x130 [ 472.163117] LR [c00000000096b08c] _raw_spin_lock_irqsave+0x9c/0x130 [ 472.163179] Call Trace: [ 472.163206] [c000001e41bff7a0] [c000001e41bff7f0] 0xc000001e41bff7f0 (unreliable) [ 472.163295] [c000001e41bff7e0] [c000000000111170] __wake_up+0x40/0x90 [ 472.163375] [c000001e41bff830] [d00000000efd6fc0] kvmppc_run_core+0x1240/0x1950 [kvm_hv] [ 472.163465] [c000001e41bffa30] [d00000000efd8510] kvmppc_vcpu_run_hv+0x5a0/0xd90 [kvm_hv] [ 472.163559] [c000001e41bffb70] [d00000000e9318a4] kvmppc_vcpu_run+0x44/0x60 [kvm] [ 472.163653] [c000001e41bffba0] [d00000000e92e674] kvm_arch_vcpu_ioctl_run+0x64/0x170 [kvm] [ 472.163745] [c000001e41bffbe0] [d00000000e9263a8] kvm_vcpu_ioctl+0x538/0x7b0 [kvm] [ 472.163834] [c000001e41bffd40] [c0000000002d0f50] do_vfs_ioctl+0x480/0x7c0 [ 472.163910] [c000001e41bffde0] [c0000000002d1364] SyS_ioctl+0xd4/0xf0 [ 472.163986] [c000001e41bffe30] [c000000000009260] system_call+0x38/0xd0 [ 472.164060] Instruction dump: [ 472.164098] ebc1fff0 ebe1fff8 7c0803a6 4e800020 60000000 60000000 60420000 8bad02e2 [ 472.164224] 7fc3f378 4b6a57c1 60000000 7c210b78 89290009 792affe3 40820070 The bug is that kvmppc_run_vcpu does not correctly handle the case where a vcpu task receives a signal while its guest vcpu is executing in the guest as a result of being piggy-backed onto the execution of another vcore. In that case we need to wait for the vcpu to finish executing inside the guest, and then remove this vcore from the preempted vcores list. That way, we avoid leaving this vcpu's vcore on the preempted vcores list when the vcpu gets interrupted. 
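Schematically, the hazard behind this fix is a per-cpu list holding raw pointers into per-VM state. The names below are invented, but the invariant is the one the patch restores: entries must be unlinked before the VM can be freed, or the next walker (the analogue of collect_piggybacks() here) dereferences freed memory:

#include <linux/list.h>
#include <linux/percpu.h>

struct vcore_like {			/* schematic stand-in for a vcore */
	struct list_head preempt_list;	/* linked on a per-cpu list */
};

static DEFINE_PER_CPU(struct list_head, preempted_vcores);

/*
 * Must run before the owning VM is torn down; a stale entry left on
 * the per-cpu list is a dangling pointer and a later walk crashes or
 * livelocks, as in the soft-lockup report above.
 */
static void end_preempt(struct vcore_like *vc)
{
	list_del_init(&vc->preempt_list);
}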
Fixes: ec2571650826 Reported-by: Thomas Huth Tested-by: Thomas Huth Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9754e68..2280497 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2692,9 +2692,13 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && (vc->vcore_state == VCORE_RUNNING || - vc->vcore_state == VCORE_EXITING)) + vc->vcore_state == VCORE_EXITING || + vc->vcore_state == VCORE_PIGGYBACK)) kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE); + if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL) + kvmppc_vcore_end_preempt(vc); + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { kvmppc_remove_runnable(vc, vcpu); vcpu->stat.signal_exits++; -- cgit v0.10.2 From 7e022e717f54897e396504306d0c9b61452adf4e Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 21 May 2015 13:57:04 +0530 Subject: KVM: PPC: Book3S HV: Pass the correct trap argument to kvmhv_commence_exit In guest_exit_cont we call kvmhv_commence_exit which expects the trap number as the argument. However r3 doesn't contain the trap number at this point and as a result we would be calling the function with a spurious trap number. Fix this by copying r12 into r3 before calling kvmhv_commence_exit as r12 contains the trap number. Cc: stable@vger.kernel.org # v4.1+ Fixes: eddb60fb1443 Signed-off-by: Gautham R. Shenoy Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2273dca..b98889e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1257,6 +1257,7 @@ mc_cont: bl kvmhv_accumulate_time #endif + mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit nop -- cgit v0.10.2 From 3eb4ee68254235e4f47bc0410538fcdaede39589 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 18 Sep 2015 08:57:28 +0200 Subject: KVM: PPC: Book3S: Take the kvm->srcu lock in kvmppc_h_logical_ci_load/store() Access to the kvm->buses (like with the kvm_io_bus_read() and -write() functions) has to be protected via the kvm->srcu lock. The kvmppc_h_logical_ci_load() and -store() functions are missing this lock so far, so let's add it there, too. This fixes the problem that the kernel reports "suspicious RCU usage" when lock debugging is enabled. 
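The rule this patch enforces is the standard SRCU read-side idiom: any dereference of the RCU-managed bus tables must sit between srcu_read_lock() and srcu_read_unlock() on kvm->srcu. A generic sketch with schematic types; the protected access is illustrative:

#include <linux/srcu.h>

struct vm_like {			/* schematic: think struct kvm */
	struct srcu_struct srcu;
	/* ... RCU-managed tables such as the io buses ... */
};

int do_bus_io(struct vm_like *vm);	/* hypothetical protected access */

static int guarded_bus_io(struct vm_like *vm)
{
	int idx, ret;

	idx = srcu_read_lock(&vm->srcu);	/* pin the current grace period */
	ret = do_bus_io(vm);			/* RCU-managed pointers safe here */
	srcu_read_unlock(&vm->srcu, idx);
	return ret;
}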
Cc: stable@vger.kernel.org # v4.1+ Fixes: 99342cf8044420eebdf9297ca03a14cb6a7085a1 Signed-off-by: Thomas Huth Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index cf00916..099c79d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -829,12 +829,15 @@ int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) unsigned long size = kvmppc_get_gpr(vcpu, 4); unsigned long addr = kvmppc_get_gpr(vcpu, 5); u64 buf; + int srcu_idx; int ret; if (!is_power_of_2(size) || (size > sizeof(buf))) return H_TOO_HARD; + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; @@ -869,6 +872,7 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) unsigned long addr = kvmppc_get_gpr(vcpu, 5); unsigned long val = kvmppc_get_gpr(vcpu, 6); u64 buf; + int srcu_idx; int ret; switch (size) { @@ -892,7 +896,9 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) return H_TOO_HARD; } + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; -- cgit v0.10.2 From b7f76ea2ef6739ee484a165ffbac98deb855d3d3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 18 Sep 2015 23:41:23 +0200 Subject: security: fix typo in security_task_prctl Signed-off-by: Jann Horn Reviewed-by: Andy Lutomirski Signed-off-by: Linus Torvalds diff --git a/include/linux/security.h b/include/linux/security.h index 79d85dd..2f4c1f7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -946,7 +946,7 @@ static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg4, unsigned long arg5) { - return cap_task_prctl(option, arg2, arg3, arg3, arg5); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) -- cgit v0.10.2 From 24311f884189d42d40354a6f38ca218eb9aeb811 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 10:50:17 -0400 Subject: NFSv4: Recovery of recalled read delegations is broken When a read delegation is being recalled, and we're reclaiming the cached opens, we need to make sure that we only reclaim read-only modes. A previous attempt to do this, relied on retrieving the delegation type from the nfs4_opendata structure. Unfortunately, as Kinglong pointed out, this field can only be set when performing reboot recovery. Furthermore, if we call nfs4_open_recover(), then we end up clobbering the state->flags for all modes that we're not recovering... The fix is to have the delegation recall code pass this information to the recovery call, and then refactor the recovery code so that nfs4_open_delegation_recall() does not need to call nfs4_open_recover(). 
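The switch in the fix below relies on deliberate case fall-through: a write delegation must reclaim read/write, write-only, and then read-only opens, while a read delegation reclaims only the read-only ones. Sketched with a hypothetical per-mode recover helper:

#include <linux/fs.h>

int example_recover(fmode_t mode);	/* hypothetical per-mode reclaim */

static int reclaim_modes(fmode_t type)
{
	int err = 0;

	switch (type & (FMODE_READ | FMODE_WRITE)) {
	case FMODE_READ | FMODE_WRITE:
	case FMODE_WRITE:
		err = example_recover(FMODE_READ | FMODE_WRITE);
		if (err)
			break;
		err = example_recover(FMODE_WRITE);
		if (err)
			break;
		/* deliberate fall-through: a write delegation covers reads too */
	case FMODE_READ:
		err = example_recover(FMODE_READ);
	}
	return err;
}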
Reported-by: Kinglong Mee Fixes: 39f897fdbd46 ("NFSv4: When returning a delegation, don't...") Tested-by: Kinglong Mee Cc: NeilBrown Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 2714ef8..be806ea 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -113,7 +113,8 @@ out: return status; } -static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) +static int nfs_delegation_claim_opens(struct inode *inode, + const nfs4_stateid *stateid, fmode_t type) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; @@ -140,7 +141,7 @@ again: /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - err = nfs4_open_delegation_recall(ctx, state, stateid); + err = nfs4_open_delegation_recall(ctx, state, stateid, type); if (!err) err = nfs_delegation_claim_locks(ctx, state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) @@ -411,7 +412,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation do { if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) break; - err = nfs_delegation_claim_opens(inode, &delegation->stateid); + err = nfs_delegation_claim_opens(inode, &delegation->stateid, + delegation->type); if (!issync || err != -EAGAIN) break; /* diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index a448291..333063e 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 10ba6c1..e129347 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1127,6 +1127,21 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } +static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, + fmode_t fmode) +{ + switch(fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + return state->n_rdwr != 0; + case FMODE_WRITE: + return state->n_wronly != 0; + case FMODE_READ: + return state->n_rdonly != 0; + } + WARN_ON_ONCE(1); + return false; +} + static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) { int ret = 0; @@ -1571,17 +1586,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context return opendata; } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, + fmode_t fmode) { struct nfs4_state *newstate; int ret; - if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || - opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && - (opendata->o_arg.u.delegation_type & fmode) != fmode) - /* This mode can't have been delegated, so we must have - * a valid open_stateid to cover it - not need to reclaim. 
- */ + if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) return 0; opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; @@ -1597,14 +1608,14 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod newstate = nfs4_opendata_to_nfs4_state(opendata); if (IS_ERR(newstate)) return PTR_ERR(newstate); + if (newstate != opendata->state) + ret = -ESTALE; nfs4_close_state(newstate, fmode); - *res = newstate; - return 0; + return ret; } static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) { - struct nfs4_state *newstate; int ret; /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ @@ -1615,27 +1626,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * clear_bit(NFS_DELEGATED_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); - if (state->n_rdwr != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } - if (state->n_wronly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } - if (state->n_rdonly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); + if (ret != 0) + return ret; + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE); + if (ret != 0) + return ret; + ret = nfs4_open_recover_helper(opendata, FMODE_READ); + if (ret != 0) + return ret; /* * We may have performed cached opens for all three recoveries. * Check if we need to update the current stateid. @@ -1759,18 +1758,32 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct return err; } -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, + struct nfs4_state *state, const nfs4_stateid *stateid, + fmode_t type) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_opendata *opendata; - int err; + int err = 0; opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_DELEG_CUR_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - err = nfs4_open_recover(opendata, state); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + switch (type & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + case FMODE_WRITE: + err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); + if (err) + break; + err = nfs4_open_recover_helper(opendata, FMODE_WRITE); + if (err) + break; + case FMODE_READ: + err = nfs4_open_recover_helper(opendata, FMODE_READ); + } nfs4_opendata_put(opendata); return nfs4_handle_delegation_recall_error(server, state, stateid, err); } -- cgit v0.10.2 From 2259f960b3a9b1020dccbf948c97311ce586db1b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 13:30:30 -0400 Subject: NFSv4.x/pnfs: Don't try to recover stateids twice in layoutget If the current open or layout stateid doesn't match the stateid used in the layoutget RPC call, then don't try to recover it. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e129347..ef6b46e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7813,20 +7813,23 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", __func__, delay); rpc_delay(task, delay); - task->tk_status = 0; - rpc_restart_call_prepare(task); - goto out; /* Do not call nfs4_async_handle_error() */ + /* Do not call nfs4_async_handle_error() */ + goto out_restart; } break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: spin_lock(&inode->i_lock); - lo = NFS_I(inode)->layout; - if (!lo || list_empty(&lo->plh_segs)) { + if (nfs4_stateid_match(&lgp->args.stateid, + &lgp->args.ctx->state->stateid)) { spin_unlock(&inode->i_lock); /* If the open stateid was bad, then recover it. */ state = lgp->args.ctx->state; - } else { + break; + } + lo = NFS_I(inode)->layout; + if (lo && nfs4_stateid_match(&lgp->args.stateid, + &lo->plh_stateid)) { LIST_HEAD(head); /* @@ -7837,16 +7840,19 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); - - task->tk_status = 0; - rpc_restart_call_prepare(task); - } + } else + spin_unlock(&inode->i_lock); + goto out_restart; } if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) - rpc_restart_call_prepare(task); + goto out_restart; out: dprintk("<-- %s\n", __func__); return; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; out_overflow: task->tk_status = -EOVERFLOW; goto out; -- cgit v0.10.2 From ba5ca7848be05db6235aeb703586b821aa00e381 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 16 Sep 2015 15:27:43 +0200 Subject: bna: check for dma mapping errors Check for DMA mapping errors, recover from them and register them in ethtool stats like other errors. Cc: Rasesh Mody Signed-off-by: Ivan Vecera Acked-by: Rasesh Mody Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c index 5d0753c..04b0d16 100644 --- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c +++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c @@ -2400,6 +2400,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad, q0->rcb->id = 0; q0->rx_packets = q0->rx_bytes = 0; q0->rx_packets_with_error = q0->rxbuf_alloc_failed = 0; + q0->rxbuf_map_failed = 0; bna_rxq_qpt_setup(q0, rxp, dpage_count, PAGE_SIZE, &dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[i]); @@ -2428,6 +2429,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad, : rx_cfg->q1_buf_size; q1->rx_packets = q1->rx_bytes = 0; q1->rx_packets_with_error = q1->rxbuf_alloc_failed = 0; + q1->rxbuf_map_failed = 0; bna_rxq_qpt_setup(q1, rxp, hpage_count, PAGE_SIZE, &hqpt_mem[i], &hsqpt_mem[i], diff --git a/drivers/net/ethernet/brocade/bna/bna_types.h b/drivers/net/ethernet/brocade/bna/bna_types.h index e0e797f..c438d03 100644 --- a/drivers/net/ethernet/brocade/bna/bna_types.h +++ b/drivers/net/ethernet/brocade/bna/bna_types.h @@ -587,6 +587,7 @@ struct bna_rxq { u64 rx_bytes; u64 rx_packets_with_error; u64 rxbuf_alloc_failed; + u64 rxbuf_map_failed; }; /* RxQ pair */ diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 506047c..21a0cfc 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -399,7 +399,13 @@ bnad_rxq_refill_page(struct bnad *bnad, struct bna_rcb *rcb, u32 nalloc) } dma_addr = dma_map_page(&bnad->pcidev->dev, page, page_offset, - unmap_q->map_size, DMA_FROM_DEVICE); + unmap_q->map_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + put_page(page); + BNAD_UPDATE_CTR(bnad, rxbuf_map_failed); + rcb->rxq->rxbuf_map_failed++; + goto finishing; + } unmap->page = page; unmap->page_offset = page_offset; @@ -454,8 +460,15 @@ bnad_rxq_refill_skb(struct bnad *bnad, struct bna_rcb *rcb, u32 nalloc) rcb->rxq->rxbuf_alloc_failed++; goto finishing; } + dma_addr = dma_map_single(&bnad->pcidev->dev, skb->data, buff_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + dev_kfree_skb_any(skb); + BNAD_UPDATE_CTR(bnad, rxbuf_map_failed); + rcb->rxq->rxbuf_map_failed++; + goto finishing; + } unmap->skb = skb; dma_unmap_addr_set(&unmap->vector, dma_addr, dma_addr); @@ -3025,6 +3038,11 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) unmap = head_unmap; dma_addr = dma_map_single(&bnad->pcidev->dev, skb->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + dev_kfree_skb_any(skb); + BNAD_UPDATE_CTR(bnad, tx_skb_map_failed); + return NETDEV_TX_OK; + } BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[0].host_addr); txqent->vector[0].length = htons(len); dma_unmap_addr_set(&unmap->vectors[0], dma_addr, dma_addr); @@ -3056,6 +3074,15 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) dma_addr = skb_frag_dma_map(&bnad->pcidev->dev, frag, 0, size, DMA_TO_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + /* Undo the changes starting at tcb->producer_index */ + bnad_tx_buff_unmap(bnad, unmap_q, q_depth, + tcb->producer_index); + dev_kfree_skb_any(skb); + BNAD_UPDATE_CTR(bnad, tx_skb_map_failed); + return NETDEV_TX_OK; + } + dma_unmap_len_set(&unmap->vectors[vect_id], dma_len, size); BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); txqent->vector[vect_id].length = htons(size); diff --git a/drivers/net/ethernet/brocade/bna/bnad.h 
b/drivers/net/ethernet/brocade/bna/bnad.h index faedbf2..f4ed816 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.h +++ b/drivers/net/ethernet/brocade/bna/bnad.h @@ -175,6 +175,7 @@ struct bnad_drv_stats { u64 tx_skb_headlen_zero; u64 tx_skb_frag_zero; u64 tx_skb_len_mismatch; + u64 tx_skb_map_failed; u64 hw_stats_updates; u64 netif_rx_dropped; @@ -189,6 +190,7 @@ struct bnad_drv_stats { u64 rx_unmap_q_alloc_failed; u64 rxbuf_alloc_failed; + u64 rxbuf_map_failed; }; /* Complete driver stats */ diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 2bdfc5d..0e4fdc3 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -90,6 +90,7 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_skb_headlen_zero", "tx_skb_frag_zero", "tx_skb_len_mismatch", + "tx_skb_map_failed", "hw_stats_updates", "netif_rx_dropped", @@ -102,6 +103,7 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_unmap_q_alloc_failed", "rx_unmap_q_alloc_failed", "rxbuf_alloc_failed", + "rxbuf_map_failed", "mac_stats_clr_cnt", "mac_frame_64", @@ -807,6 +809,7 @@ bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) rx_packets_with_error; buf[bi++] = rcb->rxq-> rxbuf_alloc_failed; + buf[bi++] = rcb->rxq->rxbuf_map_failed; buf[bi++] = rcb->producer_index; buf[bi++] = rcb->consumer_index; } @@ -821,6 +824,7 @@ bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) rx_packets_with_error; buf[bi++] = rcb->rxq-> rxbuf_alloc_failed; + buf[bi++] = rcb->rxq->rxbuf_map_failed; buf[bi++] = rcb->producer_index; buf[bi++] = rcb->consumer_index; } -- cgit v0.10.2 From 0315e382704817b279e5693dca8ab9d89aa20b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Forr=C3=B3?= Date: Thu, 17 Sep 2015 16:01:32 +0200 Subject: net: Fix behaviour of unreachable, blackhole and prohibit routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Man page of ip-route(8) says following about route types: unreachable - these destinations are unreachable. Packets are dis‐ carded and the ICMP message host unreachable is generated. The local senders get an EHOSTUNREACH error. blackhole - these destinations are unreachable. Packets are dis‐ carded silently. The local senders get an EINVAL error. prohibit - these destinations are unreachable. Packets are discarded and the ICMP message communication administratively prohibited is generated. The local senders get an EACCES error. In the inet6 address family, this was correct, except the local senders got ENETUNREACH error instead of EHOSTUNREACH in case of unreachable route. In the inet address family, all three route types generated ICMP message net unreachable, and the local senders got ENETUNREACH error. In both address families all three route types now behave consistently with documentation. Signed-off-by: Nikola Forró Signed-off-by: David S. 
Miller diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a37d043..727d6e9 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -236,8 +236,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) - err = 0; + if (tb) + err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); + + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); @@ -258,7 +261,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; - int err; + int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) @@ -268,15 +271,20 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, res->tclassid = 0; - for (err = 0; !err; err = -ENETUNREACH) { - tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; + tb = rcu_dereference_rtnl(net->ipv4.fib_main); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); + + if (!err) + goto out; + + tb = rcu_dereference_rtnl(net->ipv4.fib_default); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); - tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; - } +out: + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5f4a556..c6ad99a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2045,6 +2045,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) struct fib_result res; struct rtable *rth; int orig_oif; + int err = -ENETUNREACH; res.tclassid = 0; res.fi = NULL; @@ -2153,7 +2154,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } - if (fib_lookup(net, fl4, &res, 0)) { + err = fib_lookup(net, fl4, &res, 0); + if (err) { res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif) { @@ -2181,7 +2183,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) res.type = RTN_UNICAST; goto make_route; } - rth = ERR_PTR(-ENETUNREACH); + rth = ERR_PTR(err); goto out; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d5fa502..f204089 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1885,9 +1885,11 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: + case RTN_UNREACHABLE: default: rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN - : -ENETUNREACH; + : (cfg->fc_type == RTN_UNREACHABLE) + ? -EHOSTUNREACH : -ENETUNREACH; rt->dst.output = ip6_pkt_discard_out; rt->dst.input = ip6_pkt_discard; break; -- cgit v0.10.2 From fc27bd115b334e3ebdc682a42a47c3aea2566dcc Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 18 Sep 2015 00:19:08 +0100 Subject: 8139cp: Use dev_kfree_skb_any() instead of dev_kfree_skb() in cp_clean_rings() This can be called from cp_tx_timeout() with interrupts disabled. Spotted by Francois Romieu Signed-off-by: David Woodhouse Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index d79e33b..52a5334 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1151,7 +1151,7 @@ static void cp_clean_rings (struct cp_private *cp) desc = cp->rx_ring + i; dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr), cp->rx_buf_sz, PCI_DMA_FROMDEVICE); - dev_kfree_skb(cp->rx_skb[i]); + dev_kfree_skb_any(cp->rx_skb[i]); } } @@ -1164,7 +1164,7 @@ static void cp_clean_rings (struct cp_private *cp) le32_to_cpu(desc->opts1) & 0xffff, PCI_DMA_TODEVICE); if (le32_to_cpu(desc->opts1) & LastFrag) - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); cp->dev->stats.tx_dropped++; } } -- cgit v0.10.2 From 7a8a8e75d505147358b225173e890ada43a267e2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 18 Sep 2015 00:21:54 +0100 Subject: 8139cp: Call __cp_set_rx_mode() from cp_tx_timeout() Unless we reset the RX config, on real hardware I don't seem to receive any packets after a TX timeout. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 52a5334..ba3dab7 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1261,6 +1261,7 @@ static void cp_tx_timeout(struct net_device *dev) cp_clean_rings(cp); rc = cp_init_rings(cp); cp_start_hw(cp); + __cp_set_rx_mode(dev); cp_enable_irq(cp); netif_wake_queue(dev); -- cgit v0.10.2 From aab0c0e62ec4af224d1b6b40fca65055d403400b Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 18 Sep 2015 15:42:30 +0800 Subject: Revert "net/phy: Add Vitesse 8641 phy ID" This reverts commit 1298267b548a78840bd4b3e030993ff8747ca5e6. That commit claim that the Vitesse VSC8641 is compatible with Vitesse 82xx. But this is not true. It seems that all the registers used in Vitesse phy driver are not compatible between 8641 and 82xx. It does cause malfunction of the Ethernet on p1010rdb-pa board. So we definitely need a rework in order to support the 8641 phy in this driver. Signed-off-by: Kevin Hao Signed-off-by: David S. 
Miller diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 17cad18..76cad71 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -66,7 +66,6 @@ #define PHY_ID_VSC8244 0x000fc6c0 #define PHY_ID_VSC8514 0x00070670 #define PHY_ID_VSC8574 0x000704a0 -#define PHY_ID_VSC8641 0x00070431 #define PHY_ID_VSC8662 0x00070660 #define PHY_ID_VSC8221 0x000fc550 #define PHY_ID_VSC8211 0x000fc4b0 @@ -273,18 +272,6 @@ static struct phy_driver vsc82xx_driver[] = { .config_intr = &vsc82xx_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_VSC8641, - .name = "Vitesse VSC8641", - .phy_id_mask = 0x000ffff0, - .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, - .config_init = &vsc824x_config_init, - .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, - .ack_interrupt = &vsc824x_ack_interrupt, - .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, -}, { .phy_id = PHY_ID_VSC8662, .name = "Vitesse VSC8662", .phy_id_mask = 0x000ffff0, @@ -331,7 +318,6 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = { { PHY_ID_VSC8244, 0x000fffc0 }, { PHY_ID_VSC8514, 0x000ffff0 }, { PHY_ID_VSC8574, 0x000ffff0 }, - { PHY_ID_VSC8641, 0x000ffff0 }, { PHY_ID_VSC8662, 0x000ffff0 }, { PHY_ID_VSC8221, 0x000ffff0 }, { PHY_ID_VSC8211, 0x000ffff0 }, -- cgit v0.10.2 From 4e3ae00100945d39e1f83b7c0179a114ccf55759 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 18 Sep 2015 10:46:31 +0200 Subject: tipc: reinitialize pointer after skb linearize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The msg pointer into header may change after skb linearization. We must reinitialize it after calling skb_linearize to prevent operating on a freed or invalid pointer. Signed-off-by: Erik Hugne Reported-by: Tamás Végh Acked-by: Ying Xue Signed-off-by: David S. Miller diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 562c926..c5ac436 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -539,6 +539,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) *err = -TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; + msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; dnode = addr_domain(net, msg_lookup_scope(msg)); -- cgit v0.10.2 From bc22a0e2ea03b75b51a1f722f93821744b5b5ff1 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 18 Sep 2015 11:47:40 +0200 Subject: iptunnel: make rx/tx bytes counters consistent This was already done a long time ago in commit 64194c31a0b6 ("inet: Make tunnel RX/TX byte counters more consistent") but tx path was broken (at least since 3.10). Before the patch the gre header was included on tx. After the patch: $ ping -c1 192.168.0.121 ; ip -s l ls dev gre1 PING 192.168.0.121 (192.168.0.121) 56(84) bytes of data. 64 bytes from 192.168.0.121: icmp_req=1 ttl=64 time=2.95 ms --- 192.168.0.121 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 2.955/2.955/2.955/0.000 ms 7: gre1@NONE: mtu 1468 qdisc noqueue state UNKNOWN mode DEFAULT group default link/gre 10.16.0.249 peer 10.16.0.121 RX: bytes packets errors dropped overrun mcast 84 1 0 0 0 0 TX: bytes packets errors dropped carrier collsns 84 1 0 0 0 0 Reported-by: Julien Meunier Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 29ed6c5..9b97204 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -51,7 +51,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { - int pkt_len = skb->len; + int pkt_len = skb->len - skb_inner_network_offset(skb); struct iphdr *iph; int err; -- cgit v0.10.2 From 83cf9a2521b0934a5f9d04082c9bb4f554fddcd4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 18 Sep 2015 11:47:41 +0200 Subject: ip6tunnel: make rx/tx bytes counters consistent Like the previous patch, which fixes ipv4 tunnels, here is the ipv6 part. Before the patch, the external ipv6 header + gre header were included on tx. After the patch: $ ping -c1 192.168.6.121 ; ip -s l ls dev ip6gre1 PING 192.168.6.121 (192.168.6.121) 56(84) bytes of data. 64 bytes from 192.168.6.121: icmp_req=1 ttl=64 time=1.92 ms --- 192.168.6.121 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 1.923/1.923/1.923/0.000 ms 7: ip6gre1@NONE: mtu 1440 qdisc noqueue state UNKNOWN mode DEFAULT group default link/gre6 20:01:06:60:30:08:c1:c3:00:00:00:00:00:00:01:23 peer 20:01:06:60:30:08:c1:c3:00:00:00:00:00:00:01:21 RX: bytes packets errors dropped overrun mcast 84 1 0 0 0 0 TX: bytes packets errors dropped carrier collsns 84 1 0 0 0 0 $ ping -c1 192.168.1.121 ; ip -s l ls dev ip6tnl1 PING 192.168.1.121 (192.168.1.121) 56(84) bytes of data. 64 bytes from 192.168.1.121: icmp_req=1 ttl=64 time=2.28 ms --- 192.168.1.121 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 2.288/2.288/2.288/0.000 ms 8: ip6tnl1@NONE: mtu 1452 qdisc noqueue state UNKNOWN mode DEFAULT group default link/tunnel6 2001:660:3008:c1c3::123 peer 2001:660:3008:c1c3::121 RX: bytes packets errors dropped overrun mcast 84 1 0 0 0 0 TX: bytes packets errors dropped carrier collsns 84 1 0 0 0 0 Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 65c2a93..fa915fa 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -86,7 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device_stats *stats = &dev->stats; int pkt_len, err; - pkt_len = skb->len; + pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { -- cgit v0.10.2 From 3afb1121800128aae9f5722e50097fcf1a9d4d88 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 18 Sep 2015 17:33:04 +0200 Subject: KVM: x86: trap AMD MSRs for the TSeg base and mask These have roughly the same purpose as the SMRR, which we do not need to implement in KVM. However, Linux accesses MSR_K8_TSEG_ADDR at boot, which causes problems when running a Xen dom0 under KVM. Just return 0, meaning that processor protection of SMRAM is not in effect. 
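For context, here is a minimal self-contained sketch of the read-as-zero policy the patch adopts. The two MSR indices are copied from the diff below; the function, its name, and its error convention are purely illustrative stand-ins, not the actual KVM code:

#include <stdint.h>

#define MSR_K8_TSEG_ADDR 0xc0010112
#define MSR_K8_TSEG_MASK 0xc0010113

/* Stand-in for the kvm_get_msr_common() switch: the TSeg MSRs join the
 * group of MSRs that the emulator accepts on read but reports as zero,
 * so the guest concludes SMRAM protection is not in effect.
 */
static int emulated_rdmsr(uint32_t index, uint64_t *data)
{
	switch (index) {
	case MSR_K8_TSEG_ADDR:
	case MSR_K8_TSEG_MASK:
		*data = 0;	/* no SMRAM protection in effect */
		return 0;
	default:
		return -1;	/* a real hypervisor would inject #GP here */
	}
}

The design choice is the same one the message describes for the SMRR: rather than emulating the feature, the hypervisor answers the boot-time probe with a harmless value so the guest carries on.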
Reported-by: M A Young Cc: stable@vger.kernel.org Acked-by: Borislav Petkov Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c1c0a1c..b98b471 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -331,6 +331,7 @@ /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 #define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 #define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ #define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ #define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bbb0df..991466b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2190,6 +2190,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: case MSR_K8_SYSCFG: + case MSR_K8_TSEG_ADDR: + case MSR_K8_TSEG_MASK: case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_K8_INT_PENDING_MSG: -- cgit v0.10.2 From 3ea79249e81e5ed051f2e6480cbde896d99046e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 18 Sep 2015 13:41:09 +0300 Subject: macvtap: fix TUNSETSNDBUF values > 64k Upon TUNSETSNDBUF, macvtap reads the requested sndbuf size into a local variable u. commit 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF") changed its type to u16 (which is the right thing to do for all other macvtap ioctls), breaking all values > 64k. The value of TUNSETSNDBUF is actually a signed 32 bit integer, so the right thing to do is to read it into an int. Cc: David S. Miller Fixes: 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF") Reported-by: Mark A. Peloquin Bisected-by: Matthew Rosato Reported-by: Christian Borntraeger Signed-off-by: Michael S. Tsirkin Tested-by: Matthew Rosato Acked-by: Christian Borntraeger Signed-off-by: David S. Miller diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index edd7734..248478c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1111,10 +1111,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return 0; case TUNSETSNDBUF: - if (get_user(u, up)) + if (get_user(s, sp)) return -EFAULT; - q->sk.sk_sndbuf = u; + q->sk.sk_sndbuf = s; return 0; case TUNGETVNETHDRSZ: -- cgit v0.10.2 From 1f770c0a09da855a2b51af6d19de97fb955eca85 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Sep 2015 19:16:50 +0800 Subject: netlink: Fix autobind race condition that leads to zero port ID The commit c0bb07df7d981e4091432754e30c9c720e2c0c78 ("netlink: Reset portid after netlink_insert failure") introduced a race condition where if two threads try to autobind the same socket one of them may end up with a zero port ID. This led to kernel deadlocks that were observed by multiple people. This patch reverts that commit and instead fixes it by introducing a separate rhash_portid variable so that the real portid is only set after the socket has been successfully hashed. Fixes: c0bb07df7d98 ("netlink: Reset portid after netlink_insert failure") Reported-by: Tejun Heo Reported-by: Linus Torvalds Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4cad99d..9f51608 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1031,7 +1031,7 @@ static inline int netlink_compare(struct rhashtable_compare_arg *arg, const struct netlink_compare_arg *x = arg->key; const struct netlink_sock *nlk = ptr; - return nlk->portid != x->portid || + return nlk->rhash_portid != x->portid || !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); } @@ -1057,7 +1057,7 @@ static int __netlink_insert(struct netlink_table *table, struct sock *sk) { struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); + netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid); return rhashtable_lookup_insert_key(&table->hash, &arg, &nlk_sk(sk)->node, netlink_rhashtable_params); @@ -1119,7 +1119,7 @@ static int netlink_insert(struct sock *sk, u32 portid) unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; - nlk_sk(sk)->portid = portid; + nlk_sk(sk)->rhash_portid = portid; sock_hold(sk); err = __netlink_insert(table, sk); @@ -1131,10 +1131,12 @@ static int netlink_insert(struct sock *sk, u32 portid) err = -EOVERFLOW; if (err == -EEXIST) err = -EADDRINUSE; - nlk_sk(sk)->portid = 0; sock_put(sk); + goto err; } + nlk_sk(sk)->portid = portid; + err: release_sock(sk); return err; @@ -3271,7 +3273,7 @@ static inline u32 netlink_hash(const void *data, u32 len, u32 seed) const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); + netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid); return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index df9a060..80b2b75 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -25,6 +25,7 @@ struct netlink_ring { struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; + u32 rhash_portid; u32 portid; u32 dst_portid; u32 dst_group; -- cgit v0.10.2 From 95d0be61286bcc3fc987043fe3a9553adca02919 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 15 Sep 2015 06:04:36 -0700 Subject: drm/i915: fix kernel-doc warnings in intel_audio.c Fix the following 'make htmldocs' warnings: .//drivers/gpu/drm/i915/intel_audio.c:439: warning: No description found for parameter 'intel_encoder' .//drivers/gpu/drm/i915/intel_audio.c:439: warning: Excess function parameter 'encoder' description in 'intel_audio_codec_disable' .//drivers/gpu/drm/i915/intel_audio.c:439: warning: No description found for parameter 'intel_encoder' .//drivers/gpu/drm/i915/intel_audio.c:439: warning: Excess function parameter 'encoder' description in 'intel_audio_codec_disable' Signed-off-by: Geliang Tang Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 89c1a8ce..2a5c76f 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -430,7 +430,7 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder) /** * intel_audio_codec_disable - Disable the audio codec for HD audio - * @encoder: encoder on which to disable audio + * @intel_encoder: encoder on which to disable audio * * The disable sequences must be performed before disabling the transcoder or * port. 
-- cgit v0.10.2 From 793b8bf9ca17aee3b995c095058e6c4e7bd72e02 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Sep 2015 21:21:50 +1000 Subject: powerpc: Wire up sys_membarrier() The selftest passes on 64-bit LE & BE, and 32-bit. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4d65499..126d0c4 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -369,3 +369,4 @@ SYSCALL_SPU(bpf) COMPAT_SYS(execveat) PPC64ONLY(switch_endian) SYSCALL_SPU(userfaultfd) +SYSCALL_SPU(membarrier) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 4a055b6..13411be 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 365 +#define __NR_syscalls 366 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 6ad58d4..6337738 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -387,5 +387,6 @@ #define __NR_execveat 362 #define __NR_switch_endian 363 #define __NR_userfaultfd 364 +#define __NR_membarrier 365 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v0.10.2 From 50745b0a7f46f68574cd2b9ae24566bf026e7ebd Mon Sep 17 00:00:00 2001 From: chandan Date: Fri, 28 Aug 2015 21:10:13 +0530 Subject: Btrfs: Direct I/O: Fix space accounting The following call trace is seen when generic/095 test is executed, WARNING: CPU: 3 PID: 2769 at /home/chandan/code/repos/linux/fs/btrfs/inode.c:8967 btrfs_destroy_inode+0x284/0x2a0() Modules linked in: CPU: 3 PID: 2769 Comm: umount Not tainted 4.2.0-rc5+ #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20150306_163512-brownie 04/01/2014 ffffffff81c08150 ffff8802ec9cbce8 ffffffff81984058 ffff8802ffd8feb0 0000000000000000 ffff8802ec9cbd28 ffffffff81050385 ffff8802ec9cbd38 ffff8802d12f8588 ffff8802d12f8588 ffff8802f15ab000 ffff8800bb96c0b0 Call Trace: [] dump_stack+0x45/0x57 [] warn_slowpath_common+0x85/0xc0 [] warn_slowpath_null+0x15/0x20 [] btrfs_destroy_inode+0x284/0x2a0 [] destroy_inode+0x37/0x60 [] evict+0x109/0x170 [] dispose_list+0x35/0x50 [] evict_inodes+0xaa/0x100 [] generic_shutdown_super+0x47/0xf0 [] kill_anon_super+0x11/0x20 [] btrfs_kill_super+0x13/0x110 [] deactivate_locked_super+0x39/0x70 [] deactivate_super+0x5f/0x70 [] cleanup_mnt+0x3e/0x90 [] __cleanup_mnt+0xd/0x10 [] task_work_run+0x96/0xb0 [] do_notify_resume+0x3d/0x50 [] int_signal+0x12/0x17 This means that the inode had non-zero "outstanding extents" during eviction. This occurs because, during direct I/O, a task which successfully used up its reserved data space would set the BTRFS_INODE_DIO_READY bit and not clear it after finishing the DIO write. A future DIO write could actually fail and the unused reserve space won't be freed because of the previously set BTRFS_INODE_DIO_READY bit. Clearing the BTRFS_INODE_DIO_READY bit in btrfs_direct_IO() caused the following issue, |-----------------------------------+-------------------------------------| | Task A | Task B | |-----------------------------------+-------------------------------------| | Start direct i/o write on inode X.| | | reserve space | | | Allocate ordered extent | | | release reserved space | | | Set BTRFS_INODE_DIO_READY bit. | | | | splice() | | | Transfer data from pipe buffer to | | | destination file.
| | | - kmap(pipe buffer page) | | | - Start direct i/o write on | | | inode X. | | | - reserve space | | | - dio_refill_pages() | | | - sdio->blocks_available == 0 | | | - Since a kernel address is | | | being passed instead of a | | | user space address, | | | iov_iter_get_pages() returns | | | -EFAULT. | | | - Since BTRFS_INODE_DIO_READY is | | | set, we don't release reserved | | | space. | | | - Clear BTRFS_INODE_DIO_READY bit.| | -EIOCBQUEUED is returned. | | |-----------------------------------+-------------------------------------| Hence this commit introduces "struct btrfs_dio_data" to track the usage of reserved data space. The remaining unused "reserve space" can now be freed reliably. Signed-off-by: Chandan Rajendra Reviewed-by: Liu Bo Signed-off-by: Chris Mason diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 81220b2..0ef5cc1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,8 +44,6 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 -/* DIO is ready to submit */ -#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3d983de..b7e439b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7405,6 +7405,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, return em; } +struct btrfs_dio_data { + u64 outstanding_extents; + u64 reserve; +}; static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -7412,10 +7416,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; + struct btrfs_dio_data *dio_data = NULL; u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -7433,7 +7437,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * that anything that needs to check if there's a transction doesn't get * confused. */ - outstanding_extents = current->journal_info; + dio_data = current->journal_info; current->journal_info = NULL; } @@ -7565,17 +7569,18 @@ unlock: * within our reservation, otherwise we need to adjust our inode * counter appropriately. 
*/ - if (*outstanding_extents) { - (*outstanding_extents)--; + if (dio_data->outstanding_extents) { + (dio_data->outstanding_extents)--; } else { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } - current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); - set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); + WARN_ON(dio_data->reserve < len); + dio_data->reserve -= len; + current->journal_info = dio_data; } /* @@ -7598,8 +7603,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); - if (outstanding_extents) - current->journal_info = outstanding_extents; + if (dio_data) + current->journal_info = dio_data; return ret; } @@ -8329,7 +8334,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - u64 outstanding_extents = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_dio_data dio_data = { 0 }; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8367,7 +8373,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; - outstanding_extents = div64_u64(count + + dio_data.outstanding_extents = div64_u64(count + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); @@ -8376,7 +8382,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * do the accounting properly if we go over the number we * originally calculated. Abuse current->journal_info for this. */ - current->journal_info = &outstanding_extents; + dio_data.reserve = round_up(count, root->sectorsize); + current->journal_info = &dio_data; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_end(inode); @@ -8391,16 +8398,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { - /- * If the error comes from submitting stage, - * btrfs_get_blocsk_direct() has free'd data space, - * and metadata space will be handled by - * finish_ordered_fn, don't do that again to make - * sure bytes_may_use is correct. - */ - if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, - &BTRFS_I(inode)->runtime_flags)) - btrfs_delalloc_release_space(inode, count); + if (dio_data.reserve) + btrfs_delalloc_release_space(inode, + dio_data.reserve); } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); -- cgit v0.10.2 From c7d778fa74a88e7b5a09ba498d66bd67a7dc7df0 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 17:54:00 +0200 Subject: net: arc: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c index f9cb99b..ffd1805 100644 --- a/drivers/net/ethernet/arc/emac_arc.c +++ b/drivers/net/ethernet/arc/emac_arc.c @@ -78,6 +78,7 @@ static const struct of_device_id emac_arc_dt_ids[] = { { .compatible = "snps,arc-emac" }, { /* Sentinel */ } }; +MODULE_DEVICE_TABLE(of, emac_arc_dt_ids); static struct platform_driver emac_arc_driver = { .probe = emac_arc_probe, -- cgit v0.10.2 From 46d5a3431f2dbb2392b501be9b9f9c2ca17737d8 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 17:54:30 +0200 Subject: net: systemport: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b9a5a97..f1b5364 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2079,6 +2079,7 @@ static const struct of_device_id bcm_sysport_of_match[] = { { .compatible = "brcm,systemport" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, bcm_sysport_of_match); static struct platform_driver bcm_sysport_driver = { .probe = bcm_sysport_probe, -- cgit v0.10.2 From e8048e5595bedb1f4b20597d059d6877a710f12d Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 17:55:02 +0200 Subject: net: bcmgenet: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index fadbd00..3bc701e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3155,6 +3155,7 @@ static const struct of_device_id bcmgenet_match[] = { { .compatible = "brcm,genet-v4", .data = (void *)GENET_V4 }, { }, }; +MODULE_DEVICE_TABLE(of, bcmgenet_match); static int bcmgenet_probe(struct platform_device *pdev) { -- cgit v0.10.2 From 23860063706f2037008fb423820cdbd0dcd558bb Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 17:55:27 +0200 Subject: net: gianfar_ptp: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Acked-by: Richard Cochran Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 8e3cd77..664d0c2 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -557,6 +557,7 @@ static const struct of_device_id match_table[] = { { .compatible = "fsl,etsec-ptp" }, {}, }; +MODULE_DEVICE_TABLE(of, match_table); static struct platform_driver gianfar_ptp_driver = { .driver = { -- cgit v0.10.2 From ebd8ebf078879973d0e8d2642253c091c23bd7d2 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 17:56:21 +0200 Subject: net: moxa: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index becbb5f..a10c928 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -552,6 +552,7 @@ static const struct of_device_id moxart_mac_match[] = { { .compatible = "moxa,moxart-mac" }, { } }; +MODULE_DEVICE_TABLE(of, moxart_mac_match); static struct platform_driver moxart_mac_driver = { .probe = moxart_mac_probe, -- cgit v0.10.2 From 2f90a3070690ad80c38d650e91b96b5dcbdfe2fd Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 18:16:12 +0200 Subject: net: phy: mdio-bcm-unimac: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 6a52a7f..4bde5e7 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -244,6 +244,7 @@ static const struct of_device_id unimac_mdio_ids[] = { { .compatible = "brcm,unimac-mdio", }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, unimac_mdio_ids); static struct platform_driver unimac_mdio_driver = { .driver = { -- cgit v0.10.2 From 1ccb141e31d0dc4c1234b0886cdc8b4399112c59 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Sep 2015 18:16:29 +0200 Subject: net: phy: mdio-gpio: Fix module autoload for OF platform driver This platform driver has an OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 7dc21e5..3bc9f03 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -261,6 +261,7 @@ static const struct of_device_id mdio_gpio_of_match[] = { { .compatible = "virtual,mdio-gpio", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, mdio_gpio_of_match); static struct platform_driver mdio_gpio_driver = { .probe = mdio_gpio_probe, -- cgit v0.10.2 From 4a476bd6d1d923922ec950ddc4c27b279f6901eb Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Sun, 20 Sep 2015 02:25:38 -0700 Subject: usbnet: New driver for QinHeng CH9200 devices There's a bunch of cheap USB 10/100 devices based on QinHeng chipsets.
The vendor driver supports the CH9100 and CH9200 devices, but the majority of the code is of the if (ch9100) {} else {} form, with the most significant difference being that CH9200 provides a real MII interface but CH9100 fakes one with a bunch of global variables and magic commands. I don't have a CH9100, so it's probably better if someone who does provides an independent driver for it. In any case, this is a lightly cleaned up version of the vendor driver with all the CH9100 code dropped. Signed-off-by: Matthew Garrett Signed-off-by: David S. Miller diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 1610b79..fbb9325 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -583,4 +583,15 @@ config USB_VL600 http://ubuntuforums.org/showpost.php?p=10589647&postcount=17 +config USB_NET_CH9200 + tristate "QingHeng CH9200 USB ethernet support" + depends on USB_USBNET + select MII + help + Choose this option if you have a USB ethernet adapter with a QinHeng + CH9200 chipset. + + To compile this driver as a module, choose M here: the + module will be called ch9200. + endif # USB_NET_DRIVERS diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index cf6a0e6..b5f0406 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -38,4 +38,4 @@ obj-$(CONFIG_USB_NET_HUAWEI_CDC_NCM) += huawei_cdc_ncm.o obj-$(CONFIG_USB_VL600) += lg-vl600.o obj-$(CONFIG_USB_NET_QMI_WWAN) += qmi_wwan.o obj-$(CONFIG_USB_NET_CDC_MBIM) += cdc_mbim.o - +obj-$(CONFIG_USB_NET_CH9200) += ch9200.o diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c new file mode 100644 index 0000000..cabb670 --- /dev/null +++ b/drivers/net/usb/ch9200.c @@ -0,0 +1,443 @@ +/* + * USB 10M/100M ethernet adapter + * + * This file is licensed under the terms of the GNU General Public License + * version 2. 
This program is licensed "as is" without any warranty of any + * kind, whether express or implied + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CH9200_VID 0x1A86 +#define CH9200_PID_E092 0xE092 + +#define CTRL_TIMEOUT_MS 1000 + +#define CONTROL_TIMEOUT_MS 1000 + +#define REQUEST_READ 0x0E +#define REQUEST_WRITE 0x0F + +/* Address space: + * 00-63 : MII + * 64-128: MAC + * + * Note: all accesses must be 16-bit + */ + +#define MAC_REG_CTRL 64 +#define MAC_REG_STATUS 66 +#define MAC_REG_INTERRUPT_MASK 68 +#define MAC_REG_PHY_COMMAND 70 +#define MAC_REG_PHY_DATA 72 +#define MAC_REG_STATION_L 74 +#define MAC_REG_STATION_M 76 +#define MAC_REG_STATION_H 78 +#define MAC_REG_HASH_L 80 +#define MAC_REG_HASH_M1 82 +#define MAC_REG_HASH_M2 84 +#define MAC_REG_HASH_H 86 +#define MAC_REG_THRESHOLD 88 +#define MAC_REG_FIFO_DEPTH 90 +#define MAC_REG_PAUSE 92 +#define MAC_REG_FLOW_CONTROL 94 + +/* Control register bits + * + * Note: bits 13 and 15 are reserved + */ +#define LOOPBACK (0x01 << 14) +#define BASE100X (0x01 << 12) +#define MBPS_10 (0x01 << 11) +#define DUPLEX_MODE (0x01 << 10) +#define PAUSE_FRAME (0x01 << 9) +#define PROMISCUOUS (0x01 << 8) +#define MULTICAST (0x01 << 7) +#define BROADCAST (0x01 << 6) +#define HASH (0x01 << 5) +#define APPEND_PAD (0x01 << 4) +#define APPEND_CRC (0x01 << 3) +#define TRANSMITTER_ACTION (0x01 << 2) +#define RECEIVER_ACTION (0x01 << 1) +#define DMA_ACTION (0x01 << 0) + +/* Status register bits + * + * Note: bits 7-15 are reserved + */ +#define ALIGNMENT (0x01 << 6) +#define FIFO_OVER_RUN (0x01 << 5) +#define FIFO_UNDER_RUN (0x01 << 4) +#define RX_ERROR (0x01 << 3) +#define RX_COMPLETE (0x01 << 2) +#define TX_ERROR (0x01 << 1) +#define TX_COMPLETE (0x01 << 0) + +/* FIFO depth register bits + * + * Note: bits 6 and 14 are reserved + */ + +#define ETH_TXBD (0x01 << 15) +#define ETN_TX_FIFO_DEPTH (0x01 << 8) +#define ETH_RXBD (0x01 << 7) +#define ETH_RX_FIFO_DEPTH (0x01 << 0) + +static int control_read(struct usbnet *dev, + unsigned char request, unsigned short value, + unsigned short index, void *data, unsigned short size, + int timeout) +{ + unsigned char *buf = NULL; + unsigned char request_type; + int err = 0; + + if (request == REQUEST_READ) + request_type = (USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER); + else + request_type = (USB_DIR_IN | USB_TYPE_VENDOR | + USB_RECIP_DEVICE); + + netdev_dbg(dev->net, "Control_read() index=0x%02x size=%d\n", + index, size); + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_out; + } + + err = usb_control_msg(dev->udev, + usb_rcvctrlpipe(dev->udev, 0), + request, request_type, value, index, buf, size, + timeout); + if (err == size) + memcpy(data, buf, size); + else if (err >= 0) + err = -EINVAL; + kfree(buf); + + return err; + +err_out: + return err; +} + +static int control_write(struct usbnet *dev, unsigned char request, + unsigned short value, unsigned short index, + void *data, unsigned short size, int timeout) +{ + unsigned char *buf = NULL; + unsigned char request_type; + int err = 0; + + if (request == REQUEST_WRITE) + request_type = (USB_DIR_OUT | USB_TYPE_VENDOR | + USB_RECIP_OTHER); + else + request_type = (USB_DIR_OUT | USB_TYPE_VENDOR | + USB_RECIP_DEVICE); + + netdev_dbg(dev->net, "Control_write() index=0x%02x size=%d\n", + index, size); + + if (data) { + buf = kmalloc(size, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_out; + } + memcpy(buf, data, size); + } + + err = 
usb_control_msg(dev->udev, + usb_sndctrlpipe(dev->udev, 0), + request, request_type, value, index, buf, size, + timeout); + if (err >= 0 && err < size) + err = -EINVAL; + kfree(buf); + + return 0; + +err_out: + return err; +} + +static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc) +{ + struct usbnet *dev = netdev_priv(netdev); + unsigned char buff[2]; + + netdev_dbg(netdev, "ch9200_mdio_read phy_id:%02x loc:%02x\n", + phy_id, loc); + + if (phy_id != 0) + return -ENODEV; + + control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02, + CONTROL_TIMEOUT_MS); + + return (buff[0] | buff[1] << 8); +} + +static void ch9200_mdio_write(struct net_device *netdev, + int phy_id, int loc, int val) +{ + struct usbnet *dev = netdev_priv(netdev); + unsigned char buff[2]; + + netdev_dbg(netdev, "ch9200_mdio_write() phy_id=%02x loc:%02x\n", + phy_id, loc); + + if (phy_id != 0) + return; + + buff[0] = (unsigned char)val; + buff[1] = (unsigned char)(val >> 8); + + control_write(dev, REQUEST_WRITE, 0, loc * 2, buff, 0x02, + CONTROL_TIMEOUT_MS); +} + +static int ch9200_link_reset(struct usbnet *dev) +{ + struct ethtool_cmd ecmd; + + mii_check_media(&dev->mii, 1, 1); + mii_ethtool_gset(&dev->mii, &ecmd); + + netdev_dbg(dev->net, "link_reset() speed:%d duplex:%d\n", + ecmd.speed, ecmd.duplex); + + return 0; +} + +static void ch9200_status(struct usbnet *dev, struct urb *urb) +{ + int link; + unsigned char *buf; + + if (urb->actual_length < 16) + return; + + buf = urb->transfer_buffer; + link = !!(buf[0] & 0x01); + + if (link) { + netif_carrier_on(dev->net); + usbnet_defer_kevent(dev, EVENT_LINK_RESET); + } else { + netif_carrier_off(dev->net); + } +} + +static struct sk_buff *ch9200_tx_fixup(struct usbnet *dev, struct sk_buff *skb, + gfp_t flags) +{ + int i = 0; + int len = 0; + int tx_overhead = 0; + + tx_overhead = 0x40; + + len = skb->len; + if (skb_headroom(skb) < tx_overhead) { + struct sk_buff *skb2; + + skb2 = skb_copy_expand(skb, tx_overhead, 0, flags); + dev_kfree_skb_any(skb); + skb = skb2; + if (!skb) + return NULL; + } + + __skb_push(skb, tx_overhead); + /* usbnet adds padding if length is a multiple of packet size + * if so, adjust length value in header + */ + if ((skb->len % dev->maxpacket) == 0) + len++; + + skb->data[0] = len; + skb->data[1] = len >> 8; + skb->data[2] = 0x00; + skb->data[3] = 0x80; + + for (i = 4; i < 48; i++) + skb->data[i] = 0x00; + + skb->data[48] = len; + skb->data[49] = len >> 8; + skb->data[50] = 0x00; + skb->data[51] = 0x80; + + for (i = 52; i < 64; i++) + skb->data[i] = 0x00; + + return skb; +} + +static int ch9200_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + int len = 0; + int rx_overhead = 0; + + rx_overhead = 64; + + if (unlikely(skb->len < rx_overhead)) { + dev_err(&dev->udev->dev, "unexpected tiny rx frame\n"); + return 0; + } + + len = (skb->data[skb->len - 16] | skb->data[skb->len - 15] << 8); + skb_trim(skb, len); + + return 1; +} + +static int get_mac_address(struct usbnet *dev, unsigned char *data) +{ + int err = 0; + unsigned char mac_addr[0x06]; + int rd_mac_len = 0; + + netdev_dbg(dev->net, "get_mac_address:\n\tusbnet VID:%0x PID:%0x\n", + dev->udev->descriptor.idVendor, + dev->udev->descriptor.idProduct); + + memset(mac_addr, 0, sizeof(mac_addr)); + rd_mac_len = control_read(dev, REQUEST_READ, 0, + MAC_REG_STATION_L, mac_addr, 0x02, + CONTROL_TIMEOUT_MS); + rd_mac_len += control_read(dev, REQUEST_READ, 0, MAC_REG_STATION_M, + mac_addr + 2, 0x02, CONTROL_TIMEOUT_MS); + rd_mac_len += control_read(dev, REQUEST_READ, 0, 
MAC_REG_STATION_H, + mac_addr + 4, 0x02, CONTROL_TIMEOUT_MS); + if (rd_mac_len != ETH_ALEN) + err = -EINVAL; + + data[0] = mac_addr[5]; + data[1] = mac_addr[4]; + data[2] = mac_addr[3]; + data[3] = mac_addr[2]; + data[4] = mac_addr[1]; + data[5] = mac_addr[0]; + + return err; +} + +static int ch9200_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int retval = 0; + unsigned char data[2]; + + retval = usbnet_get_endpoints(dev, intf); + if (retval) + return retval; + + dev->mii.dev = dev->net; + dev->mii.mdio_read = ch9200_mdio_read; + dev->mii.mdio_write = ch9200_mdio_write; + dev->mii.reg_num_mask = 0x1f; + + dev->mii.phy_id_mask = 0x1f; + + dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + dev->rx_urb_size = 24 * 64 + 16; + mii_nway_restart(&dev->mii); + + data[0] = 0x01; + data[1] = 0x0F; + retval = control_write(dev, REQUEST_WRITE, 0, MAC_REG_THRESHOLD, data, + 0x02, CONTROL_TIMEOUT_MS); + + data[0] = 0xA0; + data[1] = 0x90; + retval = control_write(dev, REQUEST_WRITE, 0, MAC_REG_FIFO_DEPTH, data, + 0x02, CONTROL_TIMEOUT_MS); + + data[0] = 0x30; + data[1] = 0x00; + retval = control_write(dev, REQUEST_WRITE, 0, MAC_REG_PAUSE, data, + 0x02, CONTROL_TIMEOUT_MS); + + data[0] = 0x17; + data[1] = 0xD8; + retval = control_write(dev, REQUEST_WRITE, 0, MAC_REG_FLOW_CONTROL, + data, 0x02, CONTROL_TIMEOUT_MS); + + /* Undocumented register */ + data[0] = 0x01; + data[1] = 0x00; + retval = control_write(dev, REQUEST_WRITE, 0, 254, data, 0x02, + CONTROL_TIMEOUT_MS); + + data[0] = 0x5F; + data[1] = 0x0D; + retval = control_write(dev, REQUEST_WRITE, 0, MAC_REG_CTRL, data, 0x02, + CONTROL_TIMEOUT_MS); + + retval = get_mac_address(dev, dev->net->dev_addr); + + return retval; +} + +static const struct driver_info ch9200_info = { + .description = "CH9200 USB to Network Adaptor", + .flags = FLAG_ETHER, + .bind = ch9200_bind, + .rx_fixup = ch9200_rx_fixup, + .tx_fixup = ch9200_tx_fixup, + .status = ch9200_status, + .link_reset = ch9200_link_reset, + .reset = ch9200_link_reset, +}; + +static const struct usb_device_id ch9200_products[] = { + { + USB_DEVICE(0x1A86, 0xE092), + .driver_info = (unsigned long)&ch9200_info, + }, + {}, +}; + +MODULE_DEVICE_TABLE(usb, ch9200_products); + +static struct usb_driver ch9200_driver = { + .name = "ch9200", + .id_table = ch9200_products, + .probe = usbnet_probe, + .disconnect = usbnet_disconnect, + .suspend = usbnet_suspend, + .resume = usbnet_resume, +}; + +static int __init ch9200_init(void) +{ + return usb_register(&ch9200_driver); +} + +static void __exit ch9200_exit(void) +{ + usb_deregister(&ch9200_driver); +} + +module_init(ch9200_init); +module_exit(ch9200_exit); + +MODULE_DESCRIPTION("QinHeng CH9200 USB Network device"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 5eb8f289ac3020a9abad1c3c532ceca83284b6ed Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 18 Sep 2015 16:20:32 -0400 Subject: geneve: remove vlan-related feature assignment The code handling vlan tag insertion was dropped in commit 371bd1061d29 ("geneve: Consolidate Geneve functionality in single module."). Now we need to drop the related vlan feature bits in the netdev structure. Signed-off-by: John W. Linville Reviewed-by: Jesse Gross Signed-off-by: David S. 
Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index da3259c..8f1e915 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -748,12 +748,8 @@ static void geneve_setup(struct net_device *dev) dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; - dev->vlan_features = dev->features; - dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netif_keep_dst(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; -- cgit v0.10.2 From 4c5d283acc997a1bd7bc37cddcf7d284521cffff Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 18 Sep 2015 17:47:55 -0400 Subject: sunvnet: Invoke SET_NETDEV_DEV() to set up the vdev in vnet_new() `ls /sys/devices/channel-devices/vnet-port-0-0/net' is missing without this change, and applications like NetworkManager are looking in sysfs for the information. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 53fe200..cc106d8 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1756,7 +1756,8 @@ static const struct net_device_ops vnet_ops = { #endif }; -static struct vnet *vnet_new(const u64 *local_mac) +static struct vnet *vnet_new(const u64 *local_mac, + struct vio_dev *vdev) { struct net_device *dev; struct vnet *vp; @@ -1790,6 +1791,8 @@ static struct vnet *vnet_new(const u64 *local_mac) NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; + SET_NETDEV_DEV(dev, &vdev->dev); + err = register_netdev(dev); if (err) { pr_err("Cannot register net device, aborting\n"); @@ -1808,7 +1811,8 @@ err_out_free_dev: return ERR_PTR(err); } -static struct vnet *vnet_find_or_create(const u64 *local_mac) +static struct vnet *vnet_find_or_create(const u64 *local_mac, + struct vio_dev *vdev) { struct vnet *iter, *vp; @@ -1821,7 +1825,7 @@ static struct vnet *vnet_find_or_create(const u64 *local_mac) } } if (!vp) - vp = vnet_new(local_mac); + vp = vnet_new(local_mac, vdev); mutex_unlock(&vnet_list_mutex); return vp; @@ -1848,7 +1852,8 @@ static void vnet_cleanup(void) static const char *local_mac_prop = "local-mac-address"; static struct vnet *vnet_find_parent(struct mdesc_handle *hp, - u64 port_node) + u64 port_node, + struct vio_dev *vdev) { const u64 *local_mac = NULL; u64 a; @@ -1869,7 +1874,7 @@ static struct vnet *vnet_find_parent(struct mdesc_handle *hp, if (!local_mac) return ERR_PTR(-ENODEV); - return vnet_find_or_create(local_mac); + return vnet_find_or_create(local_mac, vdev); } static struct ldc_channel_config vnet_ldc_cfg = { @@ -1923,7 +1928,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) hp = mdesc_grab(); - vp = vnet_find_parent(hp, vdev->mp); + vp = vnet_find_parent(hp, vdev->mp, vdev); if (IS_ERR(vp)) { pr_err("Cannot find port parent vnet\n"); err = PTR_ERR(vp); -- cgit v0.10.2 From ed2e923945892a8372ab70d2f61d364b0b6d9054 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Sep 2015 09:08:34 -0700 Subject: tcp/dccp: fix timewait races in timer handling When creating a timewait socket, we need to arm the timer before allowing other cpus to find it. The signal allowing cpus to find the socket is setting tw_refcnt to a non-zero value.
As we set tw_refcnt in __inet_twsk_hashdance(), we therefore need to call inet_twsk_schedule() first. This also means we need to remove tw_refcnt changes from inet_twsk_schedule() and let the caller handle it. Note that because we use mod_timer_pinned(), we have the guarantee the timer won't expire before we set tw_refcnt as we run in BH context. To make things more readable I introduced the inet_twsk_reschedule() helper. When rearming the timer, we can use mod_timer_pending() to make sure we do not rearm a canceled timer. Note: This bug can possibly trigger if packets of a flow can hit multiple cpus. This does not normally happen, unless flow steering is broken somehow. This explains why this bug was spotted ~5 months after its introduction. A similar fix is needed for SYN_RECV sockets in reqsk_queue_hash_req(), but will be provided in a separate patch for proper tracking. Fixes: 789f558cfb36 ("tcp/dccp: get rid of central timewait timer") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. Miller diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 879d6e5..186f3a1 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,7 +110,19 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, + bool rearm); + +static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + +static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, true); +} + void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 30addee..838f524 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6only = sk->sk_ipv6only; } #endif - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) @@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) timeo = DCCP_TIMEWAIT_LEN; inet_twsk_schedule(tw, timeo); + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ae22cc2..c67f9bd 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -123,13 +123,15 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, /* * Step 2: Hash TW into tcp ehash chain. * Notes : - * - tw_refcnt is set to 3 because : + * - tw_refcnt is set to 4 because : * - We have one reference from bhash chain. * - We have one reference from ehash chain. + * - We have one reference from timer. + * - One reference for ourself (our caller will release it). * We can use atomic_set() because prior spin_lock()/spin_unlock() * committed into memory all tw fields.
*/ - atomic_set(&tw->tw_refcnt, 1 + 1 + 1); + atomic_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -217,7 +219,7 @@ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL(inet_twsk_deschedule_put); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) { /* timeout := RTO * 3.5 * @@ -245,12 +247,14 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) */ tw->tw_kill = timeo <= 4*HZ; - if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { - atomic_inc(&tw->tw_refcnt); + if (!rearm) { + BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo)); atomic_inc(&tw->tw_dr->tw_count); + } else { + mod_timer_pending(&tw->tw_timer, jiffies + timeo); } } -EXPORT_SYMBOL_GPL(inet_twsk_schedule); +EXPORT_SYMBOL_GPL(__inet_twsk_schedule); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6d8795b..def7659 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -162,9 +162,9 @@ kill_with_rst: if (tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, tw->tw_timeout); + inet_twsk_reschedule(tw, tw->tw_timeout); else - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -201,7 +201,7 @@ kill: return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -251,7 +251,7 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); @@ -322,9 +322,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } while (0); #endif - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); - /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; @@ -338,6 +335,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } inet_twsk_schedule(tw, timeo); + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this -- cgit v0.10.2 From 29c6852602e259d2c1882f320b29d5c3fec0de04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Sep 2015 09:48:04 -0700 Subject: inet: fix races in reqsk_queue_hash_req() Before allowing lockless LISTEN processing, we need to make sure to arm the SYN_RECV timer before the req socket is visible in hash tables. Also, req->rsk_hash should be written before we set rsk_refcnt to a non-zero value. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: Eric Dumazet Cc: Ying Cai Signed-off-by: David S.
Miller diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1349571..7bb9c39 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -685,20 +685,20 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue, req->num_timeout = 0; req->sk = NULL; + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); + req->rsk_hash = hash; + /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ smp_wmb(); atomic_set(&req->rsk_refcnt, 2); - setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); - req->rsk_hash = hash; spin_lock(&queue->syn_wait_lock); req->dl_next = lopt->syn_table[hash]; lopt->syn_table[hash] = req; spin_unlock(&queue->syn_wait_lock); - - mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } EXPORT_SYMBOL(reqsk_queue_hash_req); -- cgit v0.10.2 From 2df1b131b54f431877a6665139dac805ba5ce1a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Aug 2015 14:07:13 +0200 Subject: mac80211: fix VHT MCS mask array overrun The HT MCS mask has 9 bytes, the VHT one only has 8 streams. Split the loops to handle this correctly. Reported-by: Dan Carpenter Signed-off-by: Johannes Berg diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 17b1fe9..a30ec3c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2518,15 +2518,17 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, continue; for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { - if (~sdata->rc_rateidx_mcs_mask[i][j]) + if (~sdata->rc_rateidx_mcs_mask[i][j]) { sdata->rc_has_mcs_mask[i] = true; + break; + } + } - if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) + for (j = 0; j < NL80211_VHT_NSS_MAX; j++) { + if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) { sdata->rc_has_vht_mcs_mask[i] = true; - - if (sdata->rc_has_mcs_mask[i] && - sdata->rc_has_vht_mcs_mask[i]) break; + } } } -- cgit v0.10.2 From babc305e21ea3811d98b67437299360904ac1b6a Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 21 Sep 2015 15:47:40 +0300 Subject: mac80211: reset CQM history upon reconfiguration The current behavior of notifying CQM events is inconsistent: Upon first configuration there is a cqm event with the current status according to threshold configured, regardless of signal stability. When there is reconfiguration no event is sent unless there is a significant change to the signal level according to the new configuration. Since the current reconfiguration behavior might cause missing CQM events in case the current signal did not change but is on the other side of the new threshold, fix that by resetting the stored signal level upon reconfiguration. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a30ec3c..7a77a14 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2474,6 +2474,7 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, bss_conf->cqm_rssi_thold = rssi_thold; bss_conf->cqm_rssi_hyst = rssi_hyst; + sdata->u.mgd.last_cqm_event_signal = 0; /* tell the driver upon association, unless already associated */ if (sdata->u.mgd.associated && -- cgit v0.10.2 From b0b4855099e301c8603ea37da9a0103a96c2e0b1 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 22 Sep 2015 14:32:03 +0300 Subject: spi: xtensa-xtfpga: fix register endianness XTFPGA SPI controller has native endian registers. 
Fix register accessors so that they work in big-endian configurations. Signed-off-by: Max Filippov Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c index 2e32ea2..be6155c 100644 --- a/drivers/spi/spi-xtensa-xtfpga.c +++ b/drivers/spi/spi-xtensa-xtfpga.c @@ -34,13 +34,13 @@ struct xtfpga_spi { static inline void xtfpga_spi_write32(const struct xtfpga_spi *spi, unsigned addr, u32 val) { - iowrite32(val, spi->regs + addr); + __raw_writel(val, spi->regs + addr); } static inline unsigned int xtfpga_spi_read32(const struct xtfpga_spi *spi, unsigned addr) { - return ioread32(spi->regs + addr); + return __raw_readl(spi->regs + addr); } static inline void xtfpga_spi_wait_busy(struct xtfpga_spi *xspi) -- cgit v0.10.2 From f0e03dbd2d61d991bdd2d76b4e84681fe3077176 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 22 Sep 2015 15:30:33 +0100 Subject: MAINTAINERS: Update website and git repo for Wolfson Microelectronics Support for Wolfson Microelectronics devices is now part of Cirrus Logic and the relevant parts of the old opensource.wolfsonmicro.com site have moved to the Cirrus Logic GitHub area. This patch updates the website and git repo links, and also removes an obsolete website link for the voltage and current drivers. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7ab7..dffea21 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11239,7 +11239,6 @@ VOLTAGE AND CURRENT REGULATOR FRAMEWORK M: Liam Girdwood M: Mark Brown L: linux-kernel@vger.kernel.org -W: http://opensource.wolfsonmicro.com/node/15 W: http://www.slimlogic.co.uk/?p=48 T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git S: Supported @@ -11368,17 +11367,15 @@ WM97XX TOUCHSCREEN DRIVERS M: Mark Brown M: Liam Girdwood L: linux-input@vger.kernel.org -T: git git://opensource.wolfsonmicro.com/linux-2.6-touch -W: http://opensource.wolfsonmicro.com/node/7 +W: https://github.com/CirrusLogic/linux-drivers/wiki S: Supported F: drivers/input/touchscreen/*wm97* F: include/linux/wm97xx.h WOLFSON MICROELECTRONICS DRIVERS L: patches@opensource.wolfsonmicro.com -T: git git://opensource.wolfsonmicro.com/linux-2.6-asoc -T: git git://opensource.wolfsonmicro.com/linux-2.6-audioplus -W: http://opensource.wolfsonmicro.com/content/linux-drivers-wolfson-devices +T: git https://github.com/CirrusLogic/linux-drivers.git +W: https://github.com/CirrusLogic/linux-drivers/wiki S: Supported F: Documentation/hwmon/wm83?? F: arch/arm/mach-s3c64xx/mach-crag6410* -- cgit v0.10.2 From 2b9dbef272b63c561aab0a5be34fd428f7b710f5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 15 Sep 2015 10:07:04 -0400 Subject: Btrfs: keep dropped roots in cache until transaction commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dropping a snapshot we need to account for the qgroup changes. If we drop the snapshot all in one go then the backref code will fail to find blocks from the snapshot we dropped since it won't be able to find the root in the fs root cache. This can lead to us failing to find refs from other roots that pointed at blocks in the now deleted root. To handle this we need to not remove the fs roots from the cache until after we process the qgroup operations. Do this by adding dropped roots to a list on the transaction, and letting the transaction remove the roots at the same time it drops the commit roots.
This will keep all of the backref searching code in sync properly, and fixes a problem Mark was seeing with snapshot delete and qgroups. Thanks, Signed-off-by: Josef Bacik Tested-by: Holger Hoffstätte Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3686315..9f96042 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8665,7 +8665,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { - btrfs_drop_and_free_fs_root(tree_root->fs_info, root); + btrfs_add_dropped_root(trans, root); } else { free_extent_buffer(root->node); free_extent_buffer(root->commit_root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 68ad89e..a2d6f7b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans, btrfs_unpin_free_ino(root); clear_btree_io_tree(&root->dirty_log_pages); } + + /* We can free old roots now. */ + spin_lock(&trans->dropped_roots_lock); + while (!list_empty(&trans->dropped_roots)) { + root = list_first_entry(&trans->dropped_roots, + struct btrfs_root, root_list); + list_del_init(&root->root_list); + spin_unlock(&trans->dropped_roots_lock); + btrfs_drop_and_free_fs_root(fs_info, root); + spin_lock(&trans->dropped_roots_lock); + } + spin_unlock(&trans->dropped_roots_lock); up_write(&fs_info->commit_root_sem); } @@ -255,11 +267,13 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_ordered); INIT_LIST_HEAD(&cur_trans->dirty_bgs); INIT_LIST_HEAD(&cur_trans->io_bgs); + INIT_LIST_HEAD(&cur_trans->dropped_roots); mutex_init(&cur_trans->cache_write_mutex); cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); spin_lock_init(&cur_trans->deleted_bgs_lock); + spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, } +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + + /* Add ourselves to the transaction dropped list */ + spin_lock(&cur_trans->dropped_roots_lock); + list_add_tail(&root->root_list, &cur_trans->dropped_roots); + spin_unlock(&cur_trans->dropped_roots_lock); + + /* Make sure we don't try to update the root at commit time */ + spin_lock(&root->fs_info->fs_roots_radix_lock); + radix_tree_tag_clear(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&root->fs_info->fs_roots_radix_lock); +} + int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index edc2fbc..87964bf 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -65,6 +65,7 @@ struct btrfs_transaction { struct list_head switch_commits; struct list_head dirty_bgs; struct list_head io_bgs; + struct list_head dropped_roots; u64 num_dirty_bgs; /* @@ -76,6 +77,7 @@ struct btrfs_transaction { spinlock_t dirty_bgs_lock; struct list_head deleted_bgs; spinlock_t deleted_bgs_lock; + spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; int dirty_bg_run; @@ -216,5 +218,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info); int 
btrfs_transaction_in_commit(struct btrfs_fs_info *info); void btrfs_put_transaction(struct btrfs_transaction *transaction); void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); - +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif -- cgit v0.10.2 From 8811191fdf7ed02ee07cb8469428158572d355a2 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 22 Sep 2015 21:20:22 +0200 Subject: ASoC: pxa: pxa2xx-ac97: fix dma requestor lines PCM receive and transmit DMA requestor lines were inverted, breaking the PCM playback interface for PXA platforms using the sound/soc/ variant instead of the sound/arm variant. The commit below shows the inversion in the requestor lines. Fixes: d65a14587a9b ("ASoC: pxa: use snd_dmaengine_dai_dma_data") Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index 1f60546..9e4b04e 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -49,7 +49,7 @@ static struct snd_ac97_bus_ops pxa2xx_ac97_ops = { .reset = pxa2xx_ac97_cold_reset, }; -static unsigned long pxa2xx_ac97_pcm_stereo_in_req = 12; +static unsigned long pxa2xx_ac97_pcm_stereo_in_req = 11; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, @@ -57,7 +57,7 @@ static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .filter_data = &pxa2xx_ac97_pcm_stereo_in_req, }; -static unsigned long pxa2xx_ac97_pcm_stereo_out_req = 11; +static unsigned long pxa2xx_ac97_pcm_stereo_out_req = 12; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_out = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, -- cgit v0.10.2 From b838b39e930aa1cfd099ea82ac40ed6d6413af26 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 22 Sep 2015 17:03:54 -0500 Subject: PCI: Clear IORESOURCE_UNSET when clipping a bridge window c770cb4cb505 ("PCI: Mark invalid BARs as unassigned") sets IORESOURCE_UNSET if we fail to claim a resource. If we tried to claim a bridge window, failed, clipped the window, and tried to claim the clipped window, we failed again because of IORESOURCE_UNSET: pci_bus 0000:00: root bus resource [mem 0xc0000000-0xffffffff window] pci 0000:00:01.0: can't claim BAR 15 [mem 0xbdf00000-0xddefffff 64bit pref]: no compatible bridge window pci 0000:00:01.0: [mem size 0x20000000 64bit pref] clipped to [mem size 0x1df00000 64bit pref] pci 0000:00:01.0: bridge window [mem size 0x1df00000 64bit pref] pci 0000:00:01.0: can't claim BAR 15 [mem size 0x1df00000 64bit pref]: no address assigned The 00:01.0 window started as [mem 0xbdf00000-0xddefffff 64bit pref]. That starts before the host bridge window [mem 0xc0000000-0xffffffff window], so we clipped the 00:01.0 window to [mem 0xc0000000-0xddefffff 64bit pref]. But we left it marked IORESOURCE_UNSET, so the second claim failed when it should have succeeded. This means downstream devices will also fail for lack of resources, e.g., in the bugzilla below, radeon 0000:01:00.0: Fatal error during GPU init Clear IORESOURCE_UNSET when we clip a bridge window. Also clear IORESOURCE_UNSET in our copy of the unclipped window so we can see exactly what the original window was and how it now fits inside the upstream window. 
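To see why the clipped-but-still-UNSET window could not be claimed, it helps to know the shape of the claim path. The sketch below is illustrative only, reconstructed from the log messages quoted above rather than copied from drivers/pci/setup-res.c:

#include <linux/pci.h>

int pci_claim_resource(struct pci_dev *dev, int resource)
{
	struct resource *res = &dev->resource[resource];

	if (res->flags & IORESOURCE_UNSET) {
		/* A clipped window that still carries IORESOURCE_UNSET is
		 * rejected here, producing "no address assigned". */
		dev_info(&dev->dev, "can't claim BAR %d %pR: no address assigned\n",
			 resource, res);
		return -EINVAL;
	}

	/* ...otherwise find a compatible parent window and request the
	 * resource against it ("no compatible bridge window" on failure). */
	return 0;
}

Clearing the flag at clip time lets this early check pass, so the second claim can reach the parent-window lookup.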
Fixes: c770cb4cb505 ("PCI: Mark invalid BARs as unassigned") Link: https://bugzilla.kernel.org/show_bug.cgi?id=85491#c47 Based-on-patch-by: Lorenzo Pieralisi Based-on-patch-by: Yinghai Lu Tested-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Lorenzo Pieralisi Acked-by: Yinghai Lu CC: stable@vger.kernel.org # v4.1+ diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 6fbd3f2..d3346d2 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -256,6 +256,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx) res->start = start; res->end = end; + res->flags &= ~IORESOURCE_UNSET; + orig_res.flags &= ~IORESOURCE_UNSET; dev_printk(KERN_DEBUG, &dev->dev, "%pR clipped to %pR\n", &orig_res, res); -- cgit v0.10.2 From 834e465bba38f2768747bccb5f00e951e72d2bf5 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 22 Sep 2015 06:54:47 +0800 Subject: NFS: Skip checking ds_cinfo.buckets when lseg's commit_through_mds is set When an lseg's commit_through_mds is set, the pNFS client always WARNs once in nfs_direct_select_verf after checking ds_cinfo.nbuckets. NFS should use the DS verf except when commit_through_mds is set for the layout segment, in which case nbuckets is zero. [17844.666094] ------------[ cut here ]------------ [17844.667071] WARNING: CPU: 0 PID: 21758 at /root/source/linux-pnfs/fs/nfs/direct.c:174 nfs_direct_select_verf+0x5a/0x70 [nfs]() [17844.668650] Modules linked in: nfs_layout_nfsv41_files(OE) nfsv4(OE) nfs(OE) fscache(E) nfsd(OE) xfs libcrc32c btrfs ppdev coretemp crct10dif_pclmul auth_rpcgss crc32_pclmul crc32c_intel nfs_acl ghash_clmulni_intel lockd vmw_balloon xor vmw_vmci grace raid6_pq shpchp sunrpc parport_pc i2c_piix4 parport vmwgfx drm_kms_helper ttm drm serio_raw mptspi e1000 scsi_transport_spi mptscsih mptbase ata_generic pata_acpi [last unloaded: fscache] [17844.686676] CPU: 0 PID: 21758 Comm: kworker/0:1 Tainted: G W OE 4.3.0-rc1-pnfs+ #245 [17844.687352] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/20/2014 [17844.698502] Workqueue: nfsiod rpc_async_release [sunrpc] [17844.699212] 0000000000000009 0000000043e58010 ffff8800454fbc10 ffffffff813680c4 [17844.699990] ffff8800454fbc48 ffffffff8108b49d ffff88004eb20000 ffff88004eb20000 [17844.700844] ffff880062e26000 0000000000000000 0000000000000001 ffff8800454fbc58 [17844.701637] Call Trace: [17844.725252] [] dump_stack+0x19/0x25 [17844.732693] [] warn_slowpath_common+0x7d/0xb0 [17844.733855] [] warn_slowpath_null+0x1a/0x20 [17844.735015] [] nfs_direct_select_verf+0x5a/0x70 [nfs] [17844.735999] [] nfs_direct_set_hdr_verf+0x23/0x90 [nfs] [17844.736846] [] nfs_direct_write_completion+0x227/0x260 [nfs] [17844.737782] [] nfs_pgio_release+0x1c/0x20 [nfs] [17844.738597] [] pnfs_generic_rw_release+0x23/0x30 [nfsv4] [17844.739486] [] rpc_free_task+0x2a/0x70 [sunrpc] [17844.740326] [] rpc_async_release+0x15/0x20 [sunrpc] [17844.741173] [] process_one_work+0x21c/0x4c0 [17844.741984] [] ? process_one_work+0x16d/0x4c0 [17844.742837] [] worker_thread+0x4a/0x440 [17844.743639] [] ? process_one_work+0x4c0/0x4c0 [17844.744399] [] ? process_one_work+0x4c0/0x4c0 [17844.745176] [] kthread+0xf5/0x110 [17844.745927] [] ? kthread_create_on_node+0x240/0x240 [17844.747105] [] ret_from_fork+0x3f/0x70 [17844.747856] [] ? 
kthread_create_on_node+0x240/0x240 [17844.748642] ---[ end trace 336a2845d42b83f0 ]--- Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 38678d9..4b1d08f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -166,8 +166,11 @@ nfs_direct_select_verf(struct nfs_direct_req *dreq, struct nfs_writeverf *verfp = &dreq->verf; #ifdef CONFIG_NFS_V4_1 - if (ds_clp) { - /* pNFS is in use, use the DS verf */ + /* + * pNFS is in use, use the DS verf except commit_through_mds is set + * for layout segment where nbuckets is zero. + */ + if (ds_clp && dreq->ds_cinfo.nbuckets > 0) { if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; else -- cgit v0.10.2 From ac5be6b47e8bd25b62bed2c82cda7398999f59e9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:58:49 -0700 Subject: userfaultfd: revert "userfaultfd: waitqueue: add nr wake parameter to __wake_up_locked_key" This reverts commit 51360155eccb907ff8635bd10fc7de876408c2e0 and adapts fs/userfaultfd.c to use the old version of that function. It didn't look robust to call __wake_up_common with "nr == 1" when we absolutely require wake-all semantics, but we have full control of what we insert in the two waitqueue heads of the blocked userfaults. No exclusive waitqueue entry risks being inserted into those two waitqueue heads, so we can stick with the "nr == 1" of the old code and rely purely on the fact that no waitqueue entry inserted in either of the two waitqueue heads we must enforce as wake-all has wait->flags WQ_FLAG_EXCLUSIVE set. Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f9aeb40..5031170 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -467,8 +467,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * the fault_*wqh. 
*/ spin_lock(&ctx->fault_pending_wqh.lock); - __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, &range); - __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, &range); + __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); + __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range); spin_unlock(&ctx->fault_pending_wqh.lock); wake_up_poll(&ctx->fd_wqh, POLLHUP); @@ -650,10 +650,10 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx, spin_lock(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) - __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, + __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) - __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, range); + __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range); spin_unlock(&ctx->fault_pending_wqh.lock); } diff --git a/include/linux/wait.h b/include/linux/wait.h index d3d0772..1e1bf9f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -147,8 +147,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) typedef int wait_bit_action_f(struct wait_bit_key *); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr, - void *key); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); @@ -180,7 +179,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, (void *) (m)) #define wake_up_locked_poll(x, m) \ - __wake_up_locked_key((x), TASK_NORMAL, 1, (void *) (m)) + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) #define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) #define wake_up_interruptible_sync_poll(x, m) \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 272d932..052e026 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -106,10 +106,9 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) } EXPORT_SYMBOL_GPL(__wake_up_locked); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr, - void *key) +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) { - __wake_up_common(q, mode, nr, 0, key); + __wake_up_common(q, mode, 1, 0, key); } EXPORT_SYMBOL_GPL(__wake_up_locked_key); @@ -284,7 +283,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, if (!list_empty(&wait->task_list)) list_del_init(&wait->task_list); else if (waitqueue_active(q)) - __wake_up_locked_key(q, mode, 1, key); + __wake_up_locked_key(q, mode, key); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(abort_exclusive_wait); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b140c09..337ca85 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -297,7 +297,7 @@ static int rpc_complete_task(struct rpc_task *task) clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); ret = atomic_dec_and_test(&task->tk_count); if (waitqueue_active(wq)) - __wake_up_locked_key(wq, TASK_NORMAL, 1, &k); + __wake_up_locked_key(wq, TASK_NORMAL, &k); spin_unlock_irqrestore(&wq->lock, flags); return ret; } -- cgit v0.10.2 From 
d0a871141d07929b559f5eae9c3fc4b63d16866b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 22 Sep 2015 14:58:52 -0700 Subject: userfaultfd: selftests: vm: pick up sanitized kernel headers Add the usr/include subdirectory of the top-level tree to the include path, and include headers without relative paths so that the sanitized headers get picked up. Otherwise the compiler will not be able to find the linux/compiler.h header included by the non-sanitized include/uapi/linux/userfaultfd.h. While at it, make sure to only hardcode the syscall numbers on x86 and PowerPC if they haven't been properly picked up from the headers. Signed-off-by: Thierry Reding Acked-by: Michael Ellerman Cc: Shuah Khan Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d36fab7..949e275 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,6 +1,6 @@ # Makefile for vm selftests -CFLAGS = -Wall +CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) BINARIES = compaction_test BINARIES += hugepage-mmap BINARIES += hugepage-shm diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7c1d958..59d145f 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -64,8 +64,9 @@ #include #include #include -#include "../../../../include/uapi/linux/userfaultfd.h" +#include <linux/userfaultfd.h> +#ifndef __NR_userfaultfd #ifdef __x86_64__ #define __NR_userfaultfd 323 #elif defined(__i386__) #define __NR_userfaultfd 374 @@ -77,6 +78,7 @@ #else #error "missing __NR_userfaultfd definition" #endif +#endif static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; -- cgit v0.10.2 From 67f6a029b2ccf3399783a0ff2f812666f290d94f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:58:55 -0700 Subject: userfaultfd: selftest: headers fixup Depend on "make headers_install" to create proper headers to include and provide syscall numbers. Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 949e275..3c53cac 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -12,8 +12,11 @@ BINARIES += userfaultfd all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ -lrt -userfaultfd: userfaultfd.c - $(CC) $(CFLAGS) -O2 -o $@ $^ -lpthread +userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h + $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread + +../../../../usr/include/linux/kernel.h: + make -C ../../../..
headers_install TEST_PROGS := run_vmtests TEST_FILES := $(BINARIES) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 59d145f..a9e0b91 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -67,18 +67,8 @@ #include #ifndef __NR_userfaultfd -#ifdef __x86_64__ -#define __NR_userfaultfd 323 -#elif defined(__i386__) -#define __NR_userfaultfd 374 -#elif defined(__powewrpc__) -#define __NR_userfaultfd 364 -#elif defined(__s390__) -#define __NR_userfaultfd 355 -#else #error "missing __NR_userfaultfd definition" #endif -#endif static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; -- cgit v0.10.2 From 56ed8f169e225dce1f9e40f6eee2e2dabe7d06fc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 22 Sep 2015 14:58:58 -0700 Subject: userfaultfd: selftest: only warn if __NR_userfaultfd is undefined If __NR_userfaultfd is not yet defined by the arch, warn but still build and run the userfaultfd selftest successfully. Signed-off-by: Michael Ellerman Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index a9e0b91..0671ae1 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -66,9 +66,7 @@ #include #include -#ifndef __NR_userfaultfd -#error "missing __NR_userfaultfd definition" -#endif +#ifdef __NR_userfaultfd static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; @@ -631,3 +629,15 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return 0; +} + +#endif /* __NR_userfaultfd */ -- cgit v0.10.2 From 1f5fee2cf232f9fac05b65f21107d2cf3c32092c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:59:00 -0700 Subject: userfaultfd: selftest: avoid my_bcmp false positives with powerpc Keep a non-zero placeholder after the count, for the my_bcmp comparison of the page against the zeropage. The lockless increment from 255 to 256 against a lockless my_bcmp could otherwise return false positives on ppc32le. Signed-off-by: Andrea Arcangeli Tested-by: Michael Ellerman Cc: Dr. David Alan Gilbert Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 0671ae1..1089709 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -465,6 +465,14 @@ static int userfaultfd_stress(void) *area_mutex(area_src, nr) = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER; count_verify[nr] = *area_count(area_src, nr) = 1; + /* + * In the transition between 255 to 256, powerpc will + * read out of order in my_bcmp and see both bytes as + * zero, so leave a placeholder below always non-zero + * after the count, to avoid my_bcmp to trigger false + * positives. 
+ */ + *(area_count(area_src, nr) + 1) = 1; } pipefd = malloc(sizeof(int) * nr_cpus * 2); @@ -610,8 +618,8 @@ int main(int argc, char **argv) fprintf(stderr, "Usage: \n"), exit(1); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); page_size = sysconf(_SC_PAGE_SIZE); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) > - page_size) + if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 + > page_size) fprintf(stderr, "Impossible to run this test\n"), exit(2); nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / nr_cpus; -- cgit v0.10.2 From a5932bf5737f0b5caf6deaa92b062e4fe66cf5b2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:59:03 -0700 Subject: userfaultfd: selftest: return an error if BOUNCE_VERIFY fails This will report the error in the exit code, in addition to the fprintf. Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 1089709..174f2fc 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -422,7 +422,7 @@ static int userfaultfd_stress(void) struct uffdio_register uffdio_register; struct uffdio_api uffdio_api; unsigned long cpu; - int uffd_flags; + int uffd_flags, err; unsigned long userfaults[nr_cpus]; if (posix_memalign(&area, page_size, nr_pages * page_size)) { @@ -499,6 +499,7 @@ static int userfaultfd_stress(void) pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, 16*1024*1024); + err = 0; while (bounces--) { unsigned long expected_ioctls; @@ -583,8 +584,9 @@ static int userfaultfd_stress(void) area_dst + nr * page_size, sizeof(pthread_mutex_t))) { fprintf(stderr, - "error mutex 2 %lu\n", + "error mutex %lu\n", nr); + err = 1; bounces = 0; } if (*area_count(area_dst, nr) != count_verify[nr]) { @@ -593,6 +595,7 @@ static int userfaultfd_stress(void) *area_count(area_src, nr), count_verify[nr], nr); + err = 1; bounces = 0; } } @@ -609,7 +612,7 @@ static int userfaultfd_stress(void) printf("\n"); } - return 0; + return err; } int main(int argc, char **argv) -- cgit v0.10.2 From 5dd01be14565df814408327971775f36e55bf5e3 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:59:06 -0700 Subject: userfaultfd: selftest: don't error out if pthread_mutex_t isn't identical On ppc big endian this check fails; the mutex doesn't necessarily need to be identical for all pages after pthread_mutex_lock/unlock cycles. The count verification (outside of the pthread_mutex_t structure) suffices and that is retained. Signed-off-by: Andrea Arcangeli Cc: Dr. 
David Alan Gilbert Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 174f2fc..d77ed41 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -580,15 +580,6 @@ static int userfaultfd_stress(void) /* verification */ if (bounces & BOUNCE_VERIFY) { for (nr = 0; nr < nr_pages; nr++) { - if (my_bcmp(area_dst, - area_dst + nr * page_size, - sizeof(pthread_mutex_t))) { - fprintf(stderr, - "error mutex %lu\n", - nr); - err = 1; - bounces = 0; - } if (*area_count(area_dst, nr) != count_verify[nr]) { fprintf(stderr, "error area_count %Lu %Lu %lu\n", -- cgit v0.10.2 From 09f7298100ea9767324298ab0c7979f6d7463183 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 22 Sep 2015 14:59:09 -0700 Subject: userfaultfd: register uapi generic syscall (aarch64) Add the userfaultfd syscall to uapi asm-generic; it was tested with postcopy live migration on aarch64 with both 4k and 64k pagesize kernels. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Andrea Arcangeli Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 8da542a..ee12400 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -709,17 +709,19 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 281 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) -#define __NR_membarrier 282 +#define __NR_userfaultfd 282 +__SYSCALL(__NR_userfaultfd, sys_userfaultfd) +#define __NR_membarrier 283 __SYSCALL(__NR_membarrier, sys_membarrier) #undef __NR_syscalls -#define __NR_syscalls 283 +#define __NR_syscalls 284 /* * All syscalls below here should go away really, * these are provided for both review and as a porting * help for the C library version. -* + * Last chance: are any of these important enough to * enable by default? */ -- cgit v0.10.2 From 8a04446ab0cf4f35d9f583cd6adcbf7c534e4995 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 22 Sep 2015 14:59:12 -0700 Subject: mm, dax: VMA with vm_ops->pfn_mkwrite wants to be write-notified For VM_PFNMAP and VM_MIXEDMAP we use vm_ops->pfn_mkwrite instead of vm_ops->page_mkwrite to notify about write access. This means we want vma->vm_page_prot to be write-protected if the VMA provides this vm_ops. A theoretical scenario that will cause these missed events is: On a writable mapping with vm_ops->pfn_mkwrite, but without vm_ops->page_mkwrite: read fault followed by write access to the pfn. A writable pte will be set up on the read fault and the write fault will not be generated. I found it examining Dave's complaint on generic/080: http://lkml.kernel.org/g/20150831233803.GO3902@dastard Although I don't think it's the reason. It shouldn't be a problem for ext2/ext4 as they provide both pfn_mkwrite and page_mkwrite. [akpm@linux-foundation.org: add local vm_ops to avoid 80-cols mess] Signed-off-by: Kirill A. 
Shutemov Cc: Yigal Korman Acked-by: Boaz Harrosh Cc: Matthew Wilcox Cc: Jan Kara Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index c739d6d..79bcc9f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1490,13 +1490,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) int vma_wants_writenotify(struct vm_area_struct *vma) { vm_flags_t vm_flags = vma->vm_flags; + const struct vm_operations_struct *vm_ops = vma->vm_ops; /* If it was private or non-writable, the write bit is already clear */ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) return 0; /* The backer wishes to know when pages are first written to? */ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) + if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite)) return 1; /* The open routine did something to the protections that pgprot_modify -- cgit v0.10.2 From 3aaa76e125c1dd58c9b599baa8c6021896874c12 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 22 Sep 2015 14:59:14 -0700 Subject: mm: migrate: hugetlb: putback destination hugepage to active list Since commit bcc54222309c ("mm: hugetlb: introduce page_huge_active") each hugetlb page maintains its active flag to avoid a race condition between multiple calls of isolate_huge_page(), but the current kernel doesn't set the flag on a hugepage allocated by migration because the proper putback routine isn't called. This means that users could still encounter the race referred to by bcc54222309c in this special case, so this patch fixes it. Fixes: bcc54222309c ("mm: hugetlb: introduce page_huge_active") Signed-off-by: Naoya Horiguchi Cc: Michal Hocko Cc: Andi Kleen Cc: Hugh Dickins Cc: [4.1.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/migrate.c b/mm/migrate.c index c3cb566..7452a00 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1075,7 +1075,7 @@ out: if (rc != MIGRATEPAGE_SUCCESS && put_new_page) put_new_page(new_hpage, private); else - put_page(new_hpage); + putback_active_hugepage(new_hpage); if (result) { if (rc) -- cgit v0.10.2 From 769a8089c1fd2fe94c13e66fe6e03d7820953ee3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 22 Sep 2015 14:59:17 -0700 Subject: x86, efi, kasan: #undef memset/memcpy/memmove per arch In not-instrumented code KASAN replaces instrumented memset/memcpy/memmove with not-instrumented analogues __memset/__memcpy/__memmove. However, on x86 the EFI stub is not linked with the kernel. It uses not-instrumented mem*() functions from arch/x86/boot/compressed/string.c. So we don't replace them with __mem*() variants in the EFI stub. On ARM64 the EFI stub is linked with the kernel, so we should replace mem*() functions with __mem*(), because the EFI stub runs before KASAN sets up early shadow. So let's move these #undef mem* into arch's asm/efi.h which is also included by the EFI stub. Also, this will fix the warning in 32-bit build reported by kbuild test robot: efi-stub-helper.c:599:2: warning: implicit declaration of function 'memcpy' [akpm@linux-foundation.org: use 80 cols in comment] Signed-off-by: Andrey Ryabinin Reported-by: Fengguang Wu Cc: Will Deacon Cc: Catalin Marinas Cc: Matt Fleming Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 155162e..ab5f1d4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,16 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +/* + * CONFIG_KASAN may redefine memset to __memset. __memset function is present + * only in kernel binary. Since the EFI stub linked into a separate binary it + * doesn't have __memset(). So we should use standard memset from + * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove. + */ +#undef memcpy +#undef memset +#undef memmove + #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index e334a01..6b6548f 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -5,10 +5,6 @@ /* error code which can't be mistaken for valid address */ #define EFI_ERROR (~0UL) -#undef memcpy -#undef memset -#undef memmove - void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, -- cgit v0.10.2 From d046b770c9fc36ccb19c27afdb8322220108cbc7 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Tue, 22 Sep 2015 14:59:20 -0700 Subject: lib/iommu-common.c: do not try to deref a null iommu->lazy_flush() pointer when n < pool->hint The check for invoking iommu->lazy_flush() from iommu_tbl_range_alloc() has to be refactored so that we only call ->lazy_flush() if it is non-null. I had a sparc kernel that was crashing when I was trying to process some very large perf.data files- the crash happens when the scsi driver calls into dma_4v_map_sg and thus the iommu_tbl_range_alloc(). Signed-off-by: Sowmini Varadhan Cc: Benjamin Herrenschmidt Cc: Guenter Roeck Cc: David S. Miller Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/iommu-common.c b/lib/iommu-common.c index ff19f66..b1c93e9 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -21,8 +21,7 @@ static DEFINE_PER_CPU(unsigned int, iommu_hash_common); static inline bool need_flush(struct iommu_map_table *iommu) { - return (iommu->lazy_flush != NULL && - (iommu->flags & IOMMU_NEED_FLUSH) != 0); + return ((iommu->flags & IOMMU_NEED_FLUSH) != 0); } static inline void set_flush(struct iommu_map_table *iommu) @@ -211,7 +210,8 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, goto bail; } } - if (n < pool->hint || need_flush(iommu)) { + if (iommu->lazy_flush && + (n < pool->hint || need_flush(iommu))) { clear_flush(iommu); iommu->lazy_flush(iommu); } -- cgit v0.10.2 From d5028f9f7d8de5c375c52b98976b6f310e73398f Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 22 Sep 2015 14:59:20 -0700 Subject: vmscan: fix sane_reclaim helper for legacy memcg The sane_reclaim() helper is supposed to return false for memcg reclaim if the legacy hierarchy is used, because the latter lacks dirty throttling mechanism, and so it did before it was accidentally broken by commit 33398cf2f360c ("memcg: export struct mem_cgroup"). Fix it. 
Fixes: 33398cf2f360c ("memcg: export struct mem_cgroup") Signed-off-by: Vladimir Davydov Acked-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 2d978b2..7f63a93 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -175,7 +175,7 @@ static bool sane_reclaim(struct scan_control *sc) if (!memcg) return true; #ifdef CONFIG_CGROUP_WRITEBACK - if (memcg->css.cgroup) + if (cgroup_on_dfl(memcg->css.cgroup)) return true; #endif return false; -- cgit v0.10.2 From 7a07b503bf249986a1eeef0351d66cac0d8bf721 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 22 Sep 2015 14:59:20 -0700 Subject: membarrier: clean up selftest We don't need to specify an explicit rule in the Makefile, the implicit one will do the same. The "__EXPORTED_HEADERS__" define is not needed, because we build the test against the installed kernel headers, not the in-tree kernel headers. Re-use "$(TEST_PROGS)" in the clean target rather than spelling the executable name twice. Include rather than the rather specific . Include rather than . In both cases, the former header is located in a standard location and includes the latter. Signed-off-by: Mathieu Desnoyers Acked-by: Michael Ellerman Cc: Pranith Kumar Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile index 877a503..a1a9708 100644 --- a/tools/testing/selftests/membarrier/Makefile +++ b/tools/testing/selftests/membarrier/Makefile @@ -1,11 +1,10 @@ CFLAGS += -g -I../../../../usr/include/ -all: - $(CC) $(CFLAGS) membarrier_test.c -o membarrier_test - TEST_PROGS := membarrier_test +all: $(TEST_PROGS) + include ../lib.mk clean: - $(RM) membarrier_test + $(RM) $(TEST_PROGS) diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index dde3125..535f0fe 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -1,9 +1,6 @@ #define _GNU_SOURCE -#define __EXPORTED_HEADERS__ - #include -#include -#include +#include #include #include #include -- cgit v0.10.2 From 012572d4fc2e4ddd5c8ec8614d51414ec6cae02a Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 22 Sep 2015 14:59:20 -0700 Subject: ocfs2/dlm: fix deadlock when dispatch assert master The order of the following three spinlocks should be: dlm_domain_lock < dlm_ctxt->spinlock < dlm_lock_resource->spinlock But dlm_dispatch_assert_master() is called while holding dlm_ctxt->spinlock and dlm_lock_resource->spinlock, and then it calls dlm_grab() which will take dlm_domain_lock. Once another thread (for example, dlm_query_join_handler) has already taken dlm_domain_lock and tries to take dlm_ctxt->spinlock, a deadlock happens. 
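Schematically, this is the classic ABBA inversion; the pseudo-trace below is illustrative, not kernel code:

/*
 * Thread A (dlm_master_request_handler)    Thread B (dlm_query_join_handler)
 *
 *   spin_lock(&dlm->spinlock);               spin_lock(&dlm_domain_lock);
 *   spin_lock(&res->spinlock);
 *   dlm_dispatch_assert_master()
 *     dlm_grab(dlm)
 *       spin_lock(&dlm_domain_lock);         spin_lock(&dlm->spinlock);
 *         blocks on B                          blocks on A: deadlock
 */

The fix below avoids taking dlm_domain_lock in the dispatch path entirely: the handler already holds a reference from its own dlm_grab(), the work item inherits that reference, and the handler skips its final dlm_put() whenever the work was dispatched.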
Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Cc: "Junxiao Bi" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 46b8b2b..ee5aa4d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, int found, ret; int set_maybe; int dispatch_assert = 0; + int dispatched = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1658,15 +1659,18 @@ send_response: mlog(ML_ERROR, "failed to dispatch assert master work\n"); response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); } - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return response; } @@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); item->u.am.lockres = res; /* already have a ref */ /* can optionally ignore node numbers higher than this node */ diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ce12e0b..3d90ad7 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, unsigned int hash; int master = DLM_LOCK_RES_OWNER_UNKNOWN; u32 flags = DLM_ASSERT_MASTER_REQUERY; + int dispatched = 0; if (!dlm_grab(dlm)) { /* since the domain has gone away on this @@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, dlm_put(dlm); /* sender will take care of this and retry */ return ret; - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { /* put.. incase we are not the master */ @@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, } spin_unlock(&dlm->spinlock); - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return master; } -- cgit v0.10.2 From 08399efc631960c827cebcfe83ad7ed54ebc012b Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 21 Sep 2015 10:29:09 -0400 Subject: geneve: ensure ECN info is handled properly in all tx/rx paths Partially due to a pre-existing "thinko", the new metadata-based tx/rx paths were handling ECN propagation differently than the traditional tx/rx paths. This patch removes the "thinko" (involving multiple ip_hdr assignments) on the rx path and corrects the ECN handling on both the rx and tx paths. Signed-off-by: John W. Linville Reviewed-by: Jesse Gross Signed-off-by: David S. Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 8f1e915..4ce8b7b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -126,6 +126,8 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) __be32 addr; int err; + iph = ip_hdr(skb); /* outer IP header... */ + if (gs->collect_md) { static u8 zero_vni[3]; @@ -133,7 +135,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) addr = 0; } else { vni = gnvh->vni; - iph = ip_hdr(skb); /* Still outer IP header... */ addr = iph->saddr; } @@ -178,7 +179,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) skb_reset_network_header(skb); - iph = ip_hdr(skb); /* Now inner IP header... 
*/ err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { @@ -626,6 +626,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) struct geneve_sock *gs = geneve->sock; struct ip_tunnel_info *info = NULL; struct rtable *rt = NULL; + const struct iphdr *iip; /* interior IP header */ struct flowi4 fl4; __u8 tos, ttl; __be16 sport; @@ -653,6 +654,8 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); skb_reset_mac_header(skb); + iip = ip_hdr(skb); + if (info) { const struct ip_tunnel_key *key = &info->key; u8 *opts = NULL; @@ -668,19 +671,16 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(err)) goto err; - tos = key->tos; + tos = ip_tunnel_ecn_encap(key->tos, iip, skb); ttl = key->ttl; df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; } else { - const struct iphdr *iip; /* interior IP header */ - udp_csum = false; err = geneve_build_skb(rt, skb, 0, geneve->vni, 0, NULL, udp_csum); if (unlikely(err)) goto err; - iip = ip_hdr(skb); tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); ttl = geneve->ttl; if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) -- cgit v0.10.2 From 53adc9e83028d9e35b6408231ebaf62a94a16e4d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 21 Sep 2015 21:42:59 +0100 Subject: net: dsa: actually force the speed on the CPU port Commit 54d792f257c6 ("net: dsa: Centralise global and port setup code into mv88e6xxx.") merged in the 4.2 merge window broke the link speed forcing for the CPU port of Marvell DSA switches. The original code was: /* MAC Forcing register: don't force link, speed, duplex * or flow control state to any particular values on physical * ports, but force the CPU port and all DSA ports to 1000 Mb/s * full duplex. */ if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) REG_WRITE(addr, 0x01, 0x003e); else REG_WRITE(addr, 0x01, 0x0003); but the new code does a read-modify-write: reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); if (dsa_is_cpu_port(ds, port) || ds->dsa_port_mask & (1 << port)) { reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP | PORT_PCS_CTRL_DUPLEX_FULL | PORT_PCS_CTRL_FORCE_DUPLEX; if (mv88e6xxx_6065_family(ds)) reg |= PORT_PCS_CTRL_100; else reg |= PORT_PCS_CTRL_1000; The link speed in the PCS control register is a two bit field. Forcing the link speed in this way doesn't ensure that the bit field is set to the correct value - on the hardware I have here, the speed bitfield remains set to 0x03, resulting in the speed not being forced to gigabit. We must clear both bits before forcing the link speed. Fixes: 54d792f257c6 ("net: dsa: Centralise global and port setup code into mv88e6xxx.") Signed-off-by: Russell King Acked-by: Andrew Lunn Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 6f13f72..f8baa89 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2000,6 +2000,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) */ reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { + reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP | PORT_PCS_CTRL_DUPLEX_FULL | -- cgit v0.10.2 From ae5f2fb1d51fa128a460bcfbe3c56d7ab8bf6a43 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 21 Sep 2015 20:21:20 -0700 Subject: openvswitch: Zero flows on allocation. 
When support for megaflows was introduced, OVS needed to start installing flows with a mask applied to them. Since masking is an expensive operation, OVS also had an optimization that would only take the parts of the flow keys that were covered by a non-zero mask. The values stored in the remaining pieces should not matter because they are masked out. While this works fine for the purposes of matching (which must always look at the mask), serialization to netlink can be problematic. Since the flow and the mask are serialized separately, the uninitialized portions of the flow can be encoded with whatever values happen to be present. In terms of functionality, this has little effect since these fields will be masked out by definition. However, it leaks kernel memory to userspace, which is a potential security vulnerability. It is also possible that other code paths could look at the masked key and get uninitialized data, although this does not currently appear to be an issue in practice. This removes the mask optimization for flows that are being installed. This was always intended to be the case as the mask optimizations were really targeting per-packet flow operations. Fixes: 03f0d916 ("openvswitch: Mega flow implementation") Signed-off-by: Jesse Gross Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6fbd2de..b816ff8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -952,7 +952,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, true, &mask); /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], @@ -1080,7 +1080,7 @@ static struct sw_flow_actions *get_flow_actions(struct net *net, struct sw_flow_key masked_key; int error; - ovs_flow_mask_key(&masked_key, key, mask); + ovs_flow_mask_key(&masked_key, key, true, mask); error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log); if (error) { OVS_NLERR(log, diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d22d8e9..f2ea83b 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -57,20 +57,21 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) } void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) + bool full, const struct sw_flow_mask *mask) { - const long *m = (const long *)((const u8 *)&mask->key + - mask->range.start); - const long *s = (const long *)((const u8 *)src + - mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); + int start = full ? 0 : mask->range.start; + int len = full ? sizeof *dst : range_n_bytes(&mask->range); + const long *m = (const long *)((const u8 *)&mask->key + start); + const long *s = (const long *)((const u8 *)src + start); + long *d = (long *)((u8 *)dst + start); int i; - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. + /* If 'full' is true then all of 'dst' is fully initialized. Otherwise, + * if 'full' is false the memory outside of the 'mask->range' is left + * uninitialized. This can be used as an optimization when further + * operations on 'dst' only use contents within 'mask->range'. 
*/ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + for (i = 0; i < len; i += sizeof(long)) *d++ = *s++ & *m++; } @@ -475,7 +476,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, u32 hash; struct sw_flow_key masked_key; - ovs_flow_mask_key(&masked_key, unmasked, mask); + ovs_flow_mask_key(&masked_key, unmasked, false, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 616eda1..2dd9900 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); + bool full, const struct sw_flow_mask *mask); #endif /* flow_table.h */ -- cgit v0.10.2 From 23eedbc2435ddd226717603c4f3c8efec7bdbb4d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 22 Sep 2015 09:29:49 +0200 Subject: ch9200: Convert to use module_usb_driver Converts the ch9200 driver to use the module_usb_driver() macro which makes the code smaller and a bit simpler. Signed-off-by: Tobias Klauser Acked-by: Matthew Garrett Signed-off-by: David S. Miller diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c index cabb670..5e151e6 100644 --- a/drivers/net/usb/ch9200.c +++ b/drivers/net/usb/ch9200.c @@ -426,18 +426,7 @@ static struct usb_driver ch9200_driver = { .resume = usbnet_resume, }; -static int __init ch9200_init(void) -{ - return usb_register(&ch9200_driver); -} - -static void __exit ch9200_exit(void) -{ - usb_deregister(&ch9200_driver); -} - -module_init(ch9200_init); -module_exit(ch9200_exit); +module_usb_driver(ch9200_driver); MODULE_DESCRIPTION("QinHeng CH9200 USB Network device"); MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 7def0f952eccdd0edb3c504f4dab35ee0d3aba1f Mon Sep 17 00:00:00 2001 From: Dmitriy Vyukov Date: Tue, 22 Sep 2015 10:51:52 +0200 Subject: lib: fix data race in rhashtable_rehash_one rhashtable_rehash_one() uses complex logic to update the entry->next field, after INIT_RHT_NULLS_HEAD and NULLS_MARKER expansion: entry->next = 1 | ((base + off) << 1) This can be compiled along the lines of: entry->next = base + off entry->next <<= 1 entry->next |= 1 This will break concurrent readers. NULLS value recomputation is not needed here, so just remove the complex logic. The data race was found with KernelThreadSanitizer (KTSAN). Signed-off-by: Dmitry Vyukov Acked-by: Eric Dumazet Acked-by: Thomas Graf Acked-by: Herbert Xu Signed-off-by: David S. 
Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index cc0c697..a54ff89 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -187,10 +187,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) head = rht_dereference_bucket(new_tbl->buckets[new_hash], new_tbl, new_hash); - if (rht_is_a_nulls(head)) - INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash); - else - RCU_INIT_POINTER(entry->next, head); + RCU_INIT_POINTER(entry->next, head); rcu_assign_pointer(new_tbl->buckets[new_hash], entry); spin_unlock(new_bucket_lock); -- cgit v0.10.2 From fbd03513bf36c4e5c2942f436f05c8eb99a3cc9e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 22 Sep 2015 11:28:14 +0200 Subject: net: dsa: Fix Marvell Egress Trailer check The Marvell Egress rx trailer check must be fixed to correctly detect bad bits in the third byte of the Egress trailer as described in Table 28 of the 88E6060 datasheet. The current code incorrectly fails to check the third byte and checks the fourth byte twice. Signed-off-by: Neil Armstrong Acked-by: Guenter Roeck Signed-off-by: David S. Miller diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d25efc9..b6ca089 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -78,7 +78,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, trailer = skb_tail_pointer(skb) - 4; if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || - (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00) + (trailer[2] & 0xef) != 0x00 || trailer[3] != 0x00) goto out_drop; source_port = trailer[1] & 7; -- cgit v0.10.2 From 41b578fb0e8b930f2470d3f673b0fa279e77a7b8 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 22 Sep 2015 12:15:54 -0700 Subject: drm/i915: workaround bad DSL readout v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW at least (still testing other platforms, but should be harmless elsewhere), the DSL reg reads back as 0 when read around vblank start time. This ends up confusing the atomic start/end checking code, since it causes the update to appear as if it crossed a frame count boundary. Avoid the problem by making sure we don't return scanline_offset from the get_crtc_scanline function. v2: move hsw dsl read hack to get_crtc_scanline (Ville) v3: use break instead of goto (Ville) update comment with workaround details (Ville) References: https://bugs.freedesktop.org/show_bug.cgi?id=91579 Signed-off-by: Jesse Barnes Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 5a244ab..39d73db 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -640,6 +640,32 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3; /* + * On HSW, the DSL reg (0x70000) appears to return 0 if we + * read it just before the start of vblank. So try it again + * so we don't accidentally end up spanning a vblank frame + * increment, causing the pipe_update_end() code to squak at us. + * + * The nature of this problem means we can't simply check the ISR + * bit and return the vblank start value; nor can we use the scanline + * debug register in the transcoder as it appears to have the same + * problem. 
We may need to extend this to include other platforms, + * but so far testing only shows the problem on HSW. + */ + if (IS_HASWELL(dev) && !position) { + int i, temp; + + for (i = 0; i < 100; i++) { + udelay(1); + temp = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & + DSL_LINEMASK_GEN3; + if (temp != position) { + position = temp; + break; + } + } + } + + /* * See update_scanline_offset() for the details on the * scanline_offset adjustment. */ -- cgit v0.10.2 From 721a09f7393de6c28a07516dccd654c6e995944a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 15 Sep 2015 14:28:54 +0200 Subject: drm/i915: Add primary plane to mask if it's visible This fixes the warnings like "plane A assertion failure, should be disabled but not" that occur on the initial modeset during boot. This can happen if the primary plane is enabled by the firmware, but inheriting it fails because the DMAR is active or for other reasons. Most likely caused by commit 36750f284b3a4f19b304fda1bb7d6e9e1275ea8d Author: Maarten Lankhorst Date: Mon Jun 1 12:49:54 2015 +0200 drm/i915: update plane state during init Reported-by: Andreas Reis Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91429 Reported-and-tested-by: Emil Renner Berthing Tested-by: Andreas Reis Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8cc9264..cf418be 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15087,9 +15087,12 @@ static void readout_plane_state(struct intel_crtc *crtc, plane_state = to_intel_plane_state(p->base.state); - if (p->base.type == DRM_PLANE_TYPE_PRIMARY) + if (p->base.type == DRM_PLANE_TYPE_PRIMARY) { plane_state->visible = primary_get_hw_state(crtc); - else { + if (plane_state->visible) + crtc->base.state->plane_mask |= + 1 << drm_plane_index(&p->base); + } else { if (active) p->disable_plane(&p->base, &crtc->base); -- cgit v0.10.2 From d6eb71a6d2eda21c8cd7a4dcd6207a0d94eb6ae7 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 23 Sep 2015 08:37:59 +0530 Subject: cxl: Fix lockdep warning while creating afu_err_buff attribute Presently a lockdep warning is reported during creation of the afu_err_buff bin_attribute for the afu. This is caused by the variable attr.key not pointing to a static class key, hence the function lockdep_init_map reports this warning: BUG: key not in .data! The patch fixes this issue by calling sysfs_attr_init on the attr_eb.attr structure before populating it with the afu_err_buff file details. This will populate the attr.key variable with a static class key so that lockdep_init_map stops complaining about the lockdep key not being static. 
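The same rule applies to any dynamically allocated sysfs attribute: lockdep wants a static class key, and sysfs_attr_init() supplies one. A minimal sketch of the pattern follows; apart from sysfs_attr_init() and device_create_bin_file(), the names here are illustrative rather than taken from the cxl driver:

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/sysfs.h>

static ssize_t eb_read(struct file *filp, struct kobject *kobj,
		       struct bin_attribute *attr, char *buf,
		       loff_t off, size_t count)
{
	return 0;	/* placeholder read handler */
}

static int add_err_buff_file(struct device *dev, size_t len)
{
	struct bin_attribute *eb = kzalloc(sizeof(*eb), GFP_KERNEL);

	if (!eb)
		return -ENOMEM;

	/* Must run before the attribute is registered: this hands lockdep
	 * a static class key and avoids "BUG: key not in .data!". */
	sysfs_attr_init(&eb->attr);

	eb->attr.name = "afu_err_buff";
	eb->attr.mode = S_IRUGO;
	eb->size = len;
	eb->read = eb_read;

	return device_create_bin_file(dev, eb);
}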
Reported-by: Daniel Axtens Signed-off-by: Vaibhav Jain Acked-by: Ian Munsie Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 25868c2..02006f71 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -592,6 +592,8 @@ int cxl_sysfs_afu_add(struct cxl_afu *afu) /* conditionally create the add the binary file for error info buffer */ if (afu->eb_len) { + sysfs_attr_init(&afu->attr_eb.attr); + afu->attr_eb.attr.name = "afu_err_buff"; afu->attr_eb.attr.mode = S_IRUGO; afu->attr_eb.size = afu->eb_len; -- cgit v0.10.2 From 500d701f336b2771d34e46da7875a4782515a652 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 22 Sep 2015 11:35:22 +0800 Subject: NFS41: make close wait for layoutreturn If we send a layoutreturn asynchronously before close, the close might reach the server first and the layoutreturn would fail with BADSTATEID because there is nothing keeping the layout stateid alive. Also do not pretend to send a layoutreturn if we are not. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef6b46e..f93b9cd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2658,6 +2658,15 @@ out: return err; } +static bool +nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task) +{ + if (inode == NULL || !nfs_have_layout(inode)) + return false; + + return pnfs_wait_on_layoutreturn(inode, task); +} + struct nfs4_closedata { struct inode *inode; struct nfs4_state *state; @@ -2776,6 +2785,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_no_action; } + if (nfs4_wait_on_layoutreturn(inode, task)) { + nfs_release_seqid(calldata->arg.seqid); + goto out_wait; + } + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc) @@ -5321,6 +5335,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; + if (nfs4_wait_on_layoutreturn(d_data->inode, task)) + return; + if (d_data->roc) pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ba12464..8abe271 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1104,20 +1104,15 @@ bool pnfs_roc(struct inode *ino) mark_lseg_invalid(lseg, &tmp_list); found = true; } - /* pnfs_prepare_layoutreturn() grabs lo ref and it will be put - * in pnfs_roc_release(). We don't really send a layoutreturn but - * still want others to view us like we are sending one! - * - * If pnfs_prepare_layoutreturn() fails, it means someone else is doing - * LAYOUTRETURN, so we proceed like there are no layouts to return. - * - * ROC in three conditions: + /* ROC in two conditions: * 1. there are ROC lsegs * 2. we don't send layoutreturn - * 3. no others are sending layoutreturn */ - if (found && !layoutreturn && pnfs_prepare_layoutreturn(lo)) + if (found && !layoutreturn) { + /* lo ref dropped in pnfs_roc_release() */ + pnfs_get_layout_hdr(lo); roc = true; + } out_noroc: spin_unlock(&ino->i_lock); @@ -1172,6 +1167,26 @@ void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) spin_unlock(&ino->i_lock); } +bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *lo; + bool sleep = false; + + /* we might not have grabbed lo reference. 
so need to check under + * i_lock */ + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + sleep = true; + spin_unlock(&ino->i_lock); + + if (sleep) + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + + return sleep; +} + /* * Compare two layout segments for sorting into layout cache. * We want to preferentially return RW over RO layouts, so ensure those diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 78c9351..d1990e9 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -270,6 +270,7 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier); +bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); @@ -639,6 +640,12 @@ pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) { } +static inline bool +pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) +{ + return false; +} + static inline void set_pnfs_layoutdriver(struct nfs_server *s, const struct nfs_fh *mntfh, u32 id) { -- cgit v0.10.2 From cd67d226ebd909d239d2c6e5a6abd6e2a338d1cd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 17 Sep 2015 16:42:07 +0300 Subject: drm/i915/bios: handle MIPI Sequence Block v3+ gracefully The VBT MIPI Sequence Block version 3 has forward-incompatible changes: First, the block size in the header has been specified as reserved, and the actual size is a separate 32-bit value within the block. The current find_section() function will only look at the size in the block header, and, depending on what's in that now reserved size field, continue looking for other sections in the wrong place. Fix this by taking the new block size field into account. This will ensure that the lookups for other sections will work properly, as long as the new 32-bit size does not go beyond the opregion VBT mailbox size. Second, the contents of the block have been completely changed. Gracefully refuse to parse the yet-unknown data version. Cc: Deepak M Cc: stable@vger.kernel.org Reviewed-by: Deepak M Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b3e437b..c19e669 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -42,7 +42,7 @@ find_section(const void *_bdb, int section_id) const struct bdb_header *bdb = _bdb; const u8 *base = _bdb; int index = 0; - u16 total, current_size; + u32 total, current_size; u8 current_id; /* skip to first section */ @@ -57,6 +57,10 @@ find_section(const void *_bdb, int section_id) current_size = *((const u16 *)(base + index)); index += 2; + /* The MIPI Sequence Block v3+ has a separate size field. */ + if (current_id == BDB_MIPI_SEQUENCE && *(base + index) >= 3) + current_size = *((const u32 *)(base + index + 1)); + if (index + current_size > total) return NULL; @@ -799,6 +803,12 @@ parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) return; } + /* Fail gracefully for forward incompatible sequence block. 
*/ + if (sequence->version >= 3) { + DRM_ERROR("Unable to parse MIPI Sequence Block v3+\n"); + return; + } + DRM_DEBUG_DRIVER("Found MIPI sequence block\n"); block_size = get_blocksize(sequence); -- cgit v0.10.2 From 21cb13e72b02dbbb5a02477d4dd46bc2bc1cfd08 Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Wed, 23 Sep 2015 14:35:28 +0800 Subject: ASoC: rt5645: Use the type SOC_DAPM_SINGLE_AUTODISABLE to prevent the weird sound in runtime of power up Signed-off-by: Oder Chiou Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index dbc1d76..25c34fb 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -732,14 +732,14 @@ static const struct snd_kcontrol_new rt5645_mono_adc_r_mix[] = { static const struct snd_kcontrol_new rt5645_dac_l_mix[] = { SOC_DAPM_SINGLE("Stereo ADC Switch", RT5645_AD_DA_MIXER, RT5645_M_ADCMIX_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 Switch", RT5645_AD_DA_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 Switch", RT5645_AD_DA_MIXER, RT5645_M_DAC1_L_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5645_dac_r_mix[] = { SOC_DAPM_SINGLE("Stereo ADC Switch", RT5645_AD_DA_MIXER, RT5645_M_ADCMIX_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 Switch", RT5645_AD_DA_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 Switch", RT5645_AD_DA_MIXER, RT5645_M_DAC1_R_SFT, 1, 1), }; -- cgit v0.10.2 From 4f4794124e0421b080a0f1f5f1207636ba55eb85 Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Wed, 23 Sep 2015 14:35:29 +0800 Subject: ASoC: rt5645: Increase the delay time to remove the pop sound Signed-off-by: Oder Chiou Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 25c34fb..61384ee9 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -1381,7 +1381,7 @@ static void hp_amp_power(struct snd_soc_codec *codec, int on) regmap_write(rt5645->regmap, RT5645_PR_BASE + RT5645_MAMP_INT_REG2, 0xfc00); snd_soc_write(codec, RT5645_DEPOP_M2, 0x1140); - mdelay(5); + msleep(40); rt5645->hp_on = true; } else { /* depop parameters */ -- cgit v0.10.2 From fce97b4d70ad632dd9c6058622492501377bbaaa Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Wed, 23 Sep 2015 14:35:30 +0800 Subject: ASoC: rt5645: Prevent the pop sound in case of playback and the jack is plugging Signed-off-by: Oder Chiou Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 61384ee9..268a28b 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2832,6 +2832,9 @@ static int rt5645_jack_detect(struct snd_soc_codec *codec, int jack_insert) } else { /* jack out */ rt5645->jack_type = 0; + regmap_update_bits(rt5645->regmap, RT5645_HP_VOL, + RT5645_L_MUTE | RT5645_R_MUTE, + RT5645_L_MUTE | RT5645_R_MUTE); regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL2, RT5645_CBJ_MN_JD, RT5645_CBJ_MN_JD); regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL1, -- cgit v0.10.2 From 3daea9e3d3ecd217a63f35e63f18ea7138f2ae17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sat, 5 Sep 2015 11:12:27 +0200 Subject: drm/amdgpu: add option to disable semaphores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide module parameter to enable/disable them. Still enabled by default. 
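When semaphores are enabled the rings synchronize entirely on the GPU; with the knob cleared the driver instead blocks on each ring's fence from the CPU, which is slower but useful when debugging inter-ring synchronization issues. A minimal sketch of the fallback pattern (equivalent to the amdgpu_sync_wait() change below; variable declarations as in the driver):

	if (!amdgpu_enable_semaphores) {
		/* no semaphores: wait for every ring's fence on the CPU */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_fence *fence = sync->sync_to[i];

			if (!fence)
				continue;

			r = fence_wait(&fence->base, false);
			if (r)
				return r;
		}
	}

Because the parameter is registered with mode 0644 it can also be flipped at runtime via /sys/module/amdgpu/parameters/enable_semaphores.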
Signed-off-by: Christian König Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 668939a..e587e20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -82,6 +82,7 @@ extern int amdgpu_vm_block_size; extern int amdgpu_enable_scheduler; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; +extern int amdgpu_enable_semaphores; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0fcc0bd..adb4835 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -79,6 +79,7 @@ int amdgpu_exp_hw_support = 0; int amdgpu_enable_scheduler = 0; int amdgpu_sched_jobs = 16; int amdgpu_sched_hw_submission = 2; +int amdgpu_enable_semaphores = 1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -152,6 +153,9 @@ module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); +MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable (default), 0 = disable)"); +module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644); + static struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 068aeaf..cfd2999 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -251,6 +251,20 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync) fence_put(e->fence); kfree(e); } + + if (amdgpu_enable_semaphores) + return 0; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_fence *fence = sync->sync_to[i]; + if (!fence) + continue; + + r = fence_wait(&fence->base, false); + if (r) + return r; + } + return 0; } @@ -285,7 +299,8 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, return -EINVAL; } - if (amdgpu_enable_scheduler || (count >= AMDGPU_NUM_SYNCS)) { + if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || + (count >= AMDGPU_NUM_SYNCS)) { /* not enough room, wait manually */ r = fence_wait(&fence->base, false); if (r) -- cgit v0.10.2 From 22c01cc48301f6974868bd4a7b03e29883da1103 Mon Sep 17 00:00:00 2001 From: Anatoli Antonovitch Date: Thu, 3 Sep 2015 11:13:31 -0400 Subject: drm/amdgpu: execution barrier after fence v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Insert wait for reg mem after EOP to fix potential issue with vm context switch v2: move wait to vm_flush() use equal instead of greater than. Signed-off-by: Anatoli Antonovitch Signed-off-by: Christian König Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 53f0743..0f979ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3965,6 +3965,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); + } /** @@ -4044,6 +4045,17 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; + uint64_t addr = ring->fence_drv.gpu_addr; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ + WAIT_REG_MEM_FUNCTION(3))); /* equal */ + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, 4); /* poll interval */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | -- cgit v0.10.2 From 20a85ff846ffed84fba8637abbb6b1c96436c0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sat, 5 Sep 2015 11:59:50 +0200 Subject: drm/amdgpu: use write confirm for vm_flush() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the CP waits for the write to be confirmed before invalidating. Signed-off-by: Christian König Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0f979ab..d6d330b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4059,7 +4059,8 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | - WRITE_DATA_DST_SEL(0))); + WRITE_DATA_DST_SEL(0)) | + WR_CONFIRM); if (vm_id < 8) { amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); -- cgit v0.10.2 From 353da3c520b47272b9e3ddbc70b81be285c0b933 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 7 Sep 2015 16:06:53 +0800 Subject: drm/amdgpu: add tracepoint for scheduler (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit track sched job status like the length of job queue and hw job queue. 
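Once built in, the event can be enabled like any other tracepoint (assuming the standard tracefs layout, under events/gpu_sched/amd_sched_job); each queued job then logs one line in the TP_printk() format defined below, e.g. (values purely illustrative):

	entity=ffff88003c2f0000, ring=0, job count:3, hw job count:1

Here job count is the kfifo fill level divided by the size of one queued pointer, i.e. the number of jobs waiting in the entity's software queue, and hw job count is the scheduler's in-flight counter.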
v2: fix build after rebase Signed-off-by: Chunming Zhou Reviewed-by: Jammy Zhou Reviewed-by: Christian König diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h new file mode 100644 index 0000000..a1f4ece --- /dev/null +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -0,0 +1,41 @@ +#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _GPU_SCHED_TRACE_H_ + +#include +#include +#include + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_sched +#define TRACE_INCLUDE_FILE gpu_sched_trace + +TRACE_EVENT(amd_sched_job, + TP_PROTO(struct amd_sched_job *sched_job), + TP_ARGS(sched_job), + TP_STRUCT__entry( + __field(struct amd_sched_entity *, entity) + __field(u32, ring_id) + __field(u32, job_count) + __field(int, hw_job_count) + ), + + TP_fast_assign( + __entry->entity = sched_job->s_entity; + __entry->ring_id = sched_job->sched->ring_id; + __entry->job_count = kfifo_len( + &sched_job->s_entity->job_queue) / sizeof(sched_job); + __entry->hw_job_count = atomic_read( + &sched_job->sched->hw_rq_count); + ), + TP_printk("entity=%p, ring=%u, job count:%u, hw job count:%d", + __entry->entity, __entry->ring_id, __entry->job_count, + __entry->hw_job_count) +); +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 9259f1b..5ad7fa8 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -27,6 +27,9 @@ #include #include "gpu_scheduler.h" +#define CREATE_TRACE_POINTS +#include "gpu_sched_trace.h" + static struct amd_sched_job * amd_sched_entity_pop_job(struct amd_sched_entity *entity); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); @@ -273,7 +276,7 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job) wait_event(entity->scheduler->job_scheduled, amd_sched_entity_in(sched_job)); - + trace_amd_sched_job(sched_job); return 0; } -- cgit v0.10.2 From 27439fcac03632f2b1fd85268dc61af828c77e7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 2 Sep 2015 12:03:06 +0200 Subject: drm/amdgpu: signal scheduler fence when hw submission fails v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the resource blocked by it will never be reclaimed. v2: add DRM_ERROR. 
v3: fix typo in commit message Signed-off-by: Christian König Reviewed-by: Junwei Zhang Reviewed-by: Chunming Zhou Reviewed-by: Jammy Zhou diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 5ad7fa8..905fd30 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -362,6 +362,9 @@ static int amd_sched_main(void *param) else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); fence_put(fence); + } else { + DRM_ERROR("Failed to run job!\n"); + amd_sched_process_job(NULL, &job->cb); } count = kfifo_out(&entity->job_queue, &job, sizeof(job)); -- cgit v0.10.2 From 258f3f99d514172aa5a9df15e6d6ebe33aad2f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 31 Aug 2015 17:02:52 +0200 Subject: drm/amdgpu: move scheduler fence callback into fence v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And call the processed callback directly after submitting the job. v2: split adding error handling into separate patch. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Junwei Zhang Reviewed-by: Jammy Zhou diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 905fd30..92b000d 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -319,15 +319,13 @@ amd_sched_select_job(struct amd_gpu_scheduler *sched) static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) { - struct amd_sched_job *sched_job = - container_of(cb, struct amd_sched_job, cb); - struct amd_gpu_scheduler *sched; + struct amd_sched_fence *s_fence = + container_of(cb, struct amd_sched_fence, cb); + struct amd_gpu_scheduler *sched = s_fence->scheduler; - sched = sched_job->sched; - amd_sched_fence_signal(sched_job->s_fence); atomic_dec(&sched->hw_rq_count); - fence_put(&sched_job->s_fence->base); - sched->ops->process_job(sched_job); + amd_sched_fence_signal(s_fence); + fence_put(&s_fence->base); wake_up_interruptible(&sched->wake_up_worker); } @@ -341,6 +339,7 @@ static int amd_sched_main(void *param) while (!kthread_should_stop()) { struct amd_sched_entity *entity; + struct amd_sched_fence *s_fence; struct amd_sched_job *job; struct fence *fence; @@ -352,19 +351,21 @@ static int amd_sched_main(void *param) continue; entity = job->s_entity; + s_fence = job->s_fence; atomic_inc(&sched->hw_rq_count); fence = sched->ops->run_job(job); + sched->ops->process_job(job); if (fence) { - r = fence_add_callback(fence, &job->cb, + r = fence_add_callback(fence, &s_fence->cb, amd_sched_process_job); if (r == -ENOENT) - amd_sched_process_job(fence, &job->cb); + amd_sched_process_job(fence, &s_fence->cb); else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); fence_put(fence); } else { DRM_ERROR("Failed to run job!\n"); - amd_sched_process_job(NULL, &job->cb); + amd_sched_process_job(NULL, &s_fence->cb); } count = kfifo_out(&entity->job_queue, &job, sizeof(job)); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 2af0e4d..7a0552f 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -62,13 +62,13 @@ struct amd_sched_rq { struct amd_sched_fence { struct fence base; + struct fence_cb cb; struct amd_gpu_scheduler *scheduler; spinlock_t lock; void *owner; }; struct amd_sched_job { - struct fence_cb 
cb; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; struct amd_sched_fence *s_fence; -- cgit v0.10.2 From 1886d1a9caed20f457dd69a926c7f8b54c2d5f48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 31 Aug 2015 17:28:28 +0200 Subject: drm/amdgpu: remove process_job callback from the scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just free the resources immediately after submitting the job. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Junwei Zhang Reviewed-by: Jammy Zhou diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index de98fbd..5724a81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -35,8 +35,8 @@ static struct fence *amdgpu_sched_dependency(struct amd_sched_job *job) static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) { + struct amdgpu_fence *fence = NULL; struct amdgpu_job *sched_job; - struct amdgpu_fence *fence; int r; if (!job) { @@ -49,41 +49,26 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) sched_job->num_ibs, sched_job->ibs, sched_job->base.owner); - if (r) + if (r) { + DRM_ERROR("Error scheduling IBs (%d)\n", r); goto err; + } + fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); +err: if (sched_job->free_job) sched_job->free_job(sched_job); mutex_unlock(&sched_job->job_lock); - return &fence->base; - -err: - DRM_ERROR("Run job error\n"); - mutex_unlock(&sched_job->job_lock); - job->sched->ops->process_job(job); - return NULL; -} - -static void amdgpu_sched_process_job(struct amd_sched_job *job) -{ - struct amdgpu_job *sched_job; - - if (!job) { - DRM_ERROR("job is null\n"); - return; - } - sched_job = (struct amdgpu_job *)job; - /* after processing job, free memory */ fence_put(&sched_job->base.s_fence->base); kfree(sched_job); + return fence ? 
&fence->base : NULL; } struct amd_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_sched_dependency, .run_job = amdgpu_sched_run_job, - .process_job = amdgpu_sched_process_job }; int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 92b000d..191fd51 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -354,7 +354,6 @@ static int amd_sched_main(void *param) s_fence = job->s_fence; atomic_inc(&sched->hw_rq_count); fence = sched->ops->run_job(job); - sched->ops->process_job(job); if (fence) { r = fence_add_callback(fence, &s_fence->cb, amd_sched_process_job); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 7a0552f..ac56d92 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -93,7 +93,6 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) struct amd_sched_backend_ops { struct fence *(*dependency)(struct amd_sched_job *job); struct fence *(*run_job)(struct amd_sched_job *job); - void (*process_job)(struct amd_sched_job *job); }; /** -- cgit v0.10.2 From b7d698d7fd7d132c6ebe56d230584f2cae6c94ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 7 Sep 2015 12:32:09 +0200 Subject: drm/amdgpu: fix overflow on 32bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mem->start is a long, so this can overflow on 32bit systems. Signed-off-by: Christian König Reviewed-by: Jammy Zhou Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f68b7cd..b68dcb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -855,7 +855,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, int r; if (mem) { - addr = mem->start << PAGE_SHIFT; + addr = (u64)mem->start << PAGE_SHIFT; if (mem->mem_type != TTM_PL_TT) addr += adev->vm_manager.vram_base_offset; } else { -- cgit v0.10.2 From 72d7668b5ba5180b651e8a07dd6ed62e4e26f207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 3 Sep 2015 17:34:59 +0200 Subject: drm/amdgpu: export reservation_object from dmabuf to ttm (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an extra argument to amdgpu_bo_create, which is only used in amdgpu_prime.c. Port of radeon commit 831b6966a60fe72d85ae3576056b4e4e0775b112. v2: fix up kfd. 
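The extra reservation_object argument lets a BO imported from a dma-buf share the exporter's reservation object rather than allocating a private one, so fences attached by the exporter are honoured automatically; all other callers pass NULL and keep the old behaviour. The import-side usage, as in the amdgpu_prime.c hunk below:

	struct reservation_object *resv = attach->dmabuf->resv;

	ww_mutex_lock(&resv->lock, NULL);
	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
			       AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
	ww_mutex_unlock(&resv->lock);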
Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 496ed21..84d68d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -183,7 +183,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); if (r) { dev_err(rdev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 98d59ee..cd639c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -79,7 +79,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, int time; n = AMDGPU_BENCHMARK_ITERATIONS; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, &sobj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, + NULL, &sobj); if (r) { goto out_cleanup; } @@ -91,7 +92,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, if (r) { goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, &dobj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, + NULL, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 6b1243f..1c3fc99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -86,7 +86,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages); ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, &bo); + AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo); if (ret) return ret; ret = amdgpu_bo_reserve(bo, false); @@ -197,7 +197,8 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, true, domain, flags, - NULL, &placement, &obj); + NULL, &placement, NULL, + &obj); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6ff6ae9..2d569ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -246,7 +246,7 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->vram_scratch.robj); + NULL, NULL, &adev->vram_scratch.robj); if (r) { return r; } @@ -449,7 +449,8 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) if (adev->wb.wb_obj == NULL) { r = amdgpu_bo_create(adev, AMDGPU_MAX_WB * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, &adev->wb.wb_obj); + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + &adev->wb.wb_obj); if (r) { dev_warn(adev->dev, "(%d) create WB bo failed\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index cbd3a48..7312d72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -127,7 +127,7 @@ int 
amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->gart.robj); + NULL, NULL, &adev->gart.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5839fab..b75c3b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -69,7 +69,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, } } retry: - r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, flags, NULL, &robj); + r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, + flags, NULL, NULL, &robj); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 5c8a803..534fc04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -43,7 +43,7 @@ static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->irq.ih.ring_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, - NULL, &adev->irq.ih.ring_obj); + NULL, NULL, &adev->irq.ih.ring_obj); if (r) { DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 08b09d5..f25cfed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -215,6 +215,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct ttm_placement *placement, + struct reservation_object *resv, struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; @@ -261,7 +262,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, /* Kernel allocation are uninterruptible */ r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, - acc_size, sg, NULL, &amdgpu_ttm_bo_destroy); + acc_size, sg, resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) { return r; } @@ -275,7 +276,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, - struct sg_table *sg, struct amdgpu_bo **bo_ptr) + struct sg_table *sg, + struct reservation_object *resv, + struct amdgpu_bo **bo_ptr) { struct ttm_placement placement = {0}; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; @@ -286,11 +289,9 @@ int amdgpu_bo_create(struct amdgpu_device *adev, amdgpu_ttm_placement_init(adev, &placement, placements, domain, flags); - return amdgpu_bo_create_restricted(adev, size, byte_align, - kernel, domain, flags, - sg, - &placement, - bo_ptr); + return amdgpu_bo_create_restricted(adev, size, byte_align, kernel, + domain, flags, sg, &placement, + resv, bo_ptr); } int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 6ea18dc..3c2ff45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -129,12 +129,14 @@ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, struct sg_table *sg, + struct reservation_object *resv, struct amdgpu_bo 
**bo_ptr); int amdgpu_bo_create_restricted(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct ttm_placement *placement, + struct reservation_object *resv, struct amdgpu_bo **bo_ptr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index d9652fe..59f735a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -61,12 +61,15 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg) { + struct reservation_object *resv = attach->dmabuf->resv; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_bo *bo; int ret; + ww_mutex_lock(&resv->lock, NULL); ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, &bo); + AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); + ww_mutex_unlock(&resv->lock); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 9bec914..130292d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -407,7 +407,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, if (ring->ring_obj == NULL) { r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, - NULL, &ring->ring_obj); + NULL, NULL, &ring->ring_obj); if (r) { dev_err(adev->dev, "(%d) ring create failed\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 74dad27..b70ce10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -64,8 +64,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&sa_manager->flist[i]); } - r = amdgpu_bo_create(adev, size, align, true, - domain, 0, NULL, &sa_manager->bo); + r = amdgpu_bo_create(adev, size, align, true, domain, + 0, NULL, NULL, &sa_manager->bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index f80b1a4..4865615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -59,8 +59,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, &vram_obj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, 0, + NULL, NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -80,7 +81,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) struct fence *fence = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + NULL, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b5abd5c..364cbe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -861,7 +861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, 
true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->stollen_vga_memory); + NULL, NULL, &adev->stollen_vga_memory); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 482e667..5cc95f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -247,7 +247,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) const struct common_firmware_header *header = NULL; err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, bo); + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, bo); if (err) { dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); err = -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 2cf6c6b..3cc8663 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -156,7 +156,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->uvd.vcpu_bo); + NULL, NULL, &adev->uvd.vcpu_bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); return r; @@ -905,7 +905,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &bo); + NULL, NULL, &bo); if (r) return r; @@ -954,7 +954,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &bo); + NULL, NULL, &bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 3cab96c..c90caf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -143,7 +143,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->vce.vcpu_bo); + NULL, NULL, &adev->vce.vcpu_bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b68dcb9..8927dc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1101,7 +1101,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_NO_CPU_ACCESS, - NULL, &pt); + NULL, NULL, &pt); if (r) goto error_free; @@ -1303,7 +1303,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) r = amdgpu_bo_create(adev, pd_size, align, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_NO_CPU_ACCESS, - NULL, &vm->page_directory); + NULL, NULL, &vm->page_directory); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smc.c b/drivers/gpu/drm/amd/amdgpu/cz_smc.c index a72ffc7..e33180d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_smc.c @@ -814,7 +814,8 @@ int cz_smu_init(struct amdgpu_device *adev) * 3. 
map kernel virtual address */ ret = amdgpu_bo_create(adev, priv->toc_buffer.data_size, PAGE_SIZE, - true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, toc_buf); + true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + toc_buf); if (ret) { dev_err(adev->dev, "(%d) SMC TOC buffer allocation failed\n", ret); @@ -822,7 +823,8 @@ int cz_smu_init(struct amdgpu_device *adev) } ret = amdgpu_bo_create(adev, priv->smu_buffer.data_size, PAGE_SIZE, - true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, smu_buf); + true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + smu_buf); if (ret) { dev_err(adev->dev, "(%d) SMC Internal buffer allocation failed\n", ret); diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c index 322edea..bda1249 100644 --- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c @@ -764,7 +764,7 @@ int fiji_smu_init(struct amdgpu_device *adev) ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, toc_buf); + NULL, NULL, toc_buf); if (ret) { DRM_ERROR("Failed to allocate memory for TOC buffer\n"); return -ENOMEM; @@ -774,7 +774,7 @@ int fiji_smu_init(struct amdgpu_device *adev) ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, smu_buf); + NULL, NULL, smu_buf); if (ret) { DRM_ERROR("Failed to allocate memory for SMU internal buffer\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 4bd1e5c..392ec10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3206,7 +3206,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -3373,7 +3373,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, sizeof(struct bonaire_mqd), PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &ring->mqd_obj); if (r) { dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); @@ -3788,7 +3788,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->gfx.rlc.save_restore_obj); + NULL, NULL, + &adev->gfx.rlc.save_restore_obj); if (r) { dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); return r; @@ -3831,7 +3832,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->gfx.rlc.clear_state_obj); + NULL, NULL, + &adev->gfx.rlc.clear_state_obj); if (r) { dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v7_0_rlc_fini(adev); @@ -3870,7 +3872,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, &adev->gfx.rlc.cp_table_obj); + NULL, NULL, + &adev->gfx.rlc.cp_table_obj); if (r) { dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); gfx_v7_0_rlc_fini(adev); @@ -4851,21 +4854,21 @@ static int gfx_v7_0_sw_init(void 
*handle) r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GDS, 0, - NULL, &adev->gds.gds_gfx_bo); + NULL, NULL, &adev->gds.gds_gfx_bo); if (r) return r; r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GWS, 0, - NULL, &adev->gds.gws_gfx_bo); + NULL, NULL, &adev->gds.gws_gfx_bo); if (r) return r; r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_OA, 0, - NULL, &adev->gds.oa_gfx_bo); + NULL, NULL, &adev->gds.oa_gfx_bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d6d330b..78e5900 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -868,7 +868,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -995,21 +995,21 @@ static int gfx_v8_0_sw_init(void *handle) /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GDS, 0, + AMDGPU_GEM_DOMAIN_GDS, 0, NULL, NULL, &adev->gds.gds_gfx_bo); if (r) return r; r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GWS, 0, + AMDGPU_GEM_DOMAIN_GWS, 0, NULL, NULL, &adev->gds.gws_gfx_bo); if (r) return r; r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_OA, 0, + AMDGPU_GEM_DOMAIN_OA, 0, NULL, NULL, &adev->gds.oa_gfx_bo); if (r) return r; @@ -3106,7 +3106,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) sizeof(struct vi_mqd), PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - &ring->mqd_obj); + NULL, &ring->mqd_obj); if (r) { dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c index c900aa9..966d4b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c @@ -625,7 +625,7 @@ int iceland_smu_init(struct amdgpu_device *adev) ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, toc_buf); + NULL, NULL, toc_buf); if (ret) { DRM_ERROR("Failed to allocate memory for TOC buffer\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c index 1f5ac94..5421309 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c @@ -763,7 +763,7 @@ int tonga_smu_init(struct amdgpu_device *adev) ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, toc_buf); + NULL, NULL, toc_buf); if (ret) { DRM_ERROR("Failed to allocate memory for TOC buffer\n"); return -ENOMEM; @@ -773,7 +773,7 @@ int tonga_smu_init(struct amdgpu_device *adev) ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, smu_buf); + NULL, NULL, smu_buf); if (ret) { DRM_ERROR("Failed to allocate memory for SMU internal buffer\n"); return -ENOMEM; -- cgit v0.10.2 From a5b750583eb4af69da1e659c7684b6d370b2ae97 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 3 Sep 2015 16:40:39 +0200 Subject: drm/amdgpu: validate duplicates in the CS as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows for multiple BOs to have the same reservation object. Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3b355ae..4b92e38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -321,25 +321,17 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) return max(bytes_moved_threshold, 1024*1024ull); } -int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p) +int amdgpu_cs_list_validate(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct list_head *validated) { - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_device *adev = p->adev; struct amdgpu_bo_list_entry *lobj; - struct list_head duplicates; struct amdgpu_bo *bo; u64 bytes_moved = 0, initial_bytes_moved; u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev); int r; - INIT_LIST_HEAD(&duplicates); - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); - if (unlikely(r != 0)) { - return r; - } - - list_for_each_entry(lobj, &p->validated, tv.head) { + list_for_each_entry(lobj, validated, tv.head) { bo = lobj->robj; if (!bo->pin_count) { u32 domain = lobj->prefered_domains; @@ -373,7 +365,6 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p) domain = lobj->allowed_domains; goto retry; } - ttm_eu_backoff_reservation(&p->ticket, &p->validated); return r; } } @@ -386,6 +377,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_cs_buckets buckets; + struct list_head duplicates; bool need_mmap_lock = false; int i, r; @@ -405,8 +397,22 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) if (need_mmap_lock) down_read(¤t->mm->mmap_sem); - r = amdgpu_cs_list_validate(p); + INIT_LIST_HEAD(&duplicates); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); + if (unlikely(r != 0)) + goto error_reserve; + + r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated); + if (r) + goto error_validate; + + r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates); + +error_validate: + if (r) + ttm_eu_backoff_reservation(&p->ticket, &p->validated); +error_reserve: if (need_mmap_lock) up_read(¤t->mm->mmap_sem); -- cgit v0.10.2 From bf60efd353f68e5dec1a177b5cbe4da07c819569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 4 Sep 2015 10:47:56 +0200 Subject: drm/amdgpu: use only one reservation object for each VM v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduces the locking and fencing overhead. v2: add comment why we need the duplicates list in the GEM op. 
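With the page directory and every page table sharing one reservation object, a single amdgpu_bo_fence() on the directory now fences all page tables at once. The flip side is that ttm_eu_reserve_buffers() will encounter the same reservation object more than once, so the GEM update path must pass a duplicates list or the second reservation would fail with -EALREADY (see the hunk below):

	INIT_LIST_HEAD(&duplicates);
	/* Provide duplicates to avoid -EALREADY */
	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);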
Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Jammy Zhou diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b75c3b2..2f39fea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -455,11 +455,12 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct ttm_validate_buffer tv, *entry; struct amdgpu_bo_list_entry *vm_bos; struct ww_acquire_ctx ticket; - struct list_head list; + struct list_head list, duplicates; unsigned domain; int r; INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&duplicates); tv.bo = &bo_va->bo->tbo; tv.shared = true; @@ -469,7 +470,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (!vm_bos) return; - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); + /* Provide duplicates to avoid -EALREADY */ + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); if (r) goto error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8927dc6..d30fbec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -686,31 +686,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, } /** - * amdgpu_vm_fence_pts - fence page tables after an update - * - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * @fence: fence to use - * - * Fence the page tables in the range @start - @end (cayman+). - * - * Global and local mutex must be locked! - */ -static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm, - uint64_t start, uint64_t end, - struct fence *fence) -{ - unsigned i; - - start >>= amdgpu_vm_block_size; - end >>= amdgpu_vm_block_size; - - for (i = start; i <= end; ++i) - amdgpu_bo_fence(vm->page_tables[i].bo, fence, true); -} - -/** * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer @@ -813,8 +788,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_vm_fence_pts(vm, mapping->it.start, - mapping->it.last + 1, f); + amdgpu_bo_fence(vm->page_directory, f, true); if (fence) { fence_put(*fence); *fence = fence_get(f); @@ -1089,6 +1063,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* walk over the address space and allocate the page tables */ for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { + struct reservation_object *resv = vm->page_directory->tbo.resv; struct amdgpu_bo *pt; if (vm->page_tables[pt_idx].bo) @@ -1097,11 +1072,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* drop mutex to allocate and clear page table */ mutex_unlock(&vm->mutex); + ww_mutex_lock(&resv->lock, NULL); r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_NO_CPU_ACCESS, - NULL, NULL, &pt); + NULL, resv, &pt); + ww_mutex_unlock(&resv->lock); if (r) goto error_free; -- cgit v0.10.2 From 4c7eb91cae88fd2aa101750d6825b4176f85ffb2 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Wed, 9 Sep 2015 09:05:55 +0800 Subject: drm/amdgpu: refine the job naming for amdgpu_job and amdgpu_sched_job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use consistent naming across functions. 
Reviewed-by: Christian König Reviewed-by: David Zhou Signed-off-by: Junwei Zhang diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e587e20..58fe9fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1275,7 +1275,7 @@ struct amdgpu_job { uint32_t num_ibs; struct mutex job_lock; struct amdgpu_user_fence uf; - int (*free_job)(struct amdgpu_job *sched_job); + int (*free_job)(struct amdgpu_job *job); }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4b92e38..4c16988 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -778,15 +778,15 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return 0; } -static int amdgpu_cs_free_job(struct amdgpu_job *sched_job) +static int amdgpu_cs_free_job(struct amdgpu_job *job) { int i; - if (sched_job->ibs) - for (i = 0; i < sched_job->num_ibs; i++) - amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); - kfree(sched_job->ibs); - if (sched_job->uf.bo) - drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base); + if (job->ibs) + for (i = 0; i < job->num_ibs; i++) + amdgpu_ib_free(job->adev, &job->ibs[i]); + kfree(job->ibs); + if (job->uf.bo) + drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 5724a81..af1a3da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -27,42 +27,42 @@ #include #include "amdgpu.h" -static struct fence *amdgpu_sched_dependency(struct amd_sched_job *job) +static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) { - struct amdgpu_job *sched_job = (struct amdgpu_job *)job; - return amdgpu_sync_get_fence(&sched_job->ibs->sync); + struct amdgpu_job *job = (struct amdgpu_job *)sched_job; + return amdgpu_sync_get_fence(&job->ibs->sync); } -static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) +static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) { struct amdgpu_fence *fence = NULL; - struct amdgpu_job *sched_job; + struct amdgpu_job *job; int r; - if (!job) { + if (!sched_job) { DRM_ERROR("job is null\n"); return NULL; } - sched_job = (struct amdgpu_job *)job; - mutex_lock(&sched_job->job_lock); - r = amdgpu_ib_schedule(sched_job->adev, - sched_job->num_ibs, - sched_job->ibs, - sched_job->base.owner); + job = (struct amdgpu_job *)sched_job; + mutex_lock(&job->job_lock); + r = amdgpu_ib_schedule(job->adev, + job->num_ibs, + job->ibs, + job->base.owner); if (r) { DRM_ERROR("Error scheduling IBs (%d)\n", r); goto err; } - fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); + fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence); err: - if (sched_job->free_job) - sched_job->free_job(sched_job); + if (job->free_job) + job->free_job(job); - mutex_unlock(&sched_job->job_lock); - fence_put(&sched_job->base.s_fence->base); - kfree(sched_job); + mutex_unlock(&job->job_lock); + fence_put(&job->base.s_fence->base); + kfree(job); return fence ? 
&fence->base : NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3cc8663..1a8e43b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -805,10 +805,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) } static int amdgpu_uvd_free_job( - struct amdgpu_job *sched_job) + struct amdgpu_job *job) { - amdgpu_ib_free(sched_job->adev, sched_job->ibs); - kfree(sched_job->ibs); + amdgpu_ib_free(job->adev, job->ibs); + kfree(job->ibs); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c90caf8..74f2038a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -342,10 +342,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } static int amdgpu_vce_free_job( - struct amdgpu_job *sched_job) + struct amdgpu_job *job) { - amdgpu_ib_free(sched_job->adev, sched_job->ibs); - kfree(sched_job->ibs); + amdgpu_ib_free(job->adev, job->ibs); + kfree(job->ibs); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d30fbec..1e14531 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -316,12 +316,12 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, } } -int amdgpu_vm_free_job(struct amdgpu_job *sched_job) +int amdgpu_vm_free_job(struct amdgpu_job *job) { int i; - for (i = 0; i < sched_job->num_ibs; i++) - amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); - kfree(sched_job->ibs); + for (i = 0; i < job->num_ibs; i++) + amdgpu_ib_free(job->adev, &job->ibs[i]); + kfree(job->ibs); return 0; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 191fd51..5c8dcf8 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -68,29 +68,29 @@ static struct amd_sched_job * amd_sched_rq_select_job(struct amd_sched_rq *rq) { struct amd_sched_entity *entity; - struct amd_sched_job *job; + struct amd_sched_job *sched_job; spin_lock(&rq->lock); entity = rq->current_entity; if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { - job = amd_sched_entity_pop_job(entity); - if (job) { + sched_job = amd_sched_entity_pop_job(entity); + if (sched_job) { rq->current_entity = entity; spin_unlock(&rq->lock); - return job; + return sched_job; } } } list_for_each_entry(entity, &rq->entities, list) { - job = amd_sched_entity_pop_job(entity); - if (job) { + sched_job = amd_sched_entity_pop_job(entity); + if (sched_job) { rq->current_entity = entity; spin_unlock(&rq->lock); - return job; + return sched_job; } if (entity == rq->current_entity) @@ -208,15 +208,15 @@ static struct amd_sched_job * amd_sched_entity_pop_job(struct amd_sched_entity *entity) { struct amd_gpu_scheduler *sched = entity->scheduler; - struct amd_sched_job *job; + struct amd_sched_job *sched_job; if (ACCESS_ONCE(entity->dependency)) return NULL; - if (!kfifo_out_peek(&entity->job_queue, &job, sizeof(job))) + if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job))) return NULL; - while ((entity->dependency = sched->ops->dependency(job))) { + while ((entity->dependency = sched->ops->dependency(sched_job))) { if (fence_add_callback(entity->dependency, &entity->cb, amd_sched_entity_wakeup)) @@ -225,32 +225,33 @@ 
amd_sched_entity_pop_job(struct amd_sched_entity *entity) return NULL; } - return job; + return sched_job; } /** * Helper to submit a job to the job queue * - * @job The pointer to job required to submit + * @sched_job The pointer to job required to submit * * Returns true if we could submit the job. */ -static bool amd_sched_entity_in(struct amd_sched_job *job) +static bool amd_sched_entity_in(struct amd_sched_job *sched_job) { - struct amd_sched_entity *entity = job->s_entity; + struct amd_sched_entity *entity = sched_job->s_entity; bool added, first = false; spin_lock(&entity->queue_lock); - added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job); + added = kfifo_in(&entity->job_queue, &sched_job, + sizeof(sched_job)) == sizeof(sched_job); - if (added && kfifo_len(&entity->job_queue) == sizeof(job)) + if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job)) first = true; spin_unlock(&entity->queue_lock); /* first job wakes up scheduler */ if (first) - amd_sched_wakeup(job->sched); + amd_sched_wakeup(sched_job->sched); return added; } @@ -258,7 +259,7 @@ static bool amd_sched_entity_in(struct amd_sched_job *job) /** * Submit a job to the job queue * - * @job The pointer to job required to submit + * @sched_job The pointer to job required to submit * * Returns 0 for success, negative error code otherwise. */ @@ -304,17 +305,17 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) static struct amd_sched_job * amd_sched_select_job(struct amd_gpu_scheduler *sched) { - struct amd_sched_job *job; + struct amd_sched_job *sched_job; if (!amd_sched_ready(sched)) return NULL; /* Kernel run queue has higher priority than normal run queue*/ - job = amd_sched_rq_select_job(&sched->kernel_rq); - if (job == NULL) - job = amd_sched_rq_select_job(&sched->sched_rq); + sched_job = amd_sched_rq_select_job(&sched->kernel_rq); + if (sched_job == NULL) + sched_job = amd_sched_rq_select_job(&sched->sched_rq); - return job; + return sched_job; } static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) @@ -340,20 +341,20 @@ static int amd_sched_main(void *param) while (!kthread_should_stop()) { struct amd_sched_entity *entity; struct amd_sched_fence *s_fence; - struct amd_sched_job *job; + struct amd_sched_job *sched_job; struct fence *fence; wait_event_interruptible(sched->wake_up_worker, kthread_should_stop() || - (job = amd_sched_select_job(sched))); + (sched_job = amd_sched_select_job(sched))); - if (!job) + if (!sched_job) continue; - entity = job->s_entity; - s_fence = job->s_fence; + entity = sched_job->s_entity; + s_fence = sched_job->s_fence; atomic_inc(&sched->hw_rq_count); - fence = sched->ops->run_job(job); + fence = sched->ops->run_job(sched_job); if (fence) { r = fence_add_callback(fence, &s_fence->cb, amd_sched_process_job); @@ -367,8 +368,9 @@ static int amd_sched_main(void *param) amd_sched_process_job(NULL, &s_fence->cb); } - count = kfifo_out(&entity->job_queue, &job, sizeof(job)); - WARN_ON(count != sizeof(job)); + count = kfifo_out(&entity->job_queue, &sched_job, + sizeof(sched_job)); + WARN_ON(count != sizeof(sched_job)); wake_up(&sched->job_scheduled); } return 0; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index ac56d92..f33df6c 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -91,8 +91,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) * these functions should be implemented in driver 
side */ struct amd_sched_backend_ops { - struct fence *(*dependency)(struct amd_sched_job *job); - struct fence *(*run_job)(struct amd_sched_job *job); + struct fence *(*dependency)(struct amd_sched_job *sched_job); + struct fence *(*run_job)(struct amd_sched_job *sched_job); }; /** -- cgit v0.10.2 From a6db8a33e164ae72fb5429ab637e8cfee057a722 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Wed, 9 Sep 2015 09:21:19 +0800 Subject: drm/amdgpu: refine the scheduler job type conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use container_of rather than casting. Reviewed-by: Christian König Reviewed-by: David Zhou Signed-off-by: Junwei Zhang diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 58fe9fa..dbe061b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1277,6 +1277,8 @@ struct amdgpu_job { struct amdgpu_user_fence uf; int (*free_job)(struct amdgpu_job *job); }; +#define to_amdgpu_job(sched_job) \ + container_of((sched_job), struct amdgpu_job, base) static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4c16988..546968a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -863,7 +863,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job->free_job = amdgpu_cs_free_job; mutex_lock(&job->job_lock); - r = amd_sched_entity_push_job((struct amd_sched_job *)job); + r = amd_sched_entity_push_job(&job->base); if (r) { mutex_unlock(&job->job_lock); amdgpu_cs_free_job(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index af1a3da..58408da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -29,7 +29,7 @@ static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) { - struct amdgpu_job *job = (struct amdgpu_job *)sched_job; + struct amdgpu_job *job = to_amdgpu_job(sched_job); return amdgpu_sync_get_fence(&job->ibs->sync); } @@ -43,7 +43,7 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) DRM_ERROR("job is null\n"); return NULL; } - job = (struct amdgpu_job *)sched_job; + job = to_amdgpu_job(sched_job); mutex_lock(&job->job_lock); r = amdgpu_ib_schedule(job->adev, job->num_ibs, @@ -94,7 +94,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, mutex_init(&job->job_lock); job->free_job = free_job; mutex_lock(&job->job_lock); - r = amd_sched_entity_push_job((struct amd_sched_job *)job); + r = amd_sched_entity_push_job(&job->base); if (r) { mutex_unlock(&job->job_lock); kfree(job); -- cgit v0.10.2 From 0f75aee75112934bcaf42410df5c51d7194b5896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 7 Sep 2015 18:07:14 +0200 Subject: drm/amdgpu: cleanup entity init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorder the fields and properly return the kfifo_alloc error code. 
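Returning the kfifo_alloc() error code matters because the allocator can fail with its own code (typically -ENOMEM), and propagating it instead of a blanket -EINVAL lets callers tell an allocation failure apart from a bad argument. A minimal sketch of the pattern, with a hypothetical my_entity type standing in for the driver structure:

#include <linux/kfifo.h>
#include <linux/slab.h>

struct my_entity {
	struct kfifo job_queue;
};

static int my_entity_init(struct my_entity *entity, unsigned int jobs)
{
	int r;

	if (!entity)
		return -EINVAL;	/* genuinely a bad argument */

	r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
	if (r)
		return r;	/* propagate -ENOMEM etc. unchanged */

	return 0;
}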
Signed-off-by: Christian König Reviewed-by: Junwei Zhang Reviewed-by: Chunming Zhou diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 5c8dcf8..a9d5826 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -118,23 +118,27 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_rq *rq, uint32_t jobs) { + int r; + if (!(sched && entity && rq)) return -EINVAL; memset(entity, 0, sizeof(struct amd_sched_entity)); - entity->belongto_rq = rq; - entity->scheduler = sched; - entity->fence_context = fence_context_alloc(1); - if(kfifo_alloc(&entity->job_queue, - jobs * sizeof(void *), - GFP_KERNEL)) - return -EINVAL; + INIT_LIST_HEAD(&entity->list); + entity->rq = rq; + entity->sched = sched; spin_lock_init(&entity->queue_lock); + r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL); + if (r) + return r; + atomic_set(&entity->fence_seq, 0); + entity->fence_context = fence_context_alloc(1); /* Add the entity to the run queue */ amd_sched_rq_add_entity(rq, entity); + return 0; } @@ -149,8 +153,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - return entity->scheduler == sched && - entity->belongto_rq != NULL; + return entity->sched == sched && + entity->rq != NULL; } /** @@ -180,7 +184,7 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - struct amd_sched_rq *rq = entity->belongto_rq; + struct amd_sched_rq *rq = entity->rq; if (!amd_sched_entity_is_initialized(sched, entity)) return; @@ -201,13 +205,13 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) container_of(cb, struct amd_sched_entity, cb); entity->dependency = NULL; fence_put(f); - amd_sched_wakeup(entity->scheduler); + amd_sched_wakeup(entity->sched); } static struct amd_sched_job * amd_sched_entity_pop_job(struct amd_sched_entity *entity) { - struct amd_gpu_scheduler *sched = entity->scheduler; + struct amd_gpu_scheduler *sched = entity->sched; struct amd_sched_job *sched_job; if (ACCESS_ONCE(entity->dependency)) @@ -275,7 +279,7 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job) fence_get(&fence->base); sched_job->s_fence = fence; - wait_event(entity->scheduler->job_scheduled, + wait_event(entity->sched->job_scheduled, amd_sched_entity_in(sched_job)); trace_amd_sched_job(sched_job); return 0; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f33df6c..c4fe24e 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -38,13 +38,15 @@ struct amd_sched_rq; */ struct amd_sched_entity { struct list_head list; - struct amd_sched_rq *belongto_rq; - atomic_t fence_seq; - /* the job_queue maintains the jobs submitted by clients */ - struct kfifo job_queue; + struct amd_sched_rq *rq; + struct amd_gpu_scheduler *sched; + spinlock_t queue_lock; - struct amd_gpu_scheduler *scheduler; + struct kfifo job_queue; + + atomic_t fence_seq; uint64_t fence_context; + struct fence *dependency; struct fence_cb cb; }; diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index e62c379..733522f 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ 
b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -36,7 +36,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity if (fence == NULL) return NULL; fence->owner = owner; - fence->scheduler = s_entity->scheduler; + fence->scheduler = s_entity->sched; spin_lock_init(&fence->lock); seq = atomic_inc_return(&s_entity->fence_seq); -- cgit v0.10.2 From 9b398fa5c24eb05fc60fafd8543cc03e9170f054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 7 Sep 2015 18:16:49 +0200 Subject: drm/amdgpu: rename fence->scheduler to sched v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just to be consistent with the other members. v2: rename the ring member as well. Signed-off-by: Christian König Reviewed-by: Junwei Zhang (v1) Reviewed-by: Chunming Zhou diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dbe061b..9108b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -891,7 +891,7 @@ struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; - struct amd_gpu_scheduler *scheduler; + struct amd_gpu_scheduler *sched; spinlock_t fence_lock; struct mutex *ring_lock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 546968a..6f39b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -848,7 +848,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) return -ENOMEM; - job->base.sched = ring->scheduler; + job->base.sched = ring->sched; job->base.s_entity = &parser->ctx->rings[ring->idx].entity; job->adev = parser->adev; job->ibs = parser->ibs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 20cbc4e..5494831 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -43,10 +43,10 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, for (i = 0; i < adev->num_rings; i++) { struct amd_sched_rq *rq; if (kernel) - rq = &adev->rings[i]->scheduler->kernel_rq; + rq = &adev->rings[i]->sched->kernel_rq; else - rq = &adev->rings[i]->scheduler->sched_rq; - r = amd_sched_entity_init(adev->rings[i]->scheduler, + rq = &adev->rings[i]->sched->sched_rq; + r = amd_sched_entity_init(adev->rings[i]->sched, &ctx->rings[i].entity, rq, amdgpu_sched_jobs); if (r) @@ -55,7 +55,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, if (i < adev->num_rings) { for (j = 0; j < i; j++) - amd_sched_entity_fini(adev->rings[j]->scheduler, + amd_sched_entity_fini(adev->rings[j]->sched, &ctx->rings[j].entity); kfree(ctx); return r; @@ -75,7 +75,7 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) if (amdgpu_enable_scheduler) { for (i = 0; i < adev->num_rings; i++) - amd_sched_entity_fini(adev->rings[i]->scheduler, + amd_sched_entity_fini(adev->rings[i]->sched, &ctx->rings[i].entity); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 1be2bd6..8e8cd09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -626,11 +626,11 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) ring->fence_drv.ring = ring; if (amdgpu_enable_scheduler) { - ring->scheduler = amd_sched_create(&amdgpu_sched_ops, - 
ring->idx, - amdgpu_sched_hw_submission, - (void *)ring->adev); - if (!ring->scheduler) + ring->sched = amd_sched_create(&amdgpu_sched_ops, + ring->idx, + amdgpu_sched_hw_submission, + (void *)ring->adev); + if (!ring->sched) DRM_ERROR("Failed to create scheduler on ring %d.\n", ring->idx); } @@ -681,8 +681,8 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) wake_up_all(&ring->fence_drv.fence_queue); amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); - if (ring->scheduler) - amd_sched_destroy(ring->scheduler); + if (ring->sched) + amd_sched_destroy(ring->sched); ring->fence_drv.initialized = false; } mutex_unlock(&adev->ring_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index b70ce10..7cf5405 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -146,7 +146,7 @@ static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f) s_fence = to_amd_sched_fence(f); if (s_fence) - return s_fence->scheduler->ring_id; + return s_fence->sched->ring_id; a_fence = to_amdgpu_fence(f); if (a_fence) return a_fence->ring->idx; @@ -437,7 +437,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, if (s_fence) seq_printf(m, " protected by 0x%016x on ring %d", s_fence->base.seqno, - s_fence->scheduler->ring_id); + s_fence->sched->ring_id); } seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 58408da..d1984fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -85,7 +85,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) return -ENOMEM; - job->base.sched = ring->scheduler; + job->base.sched = ring->sched; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; job->adev = adev; job->ibs = ibs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index cfd2999..b57ca10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -66,7 +66,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) if (a_fence) return a_fence->ring->adev == adev; if (s_fence) - return (struct amdgpu_device *)s_fence->scheduler->priv == adev; + return (struct amdgpu_device *)s_fence->sched->priv == adev; return false; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index a9d5826..ec4842e 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -326,7 +326,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) { struct amd_sched_fence *s_fence = container_of(cb, struct amd_sched_fence, cb); - struct amd_gpu_scheduler *sched = s_fence->scheduler; + struct amd_gpu_scheduler *sched = s_fence->sched; atomic_dec(&sched->hw_rq_count); amd_sched_fence_signal(s_fence); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index c4fe24e..89d977d 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -65,7 +65,7 @@ struct amd_sched_rq { struct amd_sched_fence { struct fence base; struct fence_cb cb; - struct amd_gpu_scheduler *scheduler; + struct amd_gpu_scheduler *sched; spinlock_t lock; void *owner; }; diff --git 
a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 733522f..d802638 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -36,7 +36,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity if (fence == NULL) return NULL; fence->owner = owner; - fence->scheduler = s_entity->sched; + fence->sched = s_entity->sched; spin_lock_init(&fence->lock); seq = atomic_inc_return(&s_entity->fence_seq); @@ -63,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence) static const char *amd_sched_fence_get_timeline_name(struct fence *f) { struct amd_sched_fence *fence = to_amd_sched_fence(f); - return (const char *)fence->scheduler->name; + return (const char *)fence->sched->name; } static bool amd_sched_fence_enable_signaling(struct fence *f) -- cgit v0.10.2 From 5ec92a7692872d656cffe010920fb49c4f51d75f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 7 Sep 2015 18:43:02 +0200 Subject: drm/amdgpu: cleanup fence queue init v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the fence related stuff into amdgpu_fence.c v2: rework commit message, cause this is actually not a bug Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Junwei Zhang diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8e8cd09..7f2d85e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -625,6 +625,8 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) amdgpu_fence_check_lockup); ring->fence_drv.ring = ring; + init_waitqueue_head(&ring->fence_drv.fence_queue); + if (amdgpu_enable_scheduler) { ring->sched = amd_sched_create(&amdgpu_sched_ops, ring->idx, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 130292d..6e73543 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -360,8 +360,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, amdgpu_fence_driver_init_ring(ring); } - init_waitqueue_head(&ring->fence_drv.fence_queue); - r = amdgpu_wb_get(adev, &ring->rptr_offs); if (r) { dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); -- cgit v0.10.2 From 4f839a243d3b0d8b1a14f4778a87ec4d8ddbf15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 8 Sep 2015 20:22:31 +0200 Subject: drm/amdgpu: more scheduler cleanups v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Embed the scheduler into the ring structure instead of allocating it. Use the ring name directly instead of the id. 
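Embedding the scheduler as a struct member, rather than holding a pointer to a separately allocated object, also lets the driver recover the enclosing ring from a scheduler pointer with container_of(), with no priv back-pointer; the later hunks of this patch do exactly that with struct amdgpu_ring. A hedged sketch of the relationship, with example_ring as an illustrative stand-in:

#include <linux/kernel.h>	/* container_of */

/* struct amd_gpu_scheduler as declared in gpu_scheduler.h */
struct example_ring {
	int idx;
	struct amd_gpu_scheduler sched;	/* embedded member, not a pointer */
};

static int example_ring_idx(struct amd_gpu_scheduler *s)
{
	/* recover the containing ring from the embedded member */
	struct example_ring *ring = container_of(s, struct example_ring, sched);

	return ring->idx;
}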
v2: rebased, whitespace cleanup Signed-off-by: Christian König Reviewed-by: Junwei Zhang Reviewed-by: Chunming Zhou diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9108b7c..57b427f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -433,7 +433,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); -void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); +int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq_src, unsigned irq_type); @@ -891,7 +891,7 @@ struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; - struct amd_gpu_scheduler *sched; + struct amd_gpu_scheduler sched; spinlock_t fence_lock; struct mutex *ring_lock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6f39b2d..b74b6a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -848,7 +848,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) return -ENOMEM; - job->base.sched = ring->sched; + job->base.sched = &ring->sched; job->base.s_entity = &parser->ctx->rings[ring->idx].entity; job->adev = parser->adev; job->ibs = parser->ibs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5494831..e0b80cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -43,10 +43,10 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, for (i = 0; i < adev->num_rings; i++) { struct amd_sched_rq *rq; if (kernel) - rq = &adev->rings[i]->sched->kernel_rq; + rq = &adev->rings[i]->sched.kernel_rq; else - rq = &adev->rings[i]->sched->sched_rq; - r = amd_sched_entity_init(adev->rings[i]->sched, + rq = &adev->rings[i]->sched.sched_rq; + r = amd_sched_entity_init(&adev->rings[i]->sched, &ctx->rings[i].entity, rq, amdgpu_sched_jobs); if (r) @@ -55,7 +55,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, if (i < adev->num_rings) { for (j = 0; j < i; j++) - amd_sched_entity_fini(adev->rings[j]->sched, + amd_sched_entity_fini(&adev->rings[j]->sched, &ctx->rings[j].entity); kfree(ctx); return r; @@ -75,7 +75,7 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) if (amdgpu_enable_scheduler) { for (i = 0; i < adev->num_rings; i++) - amd_sched_entity_fini(adev->rings[i]->sched, + amd_sched_entity_fini(&adev->rings[i]->sched, &ctx->rings[i].entity); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7f2d85e..b3fc26c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -609,9 +609,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * Init the fence driver for the requested ring (all asics). * Helper function for amdgpu_fence_driver_init(). 
*/ -void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) +int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) { - int i; + int i, r; ring->fence_drv.cpu_addr = NULL; ring->fence_drv.gpu_addr = 0; @@ -628,14 +628,16 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) init_waitqueue_head(&ring->fence_drv.fence_queue); if (amdgpu_enable_scheduler) { - ring->sched = amd_sched_create(&amdgpu_sched_ops, - ring->idx, - amdgpu_sched_hw_submission, - (void *)ring->adev); - if (!ring->sched) - DRM_ERROR("Failed to create scheduler on ring %d.\n", - ring->idx); + r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, + amdgpu_sched_hw_submission, ring->name); + if (r) { + DRM_ERROR("Failed to create scheduler on ring %s.\n", + ring->name); + return r; + } } + + return 0; } /** @@ -683,8 +685,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) wake_up_all(&ring->fence_drv.fence_queue); amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); - if (ring->sched) - amd_sched_destroy(ring->sched); + amd_sched_fini(&ring->sched); ring->fence_drv.initialized = false; } mutex_unlock(&adev->ring_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6e73543..30dce23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -357,7 +357,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->adev = adev; ring->idx = adev->num_rings++; adev->rings[ring->idx] = ring; - amdgpu_fence_driver_init_ring(ring); + r = amdgpu_fence_driver_init_ring(ring); + if (r) + return r; } r = amdgpu_wb_get(adev, &ring->rptr_offs); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 7cf5405..e907124 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -145,8 +145,13 @@ static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f) struct amd_sched_fence *s_fence; s_fence = to_amd_sched_fence(f); - if (s_fence) - return s_fence->sched->ring_id; + if (s_fence) { + struct amdgpu_ring *ring; + + ring = container_of(s_fence->sched, struct amdgpu_ring, sched); + return ring->idx; + } + a_fence = to_amdgpu_fence(f); if (a_fence) return a_fence->ring->idx; @@ -412,6 +417,26 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, } #if defined(CONFIG_DEBUG_FS) + +static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m) +{ + struct amdgpu_fence *a_fence = to_amdgpu_fence(fence); + struct amd_sched_fence *s_fence = to_amd_sched_fence(fence); + + if (a_fence) + seq_printf(m, " protected by 0x%016llx on ring %d", + a_fence->seq, a_fence->ring->idx); + + if (s_fence) { + struct amdgpu_ring *ring; + + + ring = container_of(s_fence->sched, struct amdgpu_ring, sched); + seq_printf(m, " protected by 0x%016x on ring %d", + s_fence->base.seqno, ring->idx); + } +} + void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m) { @@ -428,18 +453,8 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, } seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", soffset, eoffset, eoffset - soffset); - if (i->fence) { - struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence); - struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence); - if (a_fence) - seq_printf(m, " protected by 0x%016llx on ring %d", - a_fence->seq, a_fence->ring->idx); - if (s_fence) - seq_printf(m, " protected by 0x%016x 
on ring %d", - s_fence->base.seqno, - s_fence->sched->ring_id); - - } + if (i->fence) + amdgpu_sa_bo_dump_fence(i->fence, m); seq_printf(m, "\n"); } spin_unlock(&sa_manager->wq.lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d1984fc..2e946b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -85,7 +85,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) return -ENOMEM; - job->base.sched = ring->sched; + job->base.sched = &ring->sched; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; job->adev = adev; job->ibs = ibs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b57ca10..4921de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -65,8 +65,14 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) if (a_fence) return a_fence->ring->adev == adev; - if (s_fence) - return (struct amdgpu_device *)s_fence->sched->priv == adev; + + if (s_fence) { + struct amdgpu_ring *ring; + + ring = container_of(s_fence->sched, struct amdgpu_ring, sched); + return ring->adev == adev; + } + return false; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h index a1f4ece..144f50a 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -16,21 +16,21 @@ TRACE_EVENT(amd_sched_job, TP_ARGS(sched_job), TP_STRUCT__entry( __field(struct amd_sched_entity *, entity) - __field(u32, ring_id) + __field(const char *, name) __field(u32, job_count) __field(int, hw_job_count) ), TP_fast_assign( __entry->entity = sched_job->s_entity; - __entry->ring_id = sched_job->sched->ring_id; + __entry->name = sched_job->sched->name; __entry->job_count = kfifo_len( &sched_job->s_entity->job_queue) / sizeof(sched_job); __entry->hw_job_count = atomic_read( &sched_job->sched->hw_rq_count); ), - TP_printk("entity=%p, ring=%u, job count:%u, hw job count:%d", - __entry->entity, __entry->ring_id, __entry->job_count, + TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->name, __entry->job_count, __entry->hw_job_count) ); #endif diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ec4842e..3697eee 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -381,56 +381,45 @@ static int amd_sched_main(void *param) } /** - * Create a gpu scheduler + * Init a gpu scheduler instance * + * @sched The pointer to the scheduler * @ops The backend operations for this scheduler. - * @ring The the ring id for the scheduler. * @hw_submissions Number of hw submissions to do. + * @name Name used for debugging * - * Return the pointer to scheduler for success, otherwise return NULL + * Return 0 on success, otherwise error code. 
*/ -struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops, - unsigned ring, unsigned hw_submission, - void *priv) +int amd_sched_init(struct amd_gpu_scheduler *sched, + struct amd_sched_backend_ops *ops, + unsigned hw_submission, const char *name) { - struct amd_gpu_scheduler *sched; - - sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); - if (!sched) - return NULL; - sched->ops = ops; - sched->ring_id = ring; sched->hw_submission_limit = hw_submission; - sched->priv = priv; - snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring); + sched->name = name; amd_sched_rq_init(&sched->sched_rq); amd_sched_rq_init(&sched->kernel_rq); init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->job_scheduled); atomic_set(&sched->hw_rq_count, 0); + /* Each scheduler will run on a seperate kernel thread */ sched->thread = kthread_run(amd_sched_main, sched, sched->name); if (IS_ERR(sched->thread)) { - DRM_ERROR("Failed to create scheduler for id %d.\n", ring); - kfree(sched); - return NULL; + DRM_ERROR("Failed to create scheduler for %s.\n", name); + return PTR_ERR(sched->thread); } - return sched; + return 0; } /** * Destroy a gpu scheduler * * @sched The pointer to the scheduler - * - * return 0 if succeed. -1 if failed. */ -int amd_sched_destroy(struct amd_gpu_scheduler *sched) +void amd_sched_fini(struct amd_gpu_scheduler *sched) { kthread_stop(sched->thread); - kfree(sched); - return 0; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 89d977d..80b64dc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -101,23 +101,21 @@ struct amd_sched_backend_ops { * One scheduler is implemented for each hardware ring */ struct amd_gpu_scheduler { - struct task_struct *thread; + struct amd_sched_backend_ops *ops; + uint32_t hw_submission_limit; + const char *name; struct amd_sched_rq sched_rq; struct amd_sched_rq kernel_rq; - atomic_t hw_rq_count; - struct amd_sched_backend_ops *ops; - uint32_t ring_id; wait_queue_head_t wake_up_worker; wait_queue_head_t job_scheduled; - uint32_t hw_submission_limit; - char name[20]; - void *priv; + atomic_t hw_rq_count; + struct task_struct *thread; }; -struct amd_gpu_scheduler * -amd_sched_create(struct amd_sched_backend_ops *ops, - uint32_t ring, uint32_t hw_submission, void *priv); -int amd_sched_destroy(struct amd_gpu_scheduler *sched); +int amd_sched_init(struct amd_gpu_scheduler *sched, + struct amd_sched_backend_ops *ops, + uint32_t hw_submission, const char *name); +void amd_sched_fini(struct amd_gpu_scheduler *sched); int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, -- cgit v0.10.2 From 1ee4478a26cf55c8f8a6219d7e99f2b48959394d Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Thu, 10 Sep 2015 13:41:38 -0400 Subject: drm/amdgpu: Disable UVD PG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This causes problems with multiple suspend/resume cycles. 
Signed-off-by: Leo Liu Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 552d9e7..b55ceb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1400,7 +1400,8 @@ static int vi_common_early_init(void *handle) case CHIP_CARRIZO: adev->has_uvd = true; adev->cg_flags = 0; - adev->pg_flags = AMDGPU_PG_SUPPORT_UVD | AMDGPU_PG_SUPPORT_VCE; + /* Disable UVD pg */ + adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE; adev->external_rev_id = adev->rev_id + 0x1; if (amdgpu_smc_load_fw && smc_enabled) adev->firmware.smu_load = true; -- cgit v0.10.2 From 5146419e6feb99cfbc8dbf005dd2f62603e15efb Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 15 Sep 2015 10:38:38 -0400 Subject: drm/amdgpu: make UVD handle checking more strict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invalid messages can crash the hw otherwise Ported from radeon commit a1b403da70e038ca6c6c6fe434d1d873546873a3 Signed-off-by: Leo Liu Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 1a8e43b..d0312364 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -543,46 +543,60 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, return -EINVAL; } - if (msg_type == 1) { + switch (msg_type) { + case 0: + /* it's a create msg, calc image size (width * height) */ + amdgpu_bo_kunmap(bo); + + /* try to alloc a new handle */ + for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + if (atomic_read(&adev->uvd.handles[i]) == handle) { + DRM_ERROR("Handle 0x%x already in use!\n", handle); + return -EINVAL; + } + + if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { + adev->uvd.filp[i] = ctx->parser->filp; + return 0; + } + } + + DRM_ERROR("No more free UVD handles!\n"); + return -EINVAL; + + case 1: /* it's a decode msg, calc buffer sizes */ r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes); amdgpu_bo_kunmap(bo); if (r) return r; - } else if (msg_type == 2) { + /* validate the handle */ + for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + if (atomic_read(&adev->uvd.handles[i]) == handle) { + if (adev->uvd.filp[i] != ctx->parser->filp) { + DRM_ERROR("UVD handle collision detected!\n"); + return -EINVAL; + } + return 0; + } + } + + DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); + return -ENOENT; + + case 2: /* it's a destroy msg, free the handle */ for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); amdgpu_bo_kunmap(bo); return 0; - } else { - /* it's a create msg */ - amdgpu_bo_kunmap(bo); - - if (msg_type != 0) { - DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); - return -EINVAL; - } - - /* it's a create msg, no special handling needed */ - } - - /* create or decode, validate the handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { - if (atomic_read(&adev->uvd.handles[i]) == handle) - return 0; - } - /* handle not found try to alloc a new one */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { - if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { - adev->uvd.filp[i] = ctx->parser->filp; - return 0; - } + default: + DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); + return -EINVAL; } - - DRM_ERROR("No more free UVD handles!\n"); + BUG(); return -EINVAL; } -- cgit v0.10.2 From 2bd188d0167227932be3cf5b033c0e600b01291f Mon Sep 17 00:00:00 2001 
From: Leo Liu Date: Fri, 11 Sep 2015 14:22:18 -0400 Subject: drm/amdgpu: fix the UVD suspend sequence order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes suspend issues with UVD. Signed-off-by: Leo Liu Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 5fac5da..ed50dd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -224,11 +224,11 @@ static int uvd_v4_2_suspend(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = uvd_v4_2_hw_fini(adev); + r = amdgpu_uvd_suspend(adev); if (r) return r; - r = amdgpu_uvd_suspend(adev); + r = uvd_v4_2_hw_fini(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 2d5c59c..9ad8b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -220,11 +220,11 @@ static int uvd_v5_0_suspend(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = uvd_v5_0_hw_fini(adev); + r = amdgpu_uvd_suspend(adev); if (r) return r; - r = amdgpu_uvd_suspend(adev); + r = uvd_v5_0_hw_fini(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index d9f553f..a7622ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -214,11 +214,11 @@ static int uvd_v6_0_suspend(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = uvd_v6_0_hw_fini(adev); + r = amdgpu_uvd_suspend(adev); if (r) return r; - r = amdgpu_uvd_suspend(adev); + r = uvd_v6_0_hw_fini(adev); if (r) return r; -- cgit v0.10.2 From 1f4452105ce39786be13b4636251377a30d1286a Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Fri, 11 Sep 2015 17:09:57 -0400 Subject: drm/amdgpu: fix UVD suspend and resume for VI APU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User space passed the same handle before suspend and after resume, so we have to remove the session and handle destroy, and keep the firmware untouched.
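The suspend-order fix just above rests on one move, repeated for all three UVD generations: save the UVD state first, then tear the hardware down. A generic sketch of that ordering, with placeholder function names; the rationale, capturing state while the engine is still accessible, is inferred from the diff:

static int example_save_engine_state(struct amdgpu_device *adev);
static int example_hw_fini(struct amdgpu_device *adev);

static int example_uvd_suspend(struct amdgpu_device *adev)
{
	int r;

	/* save engine state first, while the block can still be read */
	r = example_save_engine_state(adev);
	if (r)
		return r;

	/* only then is it safe to tear the hardware down */
	return example_hw_fini(adev);
}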
Signed-off-by: Leo Liu Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index a7622ef..7e9934f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -214,10 +214,12 @@ static int uvd_v6_0_suspend(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_suspend(adev); - if (r) - return r; - + /* Skip this for APU for now */ + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_uvd_suspend(adev); + if (r) + return r; + } r = uvd_v6_0_hw_fini(adev); if (r) return r; @@ -230,10 +232,12 @@ static int uvd_v6_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_resume(adev); - if (r) - return r; - + /* Skip this for APU for now */ + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_uvd_resume(adev); + if (r) + return r; + } r = uvd_v6_0_hw_init(adev); if (r) return r; -- cgit v0.10.2 From 71affda522bb0f43e205cf4f000e2c50261c01a6 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 21 Sep 2015 17:34:39 -0400 Subject: drm/amdgpu: use kmemdup rather than duplicating its implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch was generated using fixed coccinelle semantic patch scripts/coccinelle/api/memdup.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2014320 Signed-off-by: Andrzej Hajda Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index f25cfed..1a7708f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -536,12 +536,10 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, if (metadata == NULL) return -EINVAL; - buffer = kzalloc(metadata_size, GFP_KERNEL); + buffer = kmemdup(metadata, metadata_size, GFP_KERNEL); if (buffer == NULL) return -ENOMEM; - memcpy(buffer, metadata, metadata_size); - kfree(bo->metadata); bo->metadata_flags = flags; bo->metadata = buffer; -- cgit v0.10.2 From 5a6adfa20b622a273205e33b20c12332aa7eb724 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 22 Sep 2015 10:06:45 -0400 Subject: drm/amdgpu: Fix max_vblank_count value for current display engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value was much too low, which could cause the userspace visible vblank counter to move backwards when the hardware counter wrapped around. 
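Roughly speaking, DRM extrapolates its software vblank counter from hardware counter deltas taken modulo the advertised maximum, so a driver that claims 21 significant bits on a counter that really wraps at 24 bits will alias high counts back to small ones. A rough worked example (values hypothetical, masks taken from the diff below):

#include <linux/types.h>

static void vblank_wrap_example(void)
{
	u32 hw = 0x00200000;		/* bit 21 set: valid for a 24-bit counter */

	u32 old_limit = 0x001fffff;	/* the old, too-small claim: 21 bits */
	u32 new_limit = 0x00ffffff;	/* matches the real 24-bit hw counter */

	u32 wrong = hw & old_limit;	/* == 0: counter appears to jump backwards */
	u32 right = hw & new_limit;	/* == 0x00200000: monotonic as expected */

	(void)wrong;
	(void)right;
}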
Ported from radeon commit: b0b9bb4dd51f396dcf843831905f729e74b0c8c0 Reviewed-by: Christian König Reviewed-by: Jammy Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 0aba8e9..7c42ff6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -140,7 +140,7 @@ void amdgpu_irq_preinstall(struct drm_device *dev) */ int amdgpu_irq_postinstall(struct drm_device *dev) { - dev->max_vblank_count = 0x001fffff; + dev->max_vblank_count = 0x00ffffff; return 0; } -- cgit v0.10.2 From 1d263474c4416efb6d0feca98fe6d462b0d28f56 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Sep 2015 13:59:28 +0300 Subject: drm/amdgpu: unwind properly in amdgpu_cs_parser_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The amdgpu_cs_parser_init() function doesn't clean up after itself but instead the caller uses a free everything function amdgpu_cs_parser_fini() on failure. This style of error handling is often buggy. In this example, we call "drm_free_large(parser->chunks[i].kdata);" when it is an uninitialized pointer or when "parser->chunks" is NULL. I fixed this bug by adding unwind code so that it frees everything that it allocates. I also made some other very minor changes: 1) Renamed "r" to "ret". 2) Moved the chunk_array allocation to the start of the function. 3) Removed some initializers which are no longer needed. Reviewed-by: Christian König Reported-by: Ilja Van Sprundel Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b74b6a8..749420f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -154,42 +154,41 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; - uint64_t *chunk_array = NULL; + uint64_t *chunk_array; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; unsigned size, i; - int r = 0; + int ret; - if (!cs->in.num_chunks) - goto out; + if (cs->in.num_chunks == 0) + return 0; + + chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); + if (!chunk_array) + return -ENOMEM; p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); if (!p->ctx) { - r = -EINVAL; - goto out; + ret = -EINVAL; + goto free_chunk; } + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); /* get chunks */ INIT_LIST_HEAD(&p->validated); - chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); - if (chunk_array == NULL) { - r = -ENOMEM; - goto out; - } - chunk_array_user = (uint64_t __user *)(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { - r = -EFAULT; - goto out; + ret = -EFAULT; + goto put_bo_list; } p->nchunks = cs->in.num_chunks; p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), GFP_KERNEL); - if (p->chunks == NULL) { - r = -ENOMEM; - goto out; + if (!p->chunks) { + ret = -ENOMEM; + goto put_bo_list; } for (i = 0; i < p->nchunks; i++) { @@ -200,8 +199,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) chunk_ptr = (void __user *)chunk_array[i]; if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { - r = -EFAULT; - goto out; + ret = -EFAULT; + i--; + goto free_partial_kdata; } p->chunks[i].chunk_id = user_chunk.chunk_id;
p->chunks[i].length_dw = user_chunk.length_dw; @@ -212,13 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); if (p->chunks[i].kdata == NULL) { - r = -ENOMEM; - goto out; + ret = -ENOMEM; + i--; + goto free_partial_kdata; } size *= sizeof(uint32_t); if (copy_from_user(p->chunks[i].kdata, cdata, size)) { - r = -EFAULT; - goto out; + ret = -EFAULT; + goto free_partial_kdata; } switch (p->chunks[i].chunk_id) { @@ -238,15 +239,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) gobj = drm_gem_object_lookup(p->adev->ddev, p->filp, handle); if (gobj == NULL) { - r = -EINVAL; - goto out; + ret = -EINVAL; + goto free_partial_kdata; } p->uf.bo = gem_to_amdgpu_bo(gobj); p->uf.offset = fence_data->offset; } else { - r = -EINVAL; - goto out; + ret = -EINVAL; + goto free_partial_kdata; } break; @@ -254,19 +255,35 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) break; default: - r = -EINVAL; - goto out; + ret = -EINVAL; + goto free_partial_kdata; } } p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!p->ibs) - r = -ENOMEM; + if (!p->ibs) { + ret = -ENOMEM; + goto free_all_kdata; + } -out: kfree(chunk_array); - return r; + return 0; + +free_all_kdata: + i = p->nchunks - 1; +free_partial_kdata: + for (; i >= 0; i--) + drm_free_large(p->chunks[i].kdata); + kfree(p->chunks); +put_bo_list: + if (p->bo_list) + amdgpu_bo_list_put(p->bo_list); + amdgpu_ctx_put(p->ctx); +free_chunk: + kfree(chunk_array); + + return ret; } /* Returns how many bytes TTM can move per IB. @@ -810,7 +827,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(parser, data); if (r) { DRM_ERROR("Failed to initialize parser !\n"); - amdgpu_cs_parser_fini(parser, r, false); + kfree(parser); up_read(&adev->exclusive_lock); r = amdgpu_cs_handle_lockup(adev, r); return r; -- cgit v0.10.2 From 0d2edd3791bb172a59d708d5c94330bbd6050f97 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Sep 2015 14:00:12 +0300 Subject: drm/amdgpu: integer overflow in amdgpu_info_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "alloc_size" calculation can overflow leading to memory corruption. Reviewed-by: Christian König Reported-by: Ilja Van Sprundel Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 2236793..8c735f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -390,7 +390,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { - unsigned n, alloc_size = info->read_mmr_reg.count * 4; + unsigned n, alloc_size; uint32_t *regs; unsigned se_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & @@ -406,9 +406,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; - regs = kmalloc(alloc_size, GFP_KERNEL); + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); if (!regs) return -ENOMEM; + alloc_size = info->read_mmr_reg.count * sizeof(*regs); for (i = 0; i < info->read_mmr_reg.count; i++) if (amdgpu_asic_read_register(adev, se_num, sh_num, -- cgit v0.10.2 From 0913eab648e4fb30ddca8882e707d0fcf5b237c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Sep 2015 14:00:35 +0300 Subject: drm/amdgpu: info leak in amdgpu_gem_metadata_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no limit on args->data.data_size_bytes so we could read beyond the end of the args->data.data[] array. Reviewed-by: Christian König Reported-by: Ilja Van Sprundel Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 2f39fea..b82fab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -427,6 +427,10 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, &args->data.data_size_bytes, &args->data.flags); } else if (args->op == AMDGPU_GEM_METADATA_OP_SET_METADATA) { + if (args->data.data_size_bytes > sizeof(args->data.data)) { + r = -EINVAL; + goto unreserve; + } r = amdgpu_bo_set_tiling_flags(robj, args->data.tiling_info); if (!r) r = amdgpu_bo_set_metadata(robj, args->data.data, @@ -434,6 +438,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, args->data.flags); } +unreserve: amdgpu_bo_unreserve(robj); out: drm_gem_object_unreference_unlocked(gobj); -- cgit v0.10.2 From 54ef0b5461c071050c61e501af5544842d61f40a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Sep 2015 14:00:59 +0300 Subject: drm/amdgpu: integer overflow in amdgpu_mode_dumb_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit args->size is a u64. arg->pitch and args->height are u32. The multiplication will overflow instead of using the high 32 bits as intended. 
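The root cause is standard C arithmetic: both operands are 32-bit, so the multiply itself is done in 32 bits and the high bits are lost before the result is widened by the assignment; casting one operand widens the whole multiplication. A self-contained illustration with hypothetical framebuffer dimensions:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pitch  = 32768;	/* bytes per scanline */
	uint32_t height = 200000;

	uint64_t bad  = pitch * height;			/* 32-bit multiply wraps: 2258632704 */
	uint64_t good = (uint64_t)pitch * height;	/* 64-bit multiply: 6553600000 */

	printf("bad=%llu good=%llu\n",
	       (unsigned long long)bad, (unsigned long long)good);
	return 0;
}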
Reviewed-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b82fab2..7297ca3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -659,7 +659,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, int r; args->pitch = amdgpu_align_pitch(adev, args->width, args->bpp, 0) * ((args->bpp + 1) / 8); - args->size = args->pitch * args->height; + args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); r = amdgpu_gem_object_create(adev, args->size, 0, -- cgit v0.10.2 From 5c3422b0b135b46c8dca9c1d909c1ae84f3561bd Mon Sep 17 00:00:00 2001 From: "monk.liu" Date: Wed, 23 Sep 2015 13:49:58 +0800 Subject: drm/amdgpu: sync ce and me with SWITCH_BUFFER(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to use wait_reg_mem to make CE wait until DE finishes page updating, but from Tonga+ CE no longer supports the wait_reg_mem packet, so this logic no longer works. Here is another approach to the same thing: inserting two SWITCH_BUFFER packets at both the front and the end of vm_flush guarantees that CE does not go on to process IB_const before vm_flush is done. Inserting two SWITCH_BUFFER packets also works on CI, so remove the legacy method of syncing CE and ME. v2: Insert double SWITCH_BUFFER at front of vm flush as well. Signed-off-by: monk.liu Reviewed-by: Christian König diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 57b427f..6647fb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1202,8 +1202,6 @@ struct amdgpu_gfx { struct amdgpu_irq_src priv_inst_irq; /* gfx status */ uint32_t gfx_current_status; - /* sync signal for const engine */ - unsigned ce_sync_offs; /* ce ram size*/ unsigned ce_ram_size; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 392ec10..e992bf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; - - /* instruct DE to set a magic number */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(5))); - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 1); - - /* let CE wait till condition satisfied */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ - WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ - WAIT_REG_MEM_FUNCTION(3) | /* == */ - WAIT_REG_MEM_ENGINE(2))); /* ce */ - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 1); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, 4); /* poll interval */ - - /* instruct CE to reset wb of ce_sync to zero */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | - WRITE_DATA_DST_SEL(5) | - WR_CONFIRM)); - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring,
upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 0); -} - /* * vm * VMID 0 is the physical GPU addresses as used by the kernel. @@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + if (usepfp) { + /* synce CE with ME to prevent CE fetch CEIB before context switch done */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | @@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0x0); /* synce CE with ME to prevent CE fetch CEIB before context switch done */ - gfx_v7_0_ce_sync_me(ring); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); } } @@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle) return r; } - r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); - if (r) { - DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); - return r; - } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; ring->ring_obj = NULL; @@ -4889,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); - gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_rlc_fini(adev); gfx_v7_0_mec_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 78e5900..cb4f68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); - if (r) { - DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); - return r; - } - /* set up the gfx ring */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); - gfx_v8_0_mec_fini(adev); return 0; @@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, return true; } -static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; - - /* instruct DE to set a magic number */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(5))); - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 1); - - /* let CE wait till condition satisfied */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ - WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ - WAIT_REG_MEM_FUNCTION(3) | /* == */ - WAIT_REG_MEM_ENGINE(2))); /* ce */ - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 1); - 
amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, 4); /* poll interval */ - - /* instruct CE to reset wb of ce_sync to zero */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | - WRITE_DATA_DST_SEL(5) | - WR_CONFIRM)); - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 0); -} - static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { @@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, 4); /* poll interval */ + if (usepfp) { + /* synce CE with ME to prevent CE fetch CEIB before context switch done */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0)) | @@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); - - /* synce CE with ME to prevent CE fetch CEIB before context switch done */ - gfx_v8_0_ce_sync_me(ring); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); } } -- cgit v0.10.2 From 6adaed5bfe4f6f0a0e027e87d5dd80bd9834d5f0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 23 Sep 2015 20:26:45 +0200 Subject: drm/radeon: Sprinkle drm_modeset_lock_all to appease locking checks In commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c Author: Daniel Vetter Date: Thu Jul 9 23:44:28 2015 +0200 drm: Check locking in drm_for_each_connector I added locking checks to drm_for_each_connector but failed that through drm_helper_connector_dpms -> drm_helper_choose_encoder_dpms it's used in a few more places in the radeon resume/suspend code. Fix them up. Note that we could use the connector iterator macros in there too, but that's for the future. 
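The pattern of the fix is uniform: every walk of dev->mode_config.connector_list has to run under the locks that the drm_for_each_connector check asserts. Condensed, the suspend-path shape in the hunks that follow becomes:

	drm_modeset_lock_all(dev);
	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
	}
	drm_modeset_unlock_all(dev);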
Reported-and-tested-by: Borislav Petkov Cc: Borislav Petkov Cc: Alex Deucher Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index d8319da..f3f562f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1573,10 +1573,12 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_kms_helper_poll_disable(dev); + drm_modeset_lock_all(dev); /* turn off display hw */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } + drm_modeset_unlock_all(dev); /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -1734,9 +1736,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) if (fbcon) { drm_helper_resume_force_mode(dev); /* turn on display hw */ + drm_modeset_lock_all(dev); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } + drm_modeset_unlock_all(dev); } drm_kms_helper_poll_enable(dev); -- cgit v0.10.2 From 4c7fbc39b1d58d9f4113ef962743a67bcdfe6be2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Sep 2015 14:32:06 -0400 Subject: drm/amdgpu: Sprinkle drm_modeset_lock_all to appease locking checks In commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c Author: Daniel Vetter Date: Thu Jul 9 23:44:28 2015 +0200 drm: Check locking in drm_for_each_connector I added locking checks to drm_for_each_connector but failed that through drm_helper_connector_dpms -> drm_helper_choose_encoder_dpms it's used in a few more places in the amdgpu resume/suspend code. Fix them up. Note that we could use the connector iterator macros in there too, but that's for the future. 
Port of radeon commit: drm/radeon: Sprinkle drm_modeset_lock_all to appease locking checks Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2d569ec..6068d82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1651,9 +1651,11 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_kms_helper_poll_disable(dev); /* turn off display hw */ + drm_modeset_lock_all(dev); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } + drm_modeset_unlock_all(dev); /* unpin the front buffers */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -1748,9 +1750,11 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon) if (fbcon) { drm_helper_resume_force_mode(dev); /* turn on display hw */ + drm_modeset_lock_all(dev); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } + drm_modeset_unlock_all(dev); } drm_kms_helper_poll_enable(dev); -- cgit v0.10.2 From e78654799135a788a941bacad3452fbd7083e518 Mon Sep 17 00:00:00 2001 From: Maxim Sheviakov Date: Wed, 23 Sep 2015 17:10:51 -0400 Subject: drm/radeon: add quirk for MSI R7 370 Just adds the quirk for the MSI R7 370 Armor 2X. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=91294 Signed-off-by: Maxim Sheviakov Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 787cd8f..e9115d3 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2927,6 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, + { PCI_VENDOR_ID_ATI, 0x6811, 0x1762, 0x2015, 0, 120000 }, { 0, 0, 0, 0 }, }; -- cgit v0.10.2 From 675ee231d960af2af3606b4480324e26797eb010 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Sep 2015 14:00:21 -0700 Subject: tcp: add proper TS val into RST packets RST packets sent on behalf of TCP connections with TS option (RFC 7323 TCP timestamps) have incorrect TS val (set to 0), but correct TS ecr.
A > B: Flags [S], seq 0, win 65535, options [mss 1000,nop,nop,TS val 100 ecr 0], length 0
B > A: Flags [S.], seq 2444755794, ack 1, win 28960, options [mss 1460,nop,nop,TS val 7264344 ecr 100], length 0
A > B: Flags [.], ack 1, win 65535, options [nop,nop,TS val 110 ecr 7264344], length 0
B > A: Flags [R.], seq 1, ack 1, win 28960, options [nop,nop,TS val 0 ecr 110], length 0
We need to call skb_mstamp_get() to get a proper TS val, derived from skb->skb_mstamp. Note that RFC 1323 advocated not sending the TS option in RST segments, but RFC 7323 recommends the opposite: Once TSopt has been successfully negotiated, that is both <SYN> and <SYN,ACK> contain TSopt, the TSopt MUST be sent in every non-<RST> segment for the duration of the connection, and SHOULD be sent in an <RST> segment (see Section 5.2 for details) Note this RFC recommends sending TS val = 0, but we believe it is premature: We do not know if all TCP stacks are properly handling the receive side: When an <RST> segment is received, it MUST NOT be subjected to the PAWS check by verifying an acceptable value in SEG.TSval, and information from the Timestamps option MUST NOT be used to update connection state information. SEG.TSecr MAY be used to provide stricter acceptance checks.
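As a rough illustration of the receive-side rule just quoted, here is a hedged sketch of a PAWS acceptance check that special-cases RST segments; this is illustrative C, not the kernel's actual PAWS code:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of the RFC 7323 rule: <RST> segments are exempt from PAWS,
 * and their timestamps must not update connection state.
 */
static bool paws_reject(uint32_t seg_tsval, uint32_t ts_recent, bool is_rst)
{
        if (is_rst)
                return false;   /* MUST NOT subject <RST> to the PAWS check */
        /* reject when TSval is older than the last accepted value (mod 2^32) */
        return (int32_t)(seg_tsval - ts_recent) < 0;
}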
In 5 years, if/when all TCP stacks are RFC 7323 ready, we might consider sending TS val = 0, if it buys something. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f9a8a12..1100ffe 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2897,6 +2897,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); + skb_mstamp_get(&skb->skb_mstamp); /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); -- cgit v0.10.2 From 2d8bff12699abc3a9bf886bb0b79f44d94d81496 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 23 Sep 2015 14:57:58 -0400 Subject: netpoll: Close race condition between poll_one_napi and napi_disable Drivers might call napi_disable while not holding the napi instance poll_lock. In those instances, it's possible for a race condition to exist between poll_one_napi and napi_disable. That is to say, poll_one_napi only tests the NAPI_STATE_SCHED bit to see if there is work to do during a poll, and as such the following may happen:
CPU0                          CPU1
ndo_tx_timeout                napi_poll_dev
 napi_disable                  poll_one_napi
  test_and_set_bit (ret 0)      test_bit (ret 1)
   reset adapter                 napi_poll_routine
If the adapter gets a tx timeout without a napi instance scheduled, it's possible for the adapter to think it has exclusive access to the hardware (as the napi instance is now scheduled via the napi_disable call), while the netpoll code thinks there is simply work to do. The result is parallel hardware access leading to corrupt data structures in the driver, and a crash. Additionally, there is another, more critical race between netpoll and napi_disable. The disabled napi state is actually identical to the scheduled state for a given napi instance. The implication is that, if a napi instance is disabled, a netconsole instance would see the napi state of the device as having been scheduled, and poll it, likely while the driver was doing something requiring exclusive access. In the case above, it's fairly clear that not having the rings in a state ready to be polled will cause any number of crashes. The fix should be pretty easy. netpoll uses its own bit to indicate that the napi instance is in a state of being serviced by netpoll (NAPI_STATE_NPSVC). We can just gate disabling on that bit as well as the sched bit. That should prevent netpoll from conducting a napi poll if we convert its set bit to a test_and_set_bit operation to provide mutual exclusion. Change notes: V2) Remove a trailing whitespace Resubmit with proper subject prefix V3) Clean up spacing nits Signed-off-by: Neil Horman CC: "David S. Miller" CC: jmaxwell@redhat.com Tested-by: jmaxwell@redhat.com Signed-off-by: David S. Miller
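The mutual exclusion being added is easy to see in isolation. A hedged sketch of the bit protocol described above; the authoritative change is the diff that follows, and netpoll_try_service() is a hypothetical name:

/* Sketch: both netpoll and napi_disable() claim NAPI_STATE_NPSVC with
 * atomic test_and_set_bit(), so only one side can service the napi
 * instance at a time.
 */
static bool netpoll_try_service(struct napi_struct *n, int budget)
{
        if (!test_bit(NAPI_STATE_SCHED, &n->state))
                return false;           /* no work scheduled */
        if (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
                return false;           /* napi_disable() owns the instance */
        n->poll(n, budget);             /* safe: we hold the NPSVC bit */
        clear_bit(NAPI_STATE_NPSVC, &n->state);
        return true;
}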
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88a0069..2d15e38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -507,6 +507,7 @@ static inline void napi_enable(struct napi_struct *n) BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); + clear_bit(NAPI_STATE_NPSVC, &n->state); } #ifdef CONFIG_SMP diff --git a/net/core/dev.c b/net/core/dev.c index 877c848..6bb6470 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4713,6 +4713,8 @@ void napi_disable(struct napi_struct *n) while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) msleep(1); + while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state)) + msleep(1); hrtimer_cancel(&n->timer); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6aa3db8..8bdada2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -142,7 +142,7 @@ static void queue_process(struct work_struct *work) */ static int poll_one_napi(struct napi_struct *napi, int budget) { - int work; + int work = 0; /* net_rx_action's ->poll() invocations and our's are * synchronized by this test which is only made while @@ -151,7 +151,12 @@ static int poll_one_napi(struct napi_struct *napi, int budget) if (!test_bit(NAPI_STATE_SCHED, &napi->state)) return budget; - set_bit(NAPI_STATE_NPSVC, &napi->state); + /* If we set this bit but see that it has already been set, + * that indicates that napi has been disabled and we need + * to abort this operation + */ + if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) + goto out; work = napi->poll(napi, budget); WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); @@ -159,6 +164,7 @@ static int poll_one_napi(struct napi_struct *napi, int budget) clear_bit(NAPI_STATE_NPSVC, &napi->state); +out: return budget - work; } -- cgit v0.10.2 From d3869efe7a8a2298516d9af4f91487cf486ca945 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 19:45:08 +0100 Subject: Fix AF_PACKET ABI breakage in 4.2 Commit 7d82410950aa ("virtio: add explicit big-endian support to memory accessors") accidentally changed the virtio_net header used by AF_PACKET with PACKET_VNET_HDR from host-endian to big-endian. Since virtio_legacy_is_little_endian() is a very long identifier, define a vio_le macro and use that throughout the code instead of the hard-coded 'false' for little-endian. This restores the ABI to match 4.1 and earlier kernels, and makes my test program work again. Signed-off-by: David Woodhouse Signed-off-by: David S.
Miller diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7b8e39a..aa4b15c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -230,6 +230,8 @@ struct packet_skb_cb { } sa; }; +#define vio_le() virtio_legacy_is_little_endian() + #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) #define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) @@ -2680,15 +2682,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_unlock; if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - (__virtio16_to_cpu(false, vnet_hdr.csum_start) + - __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2 > - __virtio16_to_cpu(false, vnet_hdr.hdr_len))) - vnet_hdr.hdr_len = __cpu_to_virtio16(false, - __virtio16_to_cpu(false, vnet_hdr.csum_start) + - __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2); + (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) + + __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 > + __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len))) + vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(), + __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) + + __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2); err = -EINVAL; - if (__virtio16_to_cpu(false, vnet_hdr.hdr_len) > len) + if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len) goto out_unlock; if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { @@ -2731,7 +2733,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, - __virtio16_to_cpu(false, vnet_hdr.hdr_len), + __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len), msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; @@ -2778,8 +2780,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - u16 s = __virtio16_to_cpu(false, vnet_hdr.csum_start); - u16 o = __virtio16_to_cpu(false, vnet_hdr.csum_offset); + u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start); + u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset); if (!skb_partial_csum_set(skb, s, o)) { err = -EINVAL; goto out_free; @@ -2787,7 +2789,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) } skb_shinfo(skb)->gso_size = - __virtio16_to_cpu(false, vnet_hdr.gso_size); + __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size); skb_shinfo(skb)->gso_type = gso_type; /* Header must be checked, and gso_segs computed. */ @@ -3161,9 +3163,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* This is a hint as to how much should be linear. 
*/ vnet_hdr.hdr_len = - __cpu_to_virtio16(false, skb_headlen(skb)); + __cpu_to_virtio16(vio_le(), skb_headlen(skb)); vnet_hdr.gso_size = - __cpu_to_virtio16(false, sinfo->gso_size); + __cpu_to_virtio16(vio_le(), sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) @@ -3181,9 +3183,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr.csum_start = __cpu_to_virtio16(false, + vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(), skb_checksum_start_offset(skb)); - vnet_hdr.csum_offset = __cpu_to_virtio16(false, + vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(), skb->csum_offset); } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; -- cgit v0.10.2 From 8c85151ddec66f78fbf997e35be005a322fbb9c9 Mon Sep 17 00:00:00 2001 From: WingMan Kwok Date: Wed, 23 Sep 2015 13:37:05 -0400 Subject: net: netcp: ethss: fix error in calling sgmii api with incorrect offset On K2HK, sgmii module registers of slave 0 and 1 are mem mapped to one contiguous block, while those of slave 2 and 3 are mapped to another contiguous block. However, on K2E and K2L, sgmii module registers of all slaves are mem mapped to one contiguous block. SGMII APIs expect slave 0 sgmii base when API is invoked for slave 0 and 1, and slave 2 sgmii base when invoked for other slaves. Before this patch, slave 0 sgmii base is always passed to sgmii API for K2E regardless which slave is the API invoked for. This patch fixes the problem. Signed-off-by: WingMan Kwok Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 6f16d6a..6bff8d8 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -77,6 +77,7 @@ #define GBENU_ALE_OFFSET 0x1e000 #define GBENU_HOST_PORT_NUM 0 #define GBENU_NUM_ALE_ENTRIES 1024 +#define GBENU_SGMII_MODULE_SIZE 0x100 /* 10G Ethernet SS defines */ #define XGBE_MODULE_NAME "netcp-xgbe" @@ -149,8 +150,8 @@ #define XGBE_STATS2_MODULE 2 /* s: 0-based slave_port */ -#define SGMII_BASE(s) \ - (((s) < 2) ? gbe_dev->sgmii_port_regs : gbe_dev->sgmii_port34_regs) +#define SGMII_BASE(d, s) \ + (((s) < 2) ? 
(d)->sgmii_port_regs : (d)->sgmii_port34_regs) #define GBE_TX_QUEUE 648 #define GBE_TXHOOK_ORDER 0 @@ -1997,13 +1998,8 @@ static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev, return; if (!SLAVE_LINK_IS_XGMII(slave)) { - if (gbe_dev->ss_version == GBE_SS_VERSION_14) - sgmii_link_state = - netcp_sgmii_get_port_link(SGMII_BASE(sp), sp); - else - sgmii_link_state = - netcp_sgmii_get_port_link( - gbe_dev->sgmii_port_regs, sp); + sgmii_link_state = + netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); } phy_link_state = gbe_phy_link_status(slave); @@ -2100,17 +2096,11 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave, static void gbe_sgmii_rtreset(struct gbe_priv *priv, struct gbe_slave *slave, bool set) { - void __iomem *sgmii_port_regs; - if (SLAVE_LINK_IS_XGMII(slave)) return; - if ((priv->ss_version == GBE_SS_VERSION_14) && (slave->slave_num >= 2)) - sgmii_port_regs = priv->sgmii_port34_regs; - else - sgmii_port_regs = priv->sgmii_port_regs; - - netcp_sgmii_rtreset(sgmii_port_regs, slave->slave_num, set); + netcp_sgmii_rtreset(SGMII_BASE(priv, slave->slave_num), + slave->slave_num, set); } static void gbe_slave_stop(struct gbe_intf *intf) @@ -2136,17 +2126,12 @@ static void gbe_slave_stop(struct gbe_intf *intf) static void gbe_sgmii_config(struct gbe_priv *priv, struct gbe_slave *slave) { - void __iomem *sgmii_port_regs; - - sgmii_port_regs = priv->sgmii_port_regs; - if ((priv->ss_version == GBE_SS_VERSION_14) && (slave->slave_num >= 2)) - sgmii_port_regs = priv->sgmii_port34_regs; + if (SLAVE_LINK_IS_XGMII(slave)) + return; - if (!SLAVE_LINK_IS_XGMII(slave)) { - netcp_sgmii_reset(sgmii_port_regs, slave->slave_num); - netcp_sgmii_config(sgmii_port_regs, slave->slave_num, - slave->link_interface); - } + netcp_sgmii_reset(SGMII_BASE(priv, slave->slave_num), slave->slave_num); + netcp_sgmii_config(SGMII_BASE(priv, slave->slave_num), slave->slave_num, + slave->link_interface); } static int gbe_slave_open(struct gbe_intf *gbe_intf) @@ -2997,6 +2982,14 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, gbe_dev->switch_regs = regs; gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET; + + /* Although sgmii modules are mem mapped to one contiguous + * region on GBENU devices, setting sgmii_port34_regs allows + * consistent code when accessing sgmii api + */ + gbe_dev->sgmii_port34_regs = gbe_dev->sgmii_port_regs + + (2 * GBENU_SGMII_MODULE_SIZE); + gbe_dev->host_port_regs = gbe_dev->switch_regs + GBENU_HOST_PORT_OFFSET; for (i = 0; i < (gbe_dev->max_num_ports); i++) -- cgit v0.10.2 From 156e3c21f89655f099228577005a6c656b3ceb3d Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:06 -0400 Subject: net: netcp: remove dead code from the driver netcp_core is the first driver that will get initialized, and the modules (ethss, pa, etc.) will then get initialized. So the code at the end of netcp_probe() that iterates over the modules is dead code, as the module list will always be empty. So remove this code. Signed-off-by: Murali Karicheri Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1a5aca5..c0bc4b9 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2040,7 +2040,6 @@ static int netcp_probe(struct platform_device *pdev) struct device_node *child, *interfaces; struct netcp_device *netcp_device; struct device *dev = &pdev->dev; - struct netcp_module *module; int ret; if (!node) { @@ -2087,14 +2086,6 @@ static int netcp_probe(struct platform_device *pdev) /* Add the device instance to the list */ list_add_tail(&netcp_device->device_list, &netcp_devices); - /* Probe & attach any modules already registered */ - mutex_lock(&netcp_modules_lock); - for_each_netcp_module(module) { - ret = netcp_module_probe(netcp_device, module); - if (ret < 0) - dev_err(dev, "module(%s) probe failed\n", module->name); - } - mutex_unlock(&netcp_modules_lock); return 0; probe_quit_interface: -- cgit v0.10.2 From 736532a0705ffc27c14f712fa2758a7f8b15e8b4 Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:07 -0400 Subject: net: netcp: move netcp_register_interface() to after attach module The netcp interface is not fully initialized before attaching the module to the interface. For example, the tx pipe/rx pipe is initialized in the ethss module as part of attach(). So until this is complete, the interface can't be registered. So move registration of the interface with the net device outside the current loop that attaches the modules to the interface. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index c0bc4b9..cf693de 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -291,13 +291,6 @@ static int netcp_module_probe(struct netcp_device *netcp_device, interface_list) { struct netcp_intf_modpriv *intf_modpriv; - /* If interface not registered then register now */ - if (!netcp_intf->netdev_registered) - ret = netcp_register_interface(netcp_intf); - - if (ret) - return -ENODEV; - intf_modpriv = devm_kzalloc(dev, sizeof(*intf_modpriv), GFP_KERNEL); if (!intf_modpriv) @@ -323,6 +316,18 @@ static int netcp_module_probe(struct netcp_device *netcp_device, continue; } } + + /* Now register the interface with netdev */ + list_for_each_entry(netcp_intf, + &netcp_device->interface_head, + interface_list) { + /* If interface not registered then register now */ + if (!netcp_intf->netdev_registered) { + ret = netcp_register_interface(netcp_intf); + if (ret) + return -ENODEV; + } + } return 0; } -- cgit v0.10.2 From e558b1fbf5f43da83f91a31e595a6d65e663b100 Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:08 -0400 Subject: net: netcp: add error check to netcp_allocate_rx_buf() Currently, if netcp_allocate_rx_buf() fails due to no descriptors in the rx free descriptor queue, inside the netcp_rxpool_refill() function the iterative loop to fill buffers doesn't terminate right away. So modify netcp_allocate_rx_buf() to return an error code and use it to break the loop when there is an error. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller
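The control-flow change is small but easy to get wrong, so here is a hedged sketch of the loop after the patch, reduced to plain C with a hypothetical allocator stub standing in for the driver's function:

#include <errno.h>

/* hypothetical stub: returns 0 on success, -ENOMEM when the
 * descriptor pool is exhausted */
static int allocate_one_rx_buf(void)
{
        return 0;
}

static void refill_sketch(int deficit)
{
        int ret = 0;

        /* stop at the first allocation failure instead of spinning
         * through the entire remaining deficit */
        while (deficit-- > 0 && !ret)
                ret = allocate_one_rx_buf();
}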
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index cf693de..97e2629 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -801,7 +801,7 @@ static void netcp_rxpool_free(struct netcp_intf *netcp) netcp->rx_pool = NULL; } -static void netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) +static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) { struct knav_dma_desc *hwdesc; unsigned int buf_len, dma_sz; @@ -815,7 +815,7 @@ static void netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) hwdesc = knav_pool_desc_get(netcp->rx_pool); if (IS_ERR_OR_NULL(hwdesc)) { dev_dbg(netcp->ndev_dev, "out of rx pool desc\n"); - return; + return -ENOMEM; } if (likely(fdq == 0)) { @@ -867,25 +867,26 @@ static void netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) knav_pool_desc_map(netcp->rx_pool, hwdesc, sizeof(*hwdesc), &dma, &dma_sz); knav_queue_push(netcp->rx_fdq[fdq], dma, sizeof(*hwdesc), 0); - return; + return 0; fail: knav_pool_desc_put(netcp->rx_pool, hwdesc); + return -ENOMEM; } /* Refill Rx FDQ with descriptors & attached buffers */ static void netcp_rxpool_refill(struct netcp_intf *netcp) { u32 fdq_deficit[KNAV_DMA_FDQ_PER_CHAN] = {0}; - int i; + int i, ret = 0; /* Calculate the FDQ deficit and refill */ for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN && netcp->rx_fdq[i]; i++) { fdq_deficit[i] = netcp->rx_queue_depths[i] - knav_queue_get_count(netcp->rx_fdq[i]); - while (fdq_deficit[i]--) - netcp_allocate_rx_buf(netcp, i); + while (fdq_deficit[i]-- && !ret) + ret = netcp_allocate_rx_buf(netcp, i); } /* end for fdqs */ } -- cgit v0.10.2 From 915c5857874fc211874de1363e88f902e581e6eb Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:09 -0400 Subject: net: netcp: check for interface handle in netcp_module_probe() Currently netcp_module_probe() doesn't check the return value of of_parse_phandle() that points to the interface data for the module, and then passes the node ptr to the module, which is incorrect. Check the return value and free the intf_modpriv if there is an error. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 97e2629..d39dce3 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -299,6 +299,11 @@ static int netcp_module_probe(struct netcp_device *netcp_device, interface = of_parse_phandle(netcp_intf->node_interface, module->name, 0); + if (!interface) { + devm_kfree(dev, intf_modpriv); + continue; + } + intf_modpriv->netcp_priv = netcp_intf; intf_modpriv->netcp_module = module; list_add_tail(&intf_modpriv->intf_list, -- cgit v0.10.2 From 99f8ef5dc6546ac28cc7a03ff8301bc72fe5527e Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:10 -0400 Subject: net: netcp: allocate buffers to desc before re-enable interrupt Currently netcp_rxpool_refill() refills descriptors and attaches buffers to the fdq while interrupts are enabled, as part of NAPI poll. Doing it while interrupts are disabled could be beneficial, as the hardware will not be starved while the CPU is busy processing the interrupt. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller
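A hedged sketch of the resulting poll order, with hypothetical helper names standing in for the driver's own functions; the actual one-line move is in the diff that follows:

/* Sketch: refill the free-descriptor queues while the queue interrupt
 * is still masked, then complete NAPI and re-arm notifications.
 */
static int rx_poll_sketch(struct napi_struct *napi, int budget)
{
        int packets = process_rx_packets_stub(budget);  /* hypothetical */

        rxpool_refill_stub();           /* hypothetical; runs before irqs re-enabled */
        if (packets < budget) {
                napi_complete(napi);
                queue_enable_notify_stub();     /* hypothetical; re-arms the interrupt */
        }
        return packets;
}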
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index d39dce3..8026daa 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -904,12 +904,12 @@ static int netcp_rx_poll(struct napi_struct *napi, int budget) packets = netcp_process_rx_packets(netcp, budget); + netcp_rxpool_refill(netcp); if (packets < budget) { napi_complete(&netcp->rx_napi); knav_queue_enable_notify(netcp->rx_queue); } - netcp_rxpool_refill(netcp); return packets; } -- cgit v0.10.2 From 8ceaf361ffd131e835aef1e6cdb1d5ba70702617 Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:11 -0400 Subject: net: netcp: fix deadlock reported by lockup detector A deadlock trace is seen in the netcp driver with the lockup detector enabled. The trace log is provided below for reference. This patch fixes the bug by removing the usage of netcp_modules_lock within ndo_ops functions. ndo_{open/close/ioctl}() is already called with rtnl_lock held. So there is no need to hold another mutex for serialization across processes on multiple cores. So remove use of the netcp_modules_lock mutex from these ndo ops functions. ndo_set_rx_mode() shouldn't be using a mutex as it is called from atomic context. In the case of ndo_set_rx_mode(), there can be a call to this API without rtnl_lock held, from an atomic context. As the underlying modules are expected to add addresses to a hardware table, it is to be protected across concurrent updates and hence a spin lock is used to synchronize the access. Same with ndo_vlan_rx_add_vid() & ndo_vlan_rx_kill_vid(). Probably the netcp_modules_lock is used to protect the module not being removed as part of rmmod. Currently this is not fully implemented and assumes the interface is brought down before doing rmmod of modules. The support for rmmod while the interface is up is expected in a future patch set when additional modules such as pa and qos are added. For now all of the tests, such as interface up/down, reboot, and iperf, work fine with this patch applied. Deadlock trace seen with lockup detector enabled is shown below for reference. [ 16.863014] ====================================================== [ 16.869183] [ INFO: possible circular locking dependency detected ] [ 16.875441] 4.1.6-01265-gfb1e101 #1 Tainted: G W [ 16.881176] ------------------------------------------------------- [ 16.887432] ifconfig/1662 is trying to acquire lock: [ 16.892386] (netcp_modules_lock){+.+.+.}, at: [] netcp_ndo_open+0x168/0x518 [ 16.900321] [ 16.900321] but task is already holding lock: [ 16.906144] (rtnl_mutex){+.+.+.}, at: [] devinet_ioctl+0xf8/0x7e4 [ 16.913206] [ 16.913206] which lock already depends on the new lock.
[ 16.913206] [ 16.921372] [ 16.921372] the existing dependency chain (in reverse order) is: [ 16.928844] -> #1 (rtnl_mutex){+.+.+.}: [ 16.932865] [] mutex_lock_nested+0x68/0x4a8 [ 16.938521] [] register_netdev+0xc/0x24 [ 16.943831] [] netcp_module_probe+0x214/0x2ec [ 16.949660] [] netcp_register_module+0xd4/0x140 [ 16.955663] [] keystone_gbe_init+0x10/0x28 [ 16.961233] [] do_one_initcall+0xb8/0x1f8 [ 16.966714] [] kernel_init_freeable+0x148/0x1e8 [ 16.972720] [] kernel_init+0xc/0xe8 [ 16.977682] [] ret_from_fork+0x14/0x3c [ 16.982905] -> #0 (netcp_modules_lock){+.+.+.}: [ 16.987619] [] lock_acquire+0x118/0x320 [ 16.992928] [] mutex_lock_nested+0x68/0x4a8 [ 16.998582] [] netcp_ndo_open+0x168/0x518 [ 17.004064] [] __dev_open+0xa8/0x10c [ 17.009112] [] __dev_change_flags+0x94/0x144 [ 17.014853] [] dev_change_flags+0x18/0x48 [ 17.020334] [] devinet_ioctl+0x6dc/0x7e4 [ 17.025729] [] sock_ioctl+0x1d0/0x2a8 [ 17.030865] [] do_vfs_ioctl+0x41c/0x688 [ 17.036173] [] SyS_ioctl+0x34/0x5c [ 17.041046] [] ret_fast_syscall+0x0/0x54 [ 17.046441] [ 17.046441] other info that might help us debug this: [ 17.046441] [ 17.054434] Possible unsafe locking scenario: [ 17.054434] [ 17.060343] CPU0 CPU1 [ 17.064862] ---- ---- [ 17.069381] lock(rtnl_mutex); [ 17.072522] lock(netcp_modules_lock); [ 17.078875] lock(rtnl_mutex); [ 17.084532] lock(netcp_modules_lock); [ 17.088366] [ 17.088366] *** DEADLOCK *** [ 17.088366] [ 17.094279] 1 lock held by ifconfig/1662: [ 17.098278] #0: (rtnl_mutex){+.+.+.}, at: [] devinet_ioctl+0xf8/0x7e4 [ 17.105774] [ 17.105774] stack backtrace: [ 17.110124] CPU: 1 PID: 1662 Comm: ifconfig Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 17.118637] Hardware name: Keystone [ 17.122123] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 17.129862] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 17.137079] [] (dump_stack) from [] (print_circular_bug+0x210/0x330) [ 17.145161] [] (print_circular_bug) from [] (validate_chain.isra.35+0xf98/0x13ac) [ 17.154372] [] (validate_chain.isra.35) from [] (__lock_acquire+0x52c/0xcc0) [ 17.163149] [] (__lock_acquire) from [] (lock_acquire+0x118/0x320) [ 17.171058] [] (lock_acquire) from [] (mutex_lock_nested+0x68/0x4a8) [ 17.179140] [] (mutex_lock_nested) from [] (netcp_ndo_open+0x168/0x518) [ 17.187484] [] (netcp_ndo_open) from [] (__dev_open+0xa8/0x10c) [ 17.195133] [] (__dev_open) from [] (__dev_change_flags+0x94/0x144) [ 17.203129] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 17.211560] [] (dev_change_flags) from [] (devinet_ioctl+0x6dc/0x7e4) [ 17.219729] [] (devinet_ioctl) from [] (sock_ioctl+0x1d0/0x2a8) [ 17.227378] [] (sock_ioctl) from [] (do_vfs_ioctl+0x41c/0x688) [ 17.234939] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 17.242242] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 17.258855] netcp-1.0 2620110.netcp eth0: Link is Up - 1Gbps/Full - flow control off [ 17.271282] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:616 [ 17.279712] in_atomic(): 1, irqs_disabled(): 0, pid: 1662, name: ifconfig [ 17.286500] INFO: lockdep is turned off. 
[ 17.290413] Preemption disabled at:[< (null)>] (null) [ 17.295728] [ 17.297214] CPU: 1 PID: 1662 Comm: ifconfig Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 17.305735] Hardware name: Keystone [ 17.309223] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 17.316970] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 17.324194] [] (dump_stack) from [] (mutex_lock_nested+0x28/0x4a8) [ 17.332112] [] (mutex_lock_nested) from [] (netcp_set_rx_mode+0x160/0x210) [ 17.340724] [] (netcp_set_rx_mode) from [] (dev_set_rx_mode+0x1c/0x28) [ 17.348982] [] (dev_set_rx_mode) from [] (__dev_open+0xc4/0x10c) [ 17.356724] [] (__dev_open) from [] (__dev_change_flags+0x94/0x144) [ 17.364729] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 17.373166] [] (dev_change_flags) from [] (devinet_ioctl+0x6dc/0x7e4) [ 17.381344] [] (devinet_ioctl) from [] (sock_ioctl+0x1d0/0x2a8) [ 17.388994] [] (sock_ioctl) from [] (do_vfs_ioctl+0x41c/0x688) [ 17.396563] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 17.403873] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 17.413772] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready udhcpc (v1.20.2) started Sending discover... [ 18.690666] netcp-1.0 2620110.netcp eth0: Link is Up - 1Gbps/Full - flow control off Sending discover... [ 22.250972] netcp-1.0 2620110.netcp eth0: Link is Up - 1Gbps/Full - flow control off [ 22.258721] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [ 22.265458] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:616 [ 22.273896] in_atomic(): 1, irqs_disabled(): 0, pid: 342, name: kworker/1:1 [ 22.280854] INFO: lockdep is turned off. [ 22.284767] Preemption disabled at:[< (null)>] (null) [ 22.290074] [ 22.291568] CPU: 1 PID: 342 Comm: kworker/1:1 Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 22.300255] Hardware name: Keystone [ 22.303750] Workqueue: ipv6_addrconf addrconf_dad_work [ 22.308895] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 22.316643] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 22.323867] [] (dump_stack) from [] (mutex_lock_nested+0x28/0x4a8) [ 22.331786] [] (mutex_lock_nested) from [] (netcp_set_rx_mode+0x160/0x210) [ 22.340394] [] (netcp_set_rx_mode) from [] (__dev_mc_add+0x54/0x68) [ 22.348401] [] (__dev_mc_add) from [] (igmp6_group_added+0x168/0x1b4) [ 22.356580] [] (igmp6_group_added) from [] (ipv6_dev_mc_inc+0x4f0/0x5a8) [ 22.365019] [] (ipv6_dev_mc_inc) from [] (addrconf_dad_work+0x21c/0x33c) [ 22.373460] [] (addrconf_dad_work) from [] (process_one_work+0x214/0x8d0) [ 22.381986] [] (process_one_work) from [] (worker_thread+0x48/0x4bc) [ 22.390071] [] (worker_thread) from [] (kthread+0xf0/0x108) [ 22.397381] [] (kthread) from [] Trace related to incorrect usage of mutex inside ndo_set_rx_mode [ 24.086066] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:616 [ 24.094506] in_atomic(): 1, irqs_disabled(): 0, pid: 1682, name: ifconfig [ 24.101291] INFO: lockdep is turned off. 
[ 24.105203] Preemption disabled at:[< (null)>] (null) [ 24.110511] [ 24.112005] CPU: 2 PID: 1682 Comm: ifconfig Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 24.120518] Hardware name: Keystone [ 24.124018] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 24.131772] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 24.138989] [] (dump_stack) from [] (mutex_lock_nested+0x28/0x4a8) [ 24.146908] [] (mutex_lock_nested) from [] (netcp_set_rx_mode+0x160/0x210) [ 24.155523] [] (netcp_set_rx_mode) from [] (dev_set_rx_mode+0x1c/0x28) [ 24.163787] [] (dev_set_rx_mode) from [] (__dev_open+0xc4/0x10c) [ 24.171531] [] (__dev_open) from [] (__dev_change_flags+0x94/0x144) [ 24.179528] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 24.187966] [] (dev_change_flags) from [] (devinet_ioctl+0x6dc/0x7e4) [ 24.196145] [] (devinet_ioctl) from [] (sock_ioctl+0x1d0/0x2a8) [ 24.203803] [] (sock_ioctl) from [] (do_vfs_ioctl+0x41c/0x688) [ 24.211373] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 24.218676] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 24.227156] IPv6: ADDRCONF(NETDEV_UP): eth1: link is not ready Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 8026daa..9f9832f 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -367,7 +367,6 @@ int netcp_register_module(struct netcp_module *module) if (ret < 0) goto fail; } - mutex_unlock(&netcp_modules_lock); return 0; @@ -1395,7 +1394,6 @@ static void netcp_addr_sweep_del(struct netcp_intf *netcp) continue; dev_dbg(netcp->ndev_dev, "deleting address %pM, type %x\n", naddr->addr, naddr->type); - mutex_lock(&netcp_modules_lock); for_each_module(netcp, priv) { module = priv->netcp_module; if (!module->del_addr) @@ -1404,7 +1402,6 @@ static void netcp_addr_sweep_del(struct netcp_intf *netcp) naddr); WARN_ON(error); } - mutex_unlock(&netcp_modules_lock); netcp_addr_del(netcp, naddr); } } @@ -1421,7 +1418,7 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) continue; dev_dbg(netcp->ndev_dev, "adding address %pM, type %x\n", naddr->addr, naddr->type); - mutex_lock(&netcp_modules_lock); + for_each_module(netcp, priv) { module = priv->netcp_module; if (!module->add_addr) @@ -1429,7 +1426,6 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) error = module->add_addr(priv->module_priv, naddr); WARN_ON(error); } - mutex_unlock(&netcp_modules_lock); } } @@ -1443,6 +1439,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) ndev->flags & IFF_ALLMULTI || netdev_mc_count(ndev) > NETCP_MAX_MCAST_ADDR); + spin_lock(&netcp->lock); /* first clear all marks */ netcp_addr_clear_mark(netcp); @@ -1461,6 +1458,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) /* finally sweep and callout into modules */ netcp_addr_sweep_del(netcp); netcp_addr_sweep_add(netcp); + spin_unlock(&netcp->lock); } static void netcp_free_navigator_resources(struct netcp_intf *netcp) @@ -1625,7 +1623,6 @@ static int netcp_ndo_open(struct net_device *ndev) goto fail; } - mutex_lock(&netcp_modules_lock); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (module->open) { @@ -1636,7 +1633,6 @@ static int netcp_ndo_open(struct net_device *ndev) } } } - mutex_unlock(&netcp_modules_lock); napi_enable(&netcp->rx_napi); napi_enable(&netcp->tx_napi); @@ -1653,7 +1649,6 @@ fail_open: if (module->close) module->close(intf_modpriv->module_priv, ndev); } - 
mutex_unlock(&netcp_modules_lock); fail: netcp_free_navigator_resources(netcp); @@ -1677,7 +1672,6 @@ static int netcp_ndo_stop(struct net_device *ndev) napi_disable(&netcp->rx_napi); napi_disable(&netcp->tx_napi); - mutex_lock(&netcp_modules_lock); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (module->close) { @@ -1686,7 +1680,6 @@ static int netcp_ndo_stop(struct net_device *ndev) dev_err(netcp->ndev_dev, "Close failed\n"); } } - mutex_unlock(&netcp_modules_lock); /* Recycle Rx descriptors from completion queue */ netcp_empty_rx_queue(netcp); @@ -1714,7 +1707,6 @@ static int netcp_ndo_ioctl(struct net_device *ndev, if (!netif_running(ndev)) return -EINVAL; - mutex_lock(&netcp_modules_lock); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (!module->ioctl) @@ -1730,7 +1722,6 @@ static int netcp_ndo_ioctl(struct net_device *ndev, } out: - mutex_unlock(&netcp_modules_lock); return (ret == 0) ? 0 : err; } @@ -1765,11 +1756,12 @@ static int netcp_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) struct netcp_intf *netcp = netdev_priv(ndev); struct netcp_intf_modpriv *intf_modpriv; struct netcp_module *module; + unsigned long flags; int err = 0; dev_dbg(netcp->ndev_dev, "adding rx vlan id: %d\n", vid); - mutex_lock(&netcp_modules_lock); + spin_lock_irqsave(&netcp->lock, flags); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if ((module->add_vid) && (vid != 0)) { @@ -1781,7 +1773,8 @@ static int netcp_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) } } } - mutex_unlock(&netcp_modules_lock); + spin_unlock_irqrestore(&netcp->lock, flags); + return err; } @@ -1790,11 +1783,12 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) struct netcp_intf *netcp = netdev_priv(ndev); struct netcp_intf_modpriv *intf_modpriv; struct netcp_module *module; + unsigned long flags; int err = 0; dev_dbg(netcp->ndev_dev, "removing rx vlan id: %d\n", vid); - mutex_lock(&netcp_modules_lock); + spin_lock_irqsave(&netcp->lock, flags); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (module->del_vid) { @@ -1806,7 +1800,7 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) } } } - mutex_unlock(&netcp_modules_lock); + spin_unlock_irqrestore(&netcp->lock, flags); return err; } -- cgit v0.10.2 From aaa0062ecf4877a26dea66bee1039c6eaf906c94 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 09:43:41 +0100 Subject: 8139cp: Do not re-enable RX interrupts in cp_tx_timeout() If an RX interrupt was already received but NAPI has not yet run when the RX timeout happens, we end up in cp_tx_timeout() with RX interrupts already disabled. Blindly re-enabling them will cause an IRQ storm. (This is made particularly horrid by the fact that cp_interrupt() always returns that it's handled the interrupt, even when it hasn't actually done anything. If it didn't do that, the core IRQ code would have detected the storm and handled it, I'd have had a clear smoking gun backtrace instead of just a spontaneously resetting router, and I'd have at *least* two days of my life back. Changing the return value of cp_interrupt() will be argued about under separate cover.) Unconditionally leave RX interrupts disabled after the reset, and schedule NAPI to check the receive ring and re-enable them. Signed-off-by: David Woodhouse Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index ba3dab7..947932d 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1262,9 +1262,10 @@ static void cp_tx_timeout(struct net_device *dev) rc = cp_init_rings(cp); cp_start_hw(cp); __cp_set_rx_mode(dev); - cp_enable_irq(cp); + cpw16_f(IntrMask, cp_norx_intr_mask); netif_wake_queue(dev); + napi_schedule_irqoff(&cp->napi); spin_unlock_irqrestore(&cp->lock, flags); } -- cgit v0.10.2 From 26b0bad6ac3a0167792dc4ffb276c29bc597d239 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 09:44:06 +0100 Subject: 8139cp: Fix tx_queued debug message to print correct slot numbers After a certain amount of staring at the debug output of this driver, I realised it was lying to me. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 947932d..cbca0de 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -786,7 +786,8 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, wmb(); cp->tx_skb[entry] = skb; - entry = NEXT_TX(entry); + netif_dbg(cp, tx_queued, cp->dev, "tx queued, slot %d, skblen %d\n", + entry, skb->len); } else { struct cp_desc *txd; u32 first_len, first_eor; @@ -805,7 +806,6 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, goto out_dma_error; cp->tx_skb[entry] = skb; - entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; @@ -813,6 +813,8 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, u32 ctrl; dma_addr_t mapping; + entry = NEXT_TX(entry); + len = skb_frag_size(this_frag); mapping = dma_map_single(&cp->pdev->dev, skb_frag_address(this_frag), @@ -848,9 +850,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->opts1 = cpu_to_le32(ctrl); wmb(); - cp->tx_skb[entry] = skb; - entry = NEXT_TX(entry); } txd = &cp->tx_ring[first_entry]; @@ -873,12 +873,13 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->opts1 = cpu_to_le32(first_eor | first_len | FirstFrag | DescOwn); wmb(); + + netif_dbg(cp, tx_queued, cp->dev, "tx queued, slots %d-%d, skblen %d\n", + first_entry, entry, skb->len); } - cp->tx_head = entry; + cp->tx_head = NEXT_TX(entry); netdev_sent_queue(dev, skb->len); - netif_dbg(cp, tx_queued, cp->dev, "tx queued, slot %d, skblen %d\n", - entry, skb->len); if (TX_BUFFS_AVAIL(cp) <= (MAX_SKB_FRAGS + 1)) netif_stop_queue(dev); -- cgit v0.10.2 From a3b804043f490aeec57d8ca5baccdd35e6250857 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 09:44:38 +0100 Subject: 8139cp: Fix TSO/scatter-gather descriptor setup When sending a TSO frame in multiple buffers, we were neglecting to set the first descriptor up in TSO mode. Signed-off-by: David Woodhouse Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index cbca0de..75a8cee 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -790,7 +790,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, entry, skb->len); } else { struct cp_desc *txd; - u32 first_len, first_eor; + u32 first_len, first_eor, ctrl; dma_addr_t first_mapping; int frag, first_entry = entry; const struct iphdr *ip = ip_hdr(skb); @@ -810,7 +810,6 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; u32 len; - u32 ctrl; dma_addr_t mapping; entry = NEXT_TX(entry); @@ -858,20 +857,19 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->addr = cpu_to_le64(first_mapping); wmb(); - if (skb->ip_summed == CHECKSUM_PARTIAL) { + ctrl = first_eor | first_len | FirstFrag | DescOwn; + if (mss) + ctrl |= LargeSend | ((mss & MSSMask) << MSSShift); + else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (ip->protocol == IPPROTO_TCP) - txd->opts1 = cpu_to_le32(first_eor | first_len | - FirstFrag | DescOwn | - IPCS | TCPCS); + ctrl |= IPCS | TCPCS; else if (ip->protocol == IPPROTO_UDP) - txd->opts1 = cpu_to_le32(first_eor | first_len | - FirstFrag | DescOwn | - IPCS | UDPCS); + ctrl |= IPCS | UDPCS; else BUG(); - } else - txd->opts1 = cpu_to_le32(first_eor | first_len | - FirstFrag | DescOwn); + } + + txd->opts1 = cpu_to_le32(ctrl); wmb(); netif_dbg(cp, tx_queued, cp->dev, "tx queued, slots %d-%d, skblen %d\n", -- cgit v0.10.2 From 0a5aeee0b79fa99d8e04c98dd4e87d4f52aa497b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 09:44:57 +0100 Subject: 8139cp: Reduce duplicate csum/tso code in cp_start_xmit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We calculate the value of the opts1 descriptor field in three different places. With two different behaviours when given an invalid packet to be checksummed — none of them correct. Sort that out. Signed-off-by: David Woodhouse Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 75a8cee..b3bd8b1 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -733,7 +733,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, { struct cp_private *cp = netdev_priv(dev); unsigned entry; - u32 eor, flags; + u32 eor, opts1; unsigned long intr_flags; __le32 opts2; int mss = 0; @@ -753,6 +753,21 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, mss = skb_shinfo(skb)->gso_size; opts2 = cpu_to_le32(cp_tx_vlan_tag(skb)); + opts1 = DescOwn; + if (mss) + opts1 |= LargeSend | ((mss & MSSMask) << MSSShift); + else if (skb->ip_summed == CHECKSUM_PARTIAL) { + const struct iphdr *ip = ip_hdr(skb); + if (ip->protocol == IPPROTO_TCP) + opts1 |= IPCS | TCPCS; + else if (ip->protocol == IPPROTO_UDP) + opts1 |= IPCS | UDPCS; + else { + WARN_ONCE(1, + "Net bug: asked to checksum invalid Legacy IP packet\n"); + goto out_dma_error; + } + } if (skb_shinfo(skb)->nr_frags == 0) { struct cp_desc *txd = &cp->tx_ring[entry]; @@ -768,21 +783,9 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->addr = cpu_to_le64(mapping); wmb(); - flags = eor | len | DescOwn | FirstFrag | LastFrag; - - if (mss) - flags |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { - const struct iphdr *ip = ip_hdr(skb); - if (ip->protocol == IPPROTO_TCP) - flags |= IPCS | TCPCS; - else if (ip->protocol == IPPROTO_UDP) - flags |= IPCS | UDPCS; - else - WARN_ON(1); /* we need a WARN() */ - } + opts1 |= eor | len | FirstFrag | LastFrag; - txd->opts1 = cpu_to_le32(flags); + txd->opts1 = cpu_to_le32(opts1); wmb(); cp->tx_skb[entry] = skb; @@ -793,7 +796,6 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, u32 first_len, first_eor, ctrl; dma_addr_t first_mapping; int frag, first_entry = entry; - const struct iphdr *ip = ip_hdr(skb); /* We must give this initial chunk to the device last. * Otherwise we could race with the device. @@ -825,19 +827,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; - ctrl = eor | len | DescOwn; - - if (mss) - ctrl |= LargeSend | - ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (ip->protocol == IPPROTO_TCP) - ctrl |= IPCS | TCPCS; - else if (ip->protocol == IPPROTO_UDP) - ctrl |= IPCS | UDPCS; - else - BUG(); - } + ctrl = opts1 | eor | len; if (frag == skb_shinfo(skb)->nr_frags - 1) ctrl |= LastFrag; @@ -857,18 +847,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->addr = cpu_to_le64(first_mapping); wmb(); - ctrl = first_eor | first_len | FirstFrag | DescOwn; - if (mss) - ctrl |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (ip->protocol == IPPROTO_TCP) - ctrl |= IPCS | TCPCS; - else if (ip->protocol == IPPROTO_UDP) - ctrl |= IPCS | UDPCS; - else - BUG(); - } - + ctrl = opts1 | first_eor | first_len | FirstFrag; txd->opts1 = cpu_to_le32(ctrl); wmb(); -- cgit v0.10.2 From 7f4c685633e2df9ba10d49a31dda13715745db37 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 09:45:16 +0100 Subject: 8139cp: Fix DMA unmapping of transmitted buffers The low 16 bits of the 'opts1' field in the TX descriptor are supposed to still contain the buffer length when the descriptor is handed back to us. In practice, at least on my hardware, they don't. 
So stash the original value of the opts1 field and get the length to unmap from there. There are other ways we could have worked out the length, but I actually want a stash of the opts1 field anyway so that I can dump it alongside the contents of the descriptor ring when we suffer a TX timeout. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index b3bd8b1..ec6bd86 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -341,6 +341,7 @@ struct cp_private { unsigned tx_tail; struct cp_desc *tx_ring; struct sk_buff *tx_skb[CP_TX_RING_SIZE]; + u32 tx_opts[CP_TX_RING_SIZE]; unsigned rx_buf_sz; unsigned wol_enabled : 1; /* Is Wake-on-LAN enabled? */ @@ -665,7 +666,7 @@ static void cp_tx (struct cp_private *cp) BUG_ON(!skb); dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr), - le32_to_cpu(txd->opts1) & 0xffff, + cp->tx_opts[tx_tail] & 0xffff, PCI_DMA_TODEVICE); if (status & LastFrag) { @@ -789,6 +790,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, wmb(); cp->tx_skb[entry] = skb; + cp->tx_opts[entry] = opts1; netif_dbg(cp, tx_queued, cp->dev, "tx queued, slot %d, skblen %d\n", entry, skb->len); } else { @@ -839,6 +841,8 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->opts1 = cpu_to_le32(ctrl); wmb(); + + cp->tx_opts[entry] = ctrl; cp->tx_skb[entry] = skb; } @@ -851,6 +855,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, txd->opts1 = cpu_to_le32(ctrl); wmb(); + cp->tx_opts[first_entry] = ctrl; netif_dbg(cp, tx_queued, cp->dev, "tx queued, slots %d-%d, skblen %d\n", first_entry, entry, skb->len); } @@ -1093,6 +1098,7 @@ static int cp_init_rings (struct cp_private *cp) { memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE); cp->tx_ring[CP_TX_RING_SIZE - 1].opts1 = cpu_to_le32(RingEnd); + memset(cp->tx_opts, 0, sizeof(cp->tx_opts)); cp_init_rings_index(cp); @@ -1150,6 +1156,7 @@ static void cp_clean_rings (struct cp_private *cp) memset(cp->rx_ring, 0, sizeof(struct cp_desc) * CP_RX_RING_SIZE); memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE); + memset(cp->tx_opts, 0, sizeof(cp->tx_opts)); memset(cp->rx_skb, 0, sizeof(struct sk_buff *) * CP_RX_RING_SIZE); memset(cp->tx_skb, 0, sizeof(struct sk_buff *) * CP_TX_RING_SIZE); -- cgit v0.10.2 From 41b976414c88016e2c9d9b2f6667ee67a998d388 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Sep 2015 09:45:31 +0100 Subject: 8139cp: Dump contents of descriptor ring on TX timeout We are seeing unexplained TX timeouts under heavy load. Let's try to get a better idea of what's going on. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index ec6bd86..686334f 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -157,6 +157,7 @@ enum { NWayAdvert = 0x66, /* MII ADVERTISE */ NWayLPAR = 0x68, /* MII LPA */ NWayExpansion = 0x6A, /* MII Expansion */ + TxDmaOkLowDesc = 0x82, /* Low 16 bit address of a Tx descriptor. 
*/ Config5 = 0xD8, /* Config5 */ TxPoll = 0xD9, /* Tell chip to check Tx descriptors for work */ RxMaxSize = 0xDA, /* Max size of an Rx packet (8169 only) */ @@ -1234,7 +1235,7 @@ static void cp_tx_timeout(struct net_device *dev) { struct cp_private *cp = netdev_priv(dev); unsigned long flags; - int rc; + int rc, i; netdev_warn(dev, "Transmit timeout, status %2x %4x %4x %4x\n", cpr8(Cmd), cpr16(CpCmd), @@ -1242,6 +1243,17 @@ static void cp_tx_timeout(struct net_device *dev) spin_lock_irqsave(&cp->lock, flags); + netif_dbg(cp, tx_err, cp->dev, "TX ring head %d tail %d desc %x\n", + cp->tx_head, cp->tx_tail, cpr16(TxDmaOkLowDesc)); + for (i = 0; i < CP_TX_RING_SIZE; i++) { + netif_dbg(cp, tx_err, cp->dev, + "TX slot %d @%p: %08x (%08x) %08x %llx %p\n", + i, &cp->tx_ring[i], le32_to_cpu(cp->tx_ring[i].opts1), + cp->tx_opts[i], le32_to_cpu(cp->tx_ring[i].opts2), + le64_to_cpu(cp->tx_ring[i].addr), + cp->tx_skb[i]); + } + cp_stop_hw(cp); cp_clean_rings(cp); rc = cp_init_rings(cp); -- cgit v0.10.2 From 69e5d3f893e19613486f300fd6e631810338aa4b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 14 Sep 2015 10:28:34 +1000 Subject: drm/qxl: only report first monitor as connected if we have no state If the server isn't new enough to give us state, report the first monitor as always connected, otherwise believe the server side. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 7c6225c..dd845f8 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -886,13 +886,15 @@ static enum drm_connector_status qxl_conn_detect( drm_connector_to_qxl_output(connector); struct drm_device *ddev = connector->dev; struct qxl_device *qdev = ddev->dev_private; - int connected; + bool connected = false; /* The first monitor is always connected */ - connected = (output->index == 0) || - (qdev->client_monitors_config && - qdev->client_monitors_config->count > output->index && - qxl_head_enabled(&qdev->client_monitors_config->heads[output->index])); + if (!qdev->client_monitors_config) { + if (output->index == 0) + connected = true; + } else + connected = qdev->client_monitors_config->count > output->index && + qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]); DRM_DEBUG("#%d connected: %d\n", output->index, connected); if (!connected) -- cgit v0.10.2 From aec9e12953e777f62acdab069656ebd9bcb6c9ba Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 17 Sep 2015 16:30:54 +0530 Subject: drm/mgag200: Fix error handling paths in fbdev driver Set up error handling in mgag200_fbdev_init and mgag200fb_create such that they release the things they allocate, rather than relying on someone calling mga_fbdev_destroy. 
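The patch below introduces the standard kernel goto-unwind idiom. A hedged sketch of the idiom on its own, with hypothetical step_a()/step_b()/undo_a() helpers rather than the actual mgag200 calls:

static int create_sketch(void)
{
        int ret;

        ret = step_a();         /* hypothetical: first allocation */
        if (ret)
                return ret;     /* nothing to unwind yet */

        ret = step_b();         /* hypothetical: second allocation */
        if (ret)
                goto err_undo_a;

        return 0;

err_undo_a:
        undo_a();               /* release in reverse order of acquisition */
        return ret;
}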
Based on a patch by Sudip Mukherjee Link: http://lkml.kernel.org/r/55F6E68D.8070800@codeaurora.org Reported-by: Ingo Molnar Cc: Daniel Vetter Cc: Dave Airlie Cc: David Airlie Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sudip Mukherjee Cc: Thomas Gleixner Cc: dri-devel Signed-off-by: Archit Taneja Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c index 87de15e..b35b5b2 100644 --- a/drivers/gpu/drm/mgag200/mgag200_fb.c +++ b/drivers/gpu/drm/mgag200/mgag200_fb.c @@ -186,17 +186,19 @@ static int mgag200fb_create(struct drm_fb_helper *helper, sysram = vmalloc(size); if (!sysram) - return -ENOMEM; + goto err_sysram; info = drm_fb_helper_alloc_fbi(helper); - if (IS_ERR(info)) - return PTR_ERR(info); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto err_alloc_fbi; + } info->par = mfbdev; ret = mgag200_framebuffer_init(dev, &mfbdev->mfb, &mode_cmd, gobj); if (ret) - return ret; + goto err_framebuffer_init; mfbdev->sysram = sysram; mfbdev->size = size; @@ -225,7 +227,17 @@ static int mgag200fb_create(struct drm_fb_helper *helper, DRM_DEBUG_KMS("allocated %dx%d\n", fb->width, fb->height); + return 0; + +err_framebuffer_init: + drm_fb_helper_release_fbi(helper); +err_alloc_fbi: + vfree(sysram); +err_sysram: + drm_gem_object_unreference_unlocked(gobj); + + return ret; } static int mga_fbdev_destroy(struct drm_device *dev, @@ -276,23 +288,26 @@ int mgag200_fbdev_init(struct mga_device *mdev) ret = drm_fb_helper_init(mdev->dev, &mfbdev->helper, mdev->num_crtc, MGAG200FB_CONN_LIMIT); if (ret) - return ret; + goto err_fb_helper; ret = drm_fb_helper_single_add_all_connectors(&mfbdev->helper); if (ret) - goto fini; + goto err_fb_setup; /* disable all the possible outputs/crtcs before entering KMS mode */ drm_helper_disable_unused_functions(mdev->dev); ret = drm_fb_helper_initial_config(&mfbdev->helper, bpp_sel); if (ret) - goto fini; + goto err_fb_setup; return 0; -fini: +err_fb_setup: drm_fb_helper_fini(&mfbdev->helper); +err_fb_helper: + mdev->mfbdev = NULL; + return ret; } -- cgit v0.10.2 From 728f86607d47f7d2d24d61fd30852faa66ca5aa9 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 17 Sep 2015 16:30:55 +0530 Subject: drm/mgag200: Fix driver_load error handling mgag200_driver_load's error path just calls the drm driver's driver_unload op. It isn't safe to call this because it doesn't handle things well if driver_load fails somewhere midway. Replace the call to mgag200_driver_unload with a more fine-grained error handling path.
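The underlying rule this patch applies: a load function may only unwind what it has already set up, because the full unload op assumes a fully initialized device. A hedged sketch of the contrast, with hypothetical helpers and not the actual mgag200 code:

static int load_sketch(void)
{
        int ret;

        ret = init_mm_stub();           /* hypothetical */
        if (ret)
                return ret;

        ret = init_modeset_stub();      /* hypothetical */
        if (ret)
                goto err_modeset;       /* unwind only init_mm_stub()... */

        return 0;

err_modeset:
        fini_mm_stub();                 /* ...rather than calling a full unload
                                         * that would touch state never created */
        return ret;
}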
Link: http://lkml.kernel.org/r/55F6E68D.8070800@codeaurora.org Reported-by: Ingo Molnar Cc: Daniel Vetter Cc: Dave Airlie Cc: David Airlie Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sudip Mukherjee Cc: Thomas Gleixner Cc: dri-devel Signed-off-by: Archit Taneja Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c index de06388..b1a0f56 100644 --- a/drivers/gpu/drm/mgag200/mgag200_main.c +++ b/drivers/gpu/drm/mgag200/mgag200_main.c @@ -220,7 +220,7 @@ int mgag200_driver_load(struct drm_device *dev, unsigned long flags) } r = mgag200_mm_init(mdev); if (r) - goto out; + goto err_mm; drm_mode_config_init(dev); dev->mode_config.funcs = (void *)&mga_mode_funcs; @@ -233,7 +233,7 @@ int mgag200_driver_load(struct drm_device *dev, unsigned long flags) r = mgag200_modeset_init(mdev); if (r) { dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r); - goto out; + goto err_modeset; } /* Make small buffers to store a hardware cursor (double buffered icon updates) */ @@ -241,20 +241,24 @@ int mgag200_driver_load(struct drm_device *dev, unsigned long flags) &mdev->cursor.pixels_1); mgag200_bo_create(dev, roundup(48*64, PAGE_SIZE), 0, 0, &mdev->cursor.pixels_2); - if (!mdev->cursor.pixels_2 || !mdev->cursor.pixels_1) - goto cursor_nospace; - mdev->cursor.pixels_current = mdev->cursor.pixels_1; - mdev->cursor.pixels_prev = mdev->cursor.pixels_2; - goto cursor_done; - cursor_nospace: - mdev->cursor.pixels_1 = NULL; - mdev->cursor.pixels_2 = NULL; - dev_warn(&dev->pdev->dev, "Could not allocate space for cursors. Not doing hardware cursors.\n"); - cursor_done: - -out: - if (r) - mgag200_driver_unload(dev); + if (!mdev->cursor.pixels_2 || !mdev->cursor.pixels_1) { + mdev->cursor.pixels_1 = NULL; + mdev->cursor.pixels_2 = NULL; + dev_warn(&dev->pdev->dev, + "Could not allocate space for cursors. Not doing hardware cursors.\n"); + } else { + mdev->cursor.pixels_current = mdev->cursor.pixels_1; + mdev->cursor.pixels_prev = mdev->cursor.pixels_2; + } + + return 0; + +err_modeset: + drm_mode_config_cleanup(dev); + mgag200_mm_fini(mdev); +err_mm: + dev->dev_private = NULL; + return r; } -- cgit v0.10.2 From 14d11b8dca17f6dedf4b62d7391b9b294e3414ff Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 21 Sep 2015 15:33:47 +0200 Subject: drm/layerscape: fix handling fsl_dcu_drm_plane_index result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2038576 Signed-off-by: Andrzej Hajda Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index 82be6b8..d1e300d 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -58,7 +58,8 @@ static void fsl_dcu_drm_plane_atomic_disable(struct drm_plane *plane, struct drm_plane_state *old_state) { struct fsl_dcu_drm_device *fsl_dev = plane->dev->dev_private; - unsigned int index, value, ret; + unsigned int value; + int index, ret; index = fsl_dcu_drm_plane_index(plane); if (index < 0) -- cgit v0.10.2 From 7bbe33ff1896d225588b37c574c0d80dbc63c657 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Tue, 22 Sep 2015 13:09:32 -0400 Subject: geneve: use network byte order for destination port config parameter This is primarily for consistency with vxlan and other tunnels which use network byte order for similar parameters. Signed-off-by: John W. Linville Signed-off-by: David S. Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 4ce8b7b..8f5c02e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,7 +815,7 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, static int geneve_configure(struct net *net, struct net_device *dev, __be32 rem_addr, __u32 vni, __u8 ttl, __u8 tos, - __u16 dst_port, bool metadata) + __be16 dst_port, bool metadata) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); @@ -840,10 +840,10 @@ static int geneve_configure(struct net *net, struct net_device *dev, geneve->ttl = ttl; geneve->tos = tos; - geneve->dst_port = htons(dst_port); + geneve->dst_port = dst_port; geneve->collect_md = metadata; - t = geneve_find_dev(gn, htons(dst_port), rem_addr, geneve->vni, + t = geneve_find_dev(gn, dst_port, rem_addr, geneve->vni, &tun_on_same_port, &tun_collect_md); if (t) return -EBUSY; @@ -867,7 +867,7 @@ static int geneve_configure(struct net *net, struct net_device *dev, static int geneve_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - __u16 dst_port = GENEVE_UDP_PORT; + __be16 dst_port = htons(GENEVE_UDP_PORT); __u8 ttl = 0, tos = 0; bool metadata = false; __be32 rem_addr; @@ -886,7 +886,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, tos = nla_get_u8(data[IFLA_GENEVE_TOS]); if (data[IFLA_GENEVE_PORT]) - dst_port = nla_get_u16(data[IFLA_GENEVE_PORT]); + dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]); if (data[IFLA_GENEVE_COLLECT_METADATA]) metadata = true; @@ -909,7 +909,7 @@ static size_t geneve_get_size(const struct net_device *dev) nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ - nla_total_size(sizeof(__u16)) + /* IFLA_GENEVE_PORT */ + nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 0; } @@ -931,7 +931,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GENEVE_PORT, ntohs(geneve->dst_port))) + if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port)) goto nla_put_failure; if (geneve->collect_md) { @@ -971,7 +971,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, if (IS_ERR(dev)) return dev; - err = geneve_configure(net, dev, 0, 0, 0, 0, dst_port, true); + err = geneve_configure(net, dev, 0, 0, 0, 0, htons(dst_port), true); if (err) { free_netdev(dev); return ERR_PTR(err); -- cgit v0.10.2 From 49558b471369e790650df8706b5608fee27af42c Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Sat, 19 Sep 2015 00:31:10 +0200 Subject: drm/vmwgfx: Fix uninitialized return in vmw_cotable_unbind() Function vmw_cotable_unbind() uses the uninitialized variable ret as return value. Make the result deterministic and directly return as the variable is unused anyway. Detected by Coverity CID 1324256.
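The bug class is easy to reproduce in isolation. A minimal sketch with hypothetical names (not the vmwgfx code):

static int unbind_buggy(void)
{
	int ret;	/* declared, never assigned */

	/* ... work that never touches ret ... */
	return ret;	/* undefined: whatever happens to be on the stack */
}

static int unbind_fixed(void)
{
	/* ... same work ... */
	return 0;	/* explicit, deterministic result */
}

int main(void) { return unbind_fixed(); }

Dropping the variable entirely, as the patch does, also lets the compiler catch any future path that tries to use it.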
Signed-off-by: Christian Engelmayer Reviewed-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index ce659a1..092ea81 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -311,7 +311,6 @@ static int vmw_cotable_unbind(struct vmw_resource *res, struct vmw_private *dev_priv = res->dev_priv; struct ttm_buffer_object *bo = val_buf->bo; struct vmw_fence_obj *fence; - int ret; if (list_empty(&res->mob_head)) return 0; @@ -328,7 +327,7 @@ static int vmw_cotable_unbind(struct vmw_resource *res, if (likely(fence != NULL)) vmw_fence_obj_unreference(&fence); - return ret; + return 0; } /** -- cgit v0.10.2 From f3b8c0caca02001565cb14b845bf90f59dea8213 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Sat, 19 Sep 2015 00:32:24 +0200 Subject: drm/vmwgfx: Fix uninitialized return in vmw_kms_helper_dirty() Function vmw_kms_helper_dirty() uses the uninitialized variable ret as return value. Make the result deterministic and directly return as the variable is unused anyway. Detected by Coverity CID 1324255. Signed-off-by: Christian Engelmayer Reviewed-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 61fb7f3..15a6c01 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1685,7 +1685,6 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, struct drm_crtc *crtc; u32 num_units = 0; u32 i, k; - int ret; dirty->dev_priv = dev_priv; @@ -1711,7 +1710,7 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, if (!dirty->cmd) { DRM_ERROR("Couldn't reserve fifo space " "for dirty blits.\n"); - return ret; + return -ENOMEM; } memset(dirty->cmd, 0, dirty->fifo_reserve_size); } -- cgit v0.10.2 From 30c64664f110f76064e364cb5dd385edc3751ba5 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 15 Sep 2015 01:11:42 -0700 Subject: drm: Allow also control clients to check the drm version This should be harmless. Vmware will, due to old infrastructure reasons, be using a privileged control client to supply GUI layout information rather than obtaining it from the device. That control client will be needing access to DRM version information. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Acked-by: David Herrmann diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 9a860ca..d93e737 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -520,7 +520,8 @@ EXPORT_SYMBOL(drm_ioctl_permit); /** Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { - DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, + DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), -- cgit v0.10.2 From 83510441bc08bee201c0ded9d81da6dfd008d69a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 24 Sep 2015 11:00:18 +0200 Subject: ALSA: hda/tegra - async probe for avoiding module loading deadlock The Tegra HD-audio controller driver causes deadlocks when loaded as a module since the driver invokes request_module() at binding with the codec driver. 
This patch works around it by deferring the probe in a work like the Intel HD-audio controller driver does. Although moving the codec probe stuff into udev would be a better solution, it may cause other regressions, so let's try this band-aid fix until the more proper solution gets landed. Reported-by: Thierry Reding Tested-by: Thierry Reding Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 477742c..58c0aad 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -73,6 +73,7 @@ struct hda_tegra { struct clk *hda2codec_2x_clk; struct clk *hda2hdmi_clk; void __iomem *regs; + struct work_struct probe_work; }; #ifdef CONFIG_PM @@ -294,7 +295,9 @@ static int hda_tegra_dev_disconnect(struct snd_device *device) static int hda_tegra_dev_free(struct snd_device *device) { struct azx *chip = device->device_data; + struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); + cancel_work_sync(&hda->probe_work); if (azx_bus(chip)->chip_init) { azx_stop_all_streams(chip); azx_stop_chip(chip); @@ -426,6 +429,9 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) /* * constructor */ + +static void hda_tegra_probe_work(struct work_struct *work); + static int hda_tegra_create(struct snd_card *card, unsigned int driver_caps, struct hda_tegra *hda) @@ -452,6 +458,8 @@ static int hda_tegra_create(struct snd_card *card, chip->single_cmd = false; chip->snoop = true; + INIT_WORK(&hda->probe_work, hda_tegra_probe_work); + err = azx_bus_init(chip, NULL, &hda_tegra_io_ops); if (err < 0) return err; @@ -499,6 +507,21 @@ static int hda_tegra_probe(struct platform_device *pdev) card->private_data = chip; dev_set_drvdata(&pdev->dev, card); + schedule_work(&hda->probe_work); + + return 0; + +out_free: + snd_card_free(card); + return err; +} + +static void hda_tegra_probe_work(struct work_struct *work) +{ + struct hda_tegra *hda = container_of(work, struct hda_tegra, probe_work); + struct azx *chip = &hda->chip; + struct platform_device *pdev = to_platform_device(hda->dev); + int err; err = hda_tegra_first_init(chip, pdev); if (err < 0) @@ -520,11 +543,8 @@ static int hda_tegra_probe(struct platform_device *pdev) chip->running = 1; snd_hda_set_power_save(&chip->bus, power_save * 1000); - return 0; - -out_free: - snd_card_free(card); - return err; + out_free: + return; /* no error return from async probe */ } static int hda_tegra_remove(struct platform_device *pdev) -- cgit v0.10.2 From 7f57d803ee03730d570dc59a9e3e4842b58dd5cc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 24 Sep 2015 17:36:51 +0200 Subject: ALSA: hda - Disable power_save_node for Thinkpads Lenovo Thinkpads with recent Realtek codecs seem to suffer from click noises at power transition since the introduction of widget power saving in the 4.1 kernel. Although this might be solved by some delays in appropriate points, as a quick workaround, just disable the power_save_node feature for now. The gain it gives is relatively small, and this takes the situation back to pre 4.1 time. This patch ended up with a few more code changes than usual because the existing fixup for Thinkpads is highly chained. Instead of adding yet another chain, combine a few of them into a single fixup entry, as a gratis cleanup.
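To make "highly chained" concrete, here is a toy model of the chain walk (plain C with hypothetical entries; the real table uses struct hda_fixup with .chained and .chain_id, as the diff below shows):

#include <stdio.h>

struct fixup {
	void (*func)(void);
	int chained;	/* run chain_id's entry after this one? */
	int chain_id;
};

static void dock_pins(void) { puts("apply dock pin configs"); }
static void mic_boost(void) { puts("limit internal mic boost"); }

static const struct fixup table[] = {
	{ dock_pins, 1, 1 },	/* entry 0 chains to entry 1 */
	{ mic_boost, 0, 0 },	/* end of the chain */
};

static void apply_fixup(int id)
{
	for (;;) {
		table[id].func();
		if (!table[id].chained)
			break;
		id = table[id].chain_id;
	}
}

int main(void) { apply_fixup(0); return 0; }

Merging several steps into one function-type fixup, as done here, simply removes hops from this walk.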
Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=943982 Cc: # v4.1+ Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a75b561..afec6dc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4188,6 +4188,24 @@ static void alc_fixup_disable_aamix(struct hda_codec *codec, } } +/* fixup for Thinkpad docks: add dock pins, avoid HP parser fixup */ +static void alc_fixup_tpt440_dock(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x16, 0x21211010 }, /* dock headphone */ + { 0x19, 0x21a11010 }, /* dock mic */ + { } + }; + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; + codec->power_save_node = 0; /* avoid click noises */ + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4562,7 +4580,6 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, - ALC292_FIXUP_TPT440_DOCK2, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -5029,17 +5046,7 @@ static const struct hda_fixup alc269_fixups[] = { }, [ALC292_FIXUP_TPT440_DOCK] = { .type = HDA_FIXUP_FUNC, - .v.func = alc269_fixup_pincfg_no_hp_to_lineout, - .chained = true, - .chain_id = ALC292_FIXUP_TPT440_DOCK2 - }, - [ALC292_FIXUP_TPT440_DOCK2] = { - .type = HDA_FIXUP_PINS, - .v.pins = (const struct hda_pintbl[]) { - { 0x16, 0x21211010 }, /* dock headphone */ - { 0x19, 0x21a11010 }, /* dock mic */ - { } - }, + .v.func = alc_fixup_tpt440_dock, .chained = true, .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST }, -- cgit v0.10.2 From da314c9923fed553a007785a901fd395b7eb6c19 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Sep 2015 11:38:56 +0800 Subject: netlink: Replace rhash_portid with bound On Mon, Sep 21, 2015 at 02:20:22PM -0400, Tejun Heo wrote: > > store_release and load_acquire are different from the usual memory > barriers and can't be paired this way. You have to pair store_release > and load_acquire. Besides, it isn't a particularly good idea to OK I've decided to drop the acquire/release helpers as they don't help us at all and simply pessimises the code by using full memory barriers (on some architectures) where only a write or read barrier is needed. > depend on memory barriers embedded in other data structures like the > above. Here, especially, rhashtable_insert() would have write barrier > *before* the entry is hashed not necessarily *after*, which means that > in the above case, a socket which appears to have set bound to a > reader might not visible when the reader tries to look up the socket > on the hashtable. But you are right we do need an explicit write barrier here to ensure that the hashing is visible. > There's no reason to be overly smart here. This isn't a crazy hot > path, write barriers tend to be very cheap, store_release more so. > Please just do smp_store_release() and note what it's paired with. It's not about being overly smart. It's about actually understanding what's going on with the code. I've seen too many instances of people simply sprinkling synchronisation primitives around without any knowledge of what is happening underneath, which is just a recipe for creating hard-to-debug races. 
> > @@ -1539,7 +1546,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, > > } > > } > > > > - if (!nlk->portid) { > > + if (!nlk->bound) { > > I don't think you can skip load_acquire here just because this is the > second deref of the variable. That doesn't change anything. Race > condition could still happen between the first and second tests and > skipping the second would lead to the same kind of bug. The reason this one is OK is because we do not use nlk->portid or try to get nlk from the hash table before we return to user-space. However, there is a real bug here that none of these acquire/release helpers discovered. The two bound tests here used to be a single one. Now that they are separate it is entirely possible for another thread to come in the middle and bind the socket. So we need to repeat the portid check in order to maintain consistency. > > @@ -1587,7 +1594,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, > > !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) > > return -EPERM; > > > > - if (!nlk->portid) > > + if (!nlk->bound) > > Don't we need load_acquire here too? Is this path holding a lock > which makes that unnecessary? Ditto. ---8<--- The commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ("netlink: Fix autobind race condition that leads to zero port ID") created some new races that can occur due to inconsistencies between the two port IDs. Tejun is right that a barrier is unavoidable. Therefore I am reverting to the original patch that used a boolean to indicate that a user netlink socket has been bound. Barriers have been added where necessary to ensure that a valid portid and the hashed socket are visible. I have also changed netlink_insert to only return EBUSY if the socket is bound to a portid different to the requested one. This, combined with only reading nlk->bound once in netlink_bind, fixes a race where two threads that bind the socket at the same time with different port IDs may both succeed. Fixes: 1f770c0a09da ("netlink: Fix autobind race condition that leads to zero port ID") Reported-by: Tejun Heo Reported-by: Linus Torvalds Signed-off-by: Herbert Xu Nacked-by: Tejun Heo Signed-off-by: David S. Miller diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9f51608..8f060d7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1031,7 +1031,7 @@ static inline int netlink_compare(struct rhashtable_compare_arg *arg, const struct netlink_compare_arg *x = arg->key; const struct netlink_sock *nlk = ptr; - return nlk->rhash_portid != x->portid || + return nlk->portid != x->portid || !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); } @@ -1057,7 +1057,7 @@ static int __netlink_insert(struct netlink_table *table, struct sock *sk) { struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid); + netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); return rhashtable_lookup_insert_key(&table->hash, &arg, &nlk_sk(sk)->node, netlink_rhashtable_params); @@ -1110,8 +1110,8 @@ static int netlink_insert(struct sock *sk, u32 portid) lock_sock(sk); - err = -EBUSY; - if (nlk_sk(sk)->portid) + err = nlk_sk(sk)->portid == portid ?
0 : -EBUSY; + if (nlk_sk(sk)->bound) goto err; err = -ENOMEM; @@ -1119,7 +1119,7 @@ static int netlink_insert(struct sock *sk, u32 portid) unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; - nlk_sk(sk)->rhash_portid = portid; + nlk_sk(sk)->portid = portid; sock_hold(sk); err = __netlink_insert(table, sk); @@ -1135,7 +1135,9 @@ static int netlink_insert(struct sock *sk, u32 portid) goto err; } - nlk_sk(sk)->portid = portid; + /* We need to ensure that the socket is hashed and visible. */ + smp_wmb(); + nlk_sk(sk)->bound = portid; err: release_sock(sk); @@ -1521,6 +1523,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; long unsigned int groups = nladdr->nl_groups; + bool bound; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1537,9 +1540,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) + bound = nlk->bound; + if (bound) { + /* Ensure nlk->portid is up-to-date. */ + smp_rmb(); + if (nladdr->nl_pid != nlk->portid) return -EINVAL; + } if (nlk->netlink_bind && groups) { int group; @@ -1555,7 +1563,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, } } - if (!nlk->portid) { + /* No need for barriers here as we return to user-space without + * using any of the bound attributes. + */ + if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); @@ -1603,7 +1614,10 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->portid) + /* No need for barriers here as we return to user-space without + * using any of the bound attributes. + */ + if (!nlk->bound) err = netlink_autobind(sock); if (err == 0) { @@ -2444,10 +2458,13 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dst_group = nlk->dst_group; } - if (!nlk->portid) { + if (!nlk->bound) { err = netlink_autobind(sock); if (err) goto out; + } else { + /* Ensure nlk is hashed and visible. */ + smp_rmb(); } /* It's a really convoluted way for userland to ask for mmaped @@ -3273,7 +3290,7 @@ static inline u32 netlink_hash(const void *data, u32 len, u32 seed) const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid); + netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 80b2b75..14437d9 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -25,7 +25,6 @@ struct netlink_ring { struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 rhash_portid; u32 portid; u32 dst_portid; u32 dst_group; @@ -36,6 +35,7 @@ struct netlink_sock { unsigned long state; size_t max_recvmsg_len; wait_queue_head_t wait; + bool bound; bool cb_running; struct netlink_callback cb; struct mutex *cb_mutex; -- cgit v0.10.2 From 9badce000e2ce68ba74838a3cd356dde58221c2f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Sep 2015 17:07:29 -0400 Subject: cgroup, writeback: don't enable cgroup writeback on traditional hierarchies inode_cgwb_enabled() gates cgroup writeback support. If it returns true, each inode is attached to the corresponding memory domain which gets mapped to io domain. 
It currently only tests whether the filesystem and bdi support cgroup writeback; however, cgroup writeback support doesn't work on traditional hierarchies and thus it should also test whether memcg and iocg are on the default hierarchy. This caused traditional hierarchy setups to hit the cgroup writeback path inadvertently and ended up creating separate writeback domains for each memcg and mapping them all to the root iocg uncovering a couple issues in the cgroup writeback path. cgroup writeback was never meant to be enabled on traditional hierarchies. Make inode_cgwb_enabled() test whether both memcg and iocg are on the default hierarchy. Signed-off-by: Tejun Heo Reported-by: Artem Bityutskiy Reported-by: Dexuan Cui Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com Link: http://lkml.kernel.org/g/f30d4a6aa8a546ff88f73021d026a453@SIXPR30MB031.064d.mgd.msft.net diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5a5d79e..d5eb4ad1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -252,13 +253,19 @@ int inode_congested(struct inode *inode, int cong_bits); * @inode: inode of interest * * cgroup writeback requires support from both the bdi and filesystem. - * Test whether @inode has both. + * Also, both memcg and iocg have to be on the default hierarchy. Test + * whether all conditions are met. + * + * Note that the test result may change dynamically on the same inode + * depending on how memcg and iocg are configured. */ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return bdi_cap_account_dirty(bdi) && + return cgroup_on_dfl(mem_cgroup_root_css->cgroup) && + cgroup_on_dfl(blkcg_root_css->cgroup) && + bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } -- cgit v0.10.2 From 6ae459bdaaeebc632b16e54dcbabb490c6931d61 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 22 Sep 2015 12:57:53 -0700 Subject: skbuff: Fix skb checksum flag on skb pull VXLAN device can receive skb with checksum partial. But the checksum offset could be in outer header which is pulled on receive. This results in negative checksum offset for the skb. Such skb can cause the assert failure in skb_checksum_help(). Following patch fixes the bug by setting checksum-none while pulling outer header. Following is the kernel panic msg from old kernel hitting the bug. ------------[ cut here ]------------ kernel BUG at net/core/dev.c:1906! 
RIP: 0010:[] skb_checksum_help+0x144/0x150 Call Trace: [] queue_userspace_packet+0x408/0x470 [openvswitch] [] ovs_dp_upcall+0x5d/0x60 [openvswitch] [] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch] [] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch] [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] vxlan_rcv+0x53/0x60 [openvswitch] [] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch] [] udp_queue_rcv_skb+0x2dc/0x3b0 [] __udp4_lib_rcv+0x1cf/0x6c0 [] udp_rcv+0x1a/0x20 [] ip_local_deliver_finish+0xdd/0x280 [] ip_local_deliver+0x88/0x90 [] ip_rcv_finish+0x10d/0x370 [] ip_rcv+0x235/0x300 [] __netif_receive_skb+0x55d/0x620 [] netif_receive_skb+0x80/0x90 [] virtnet_poll+0x555/0x6f0 [] net_rx_action+0x134/0x290 [] __do_softirq+0xa8/0x210 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x8e/0xb0 [] do_IRQ+0x63/0xe0 [] common_interrupt+0x6e/0x6e Reported-by: Anupam Chanda Signed-off-by: Pravin B Shelar Acked-by: Tom Herbert Signed-off-by: David S. Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9987af0..2b0a30a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2707,6 +2707,9 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) <= len) + skb->ip_summed = CHECKSUM_NONE; } unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); -- cgit v0.10.2 From d5b8d6404395641987db76e28334cae4cef771ae Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 21 Sep 2015 16:47:09 +0100 Subject: net: gianfar: remove misuse of IRQF_NO_SUSPEND flag The device is set as wakeup capable using proper wakeup API but the driver misuses IRQF_NO_SUSPEND to set the interrupt as wakeup source which is incorrect. This patch removes the use of IRQF_NO_SUSPEND flags replacing it with enable_irq_wake instead. Cc: "David S. Miller" Cc: Claudiu Manoil Cc: Kevin Hao Cc: netdev@vger.kernel.org Signed-off-by: Sudeep Holla Acked-by: Claudiu Manoil Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4b69d061..803ed4c9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1970,8 +1970,7 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) /* Install our interrupt handlers for Error, * Transmit, and Receive */ - err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, - IRQF_NO_SUSPEND, + err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0, gfar_irq(grp, ER)->name, grp); if (err < 0) { netif_err(priv, intr, dev, "Can't get IRQ %d\n", @@ -1979,6 +1978,8 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) goto err_irq_fail; } + enable_irq_wake(gfar_irq(grp, ER)->irq); + err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0, gfar_irq(grp, TX)->name, grp); if (err < 0) { @@ -1994,14 +1995,14 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) goto rx_irq_fail; } } else { - err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, - IRQF_NO_SUSPEND, + err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0, gfar_irq(grp, TX)->name, grp); if (err < 0) { netif_err(priv, intr, dev, "Can't get IRQ %d\n", gfar_irq(grp, TX)->irq); goto err_irq_fail; } + enable_irq_wake(gfar_irq(grp, TX)->irq); } return 0; -- cgit v0.10.2 From 63d008a4e9ee86614ca5671b7f3ba447df007190 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 22 Sep 2015 18:12:11 +0200 Subject: ipv4: send arp replies to the correct tunnel When using ip lwtunnels, the additional data for xmit (basically, the actual tunnel to use) are carried in ip_tunnel_info either in dst->lwtstate or in metadata dst. When replying to ARP requests, we need to send the reply to the same tunnel the request came from. This means we need to construct proper metadata dst for ARP replies. We could perform another route lookup to get a dst entry with the correct lwtstate. However, this won't always ensure that the outgoing tunnel is the same as the incoming one, and it won't work anyway for IPv4 duplicate address detection. The only thing to do is to "reverse" the ip_tunnel_info. Signed-off-by: Jiri Benc Acked-by: Thomas Graf Signed-off-by: David S. 
Miller diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a3ba..f6dafec 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -276,6 +276,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); +struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, + gfp_t flags); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, int gso_type_mask); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 30409b7..f03db8b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -113,6 +113,8 @@ #include #include #include +#include +#include #include @@ -296,7 +298,8 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, - const unsigned char *target_hw, struct sk_buff *oskb) + const unsigned char *target_hw, + struct dst_entry *dst) { struct sk_buff *skb; @@ -309,9 +312,7 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip, if (!skb) return; - if (oskb) - skb_dst_copy(skb, oskb); - + skb_dst_set(skb, dst); arp_xmit(skb); } @@ -333,6 +334,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) __be32 target = *(__be32 *)neigh->primary_key; int probes = atomic_read(&neigh->probes); struct in_device *in_dev; + struct dst_entry *dst = NULL; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -381,9 +383,10 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } } + if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE)) + dst = dst_clone(skb_dst(skb)); arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, - dst_hw, dev->dev_addr, NULL, - dev->priv_flags & IFF_XMIT_DST_RELEASE ? NULL : skb); + dst_hw, dev->dev_addr, NULL, dst); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) @@ -649,6 +652,7 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) int addr_type; struct neighbour *n; struct net *net = dev_net(dev); + struct dst_entry *reply_dst = NULL; bool is_garp = false; /* arp_rcv below verifies the ARP header and verifies the device @@ -749,13 +753,18 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) * cache. 
*/ + if (arp->ar_op == htons(ARPOP_REQUEST) && skb_metadata_dst(skb)) + reply_dst = (struct dst_entry *) + iptunnel_metadata_reply(skb_metadata_dst(skb), + GFP_ATOMIC); + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) - arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, - dev->dev_addr, sha); + arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, + sha, dev->dev_addr, sha, reply_dst); goto out; } @@ -774,9 +783,10 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) if (!dont_send) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) { - arp_send(ARPOP_REPLY, ETH_P_ARP, sip, - dev, tip, sha, dev->dev_addr, - sha); + arp_send_dst(ARPOP_REPLY, ETH_P_ARP, + sip, dev, tip, sha, + dev->dev_addr, sha, + reply_dst); neigh_release(n); } } @@ -794,9 +804,10 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) { - arp_send(ARPOP_REPLY, ETH_P_ARP, sip, - dev, tip, sha, dev->dev_addr, - sha); + arp_send_dst(ARPOP_REPLY, ETH_P_ARP, + sip, dev, tip, sha, + dev->dev_addr, sha, + reply_dst); } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9b97204..ce3a1e7 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -46,6 +46,7 @@ #include #include #include +#include #include int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, @@ -119,6 +120,33 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) } EXPORT_SYMBOL_GPL(iptunnel_pull_header); +struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, + gfp_t flags) +{ + struct metadata_dst *res; + struct ip_tunnel_info *dst, *src; + + if (!md || md->u.tun_info.mode & IP_TUNNEL_INFO_TX) + return NULL; + + res = metadata_dst_alloc(0, flags); + if (!res) + return NULL; + + dst = &res->u.tun_info; + src = &md->u.tun_info; + dst->key.tun_id = src->key.tun_id; + if (src->mode & IP_TUNNEL_INFO_IPV6) + memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src, + sizeof(struct in6_addr)); + else + dst->key.u.ipv4.dst = src->key.u.ipv4.src; + dst->mode = src->mode | IP_TUNNEL_INFO_TX; + + return res; +} +EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); + struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool csum_help, int gso_type_mask) -- cgit v0.10.2 From b194f30c61efb0767a98f47a64530baa8b731670 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 22 Sep 2015 18:12:12 +0200 Subject: lwtunnel: remove source and destination UDP port config option The UDP tunnel config is asymmetric wrt. the ports used. The source and destination ports from one direction of the tunnel are not related to the ports of the other direction. We need to be able to respond to ARP requests using the correct ports without involving routing. As a consequence, UDP ports need to be a fixed property of the tunnel interface and cannot be set per route. Remove the ability to set ports per route. This is still okay to do, as no kernel has been released with these attributes yet. Note that the ability to specify source and destination ports is preserved for other users of the lwtunnel API which don't use routes for tunnel key specification (like openvswitch).
If in the future we rework ARP handling to allow port specification, the attributes can be added back. Signed-off-by: Jiri Benc Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 34141a5..f8b0188 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -21,8 +21,6 @@ enum lwtunnel_ip_t { LWTUNNEL_IP_SRC, LWTUNNEL_IP_TTL, LWTUNNEL_IP_TOS, - LWTUNNEL_IP_SPORT, - LWTUNNEL_IP_DPORT, LWTUNNEL_IP_FLAGS, __LWTUNNEL_IP_MAX, }; @@ -36,8 +34,6 @@ enum lwtunnel_ip6_t { LWTUNNEL_IP6_SRC, LWTUNNEL_IP6_HOPLIMIT, LWTUNNEL_IP6_TC, - LWTUNNEL_IP6_SPORT, - LWTUNNEL_IP6_DPORT, LWTUNNEL_IP6_FLAGS, __LWTUNNEL_IP6_MAX, }; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index ce3a1e7..84dce6a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -226,8 +226,6 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { [LWTUNNEL_IP_SRC] = { .type = NLA_U32 }, [LWTUNNEL_IP_TTL] = { .type = NLA_U8 }, [LWTUNNEL_IP_TOS] = { .type = NLA_U8 }, - [LWTUNNEL_IP_SPORT] = { .type = NLA_U16 }, - [LWTUNNEL_IP_DPORT] = { .type = NLA_U16 }, [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, }; @@ -267,12 +265,6 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, if (tb[LWTUNNEL_IP_TOS]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); - if (tb[LWTUNNEL_IP_SPORT]) - tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP_SPORT]); - - if (tb[LWTUNNEL_IP_DPORT]) - tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP_DPORT]); - if (tb[LWTUNNEL_IP_FLAGS]) tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]); @@ -294,8 +286,6 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || - nla_put_u16(skb, LWTUNNEL_IP_SPORT, tun_info->key.tp_src) || - nla_put_u16(skb, LWTUNNEL_IP_DPORT, tun_info->key.tp_dst) || nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; @@ -309,8 +299,6 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) + nla_total_size(4) /* LWTUNNEL_IP_SRC */ + nla_total_size(1) /* LWTUNNEL_IP_TOS */ + nla_total_size(1) /* LWTUNNEL_IP_TTL */ - + nla_total_size(2) /* LWTUNNEL_IP_SPORT */ - + nla_total_size(2) /* LWTUNNEL_IP_DPORT */ + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */ } @@ -333,8 +321,6 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) }, [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 }, [LWTUNNEL_IP6_TC] = { .type = NLA_U8 }, - [LWTUNNEL_IP6_SPORT] = { .type = NLA_U16 }, - [LWTUNNEL_IP6_DPORT] = { .type = NLA_U16 }, [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 }, }; @@ -374,12 +360,6 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, if (tb[LWTUNNEL_IP6_TC]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); - if (tb[LWTUNNEL_IP6_SPORT]) - tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP6_SPORT]); - - if (tb[LWTUNNEL_IP6_DPORT]) - tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP6_DPORT]); - if (tb[LWTUNNEL_IP6_FLAGS]) tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]); @@ -401,8 +381,6 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_TC, 
tun_info->key.ttl) || - nla_put_u16(skb, LWTUNNEL_IP6_SPORT, tun_info->key.tp_src) || - nla_put_u16(skb, LWTUNNEL_IP6_DPORT, tun_info->key.tp_dst) || nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; @@ -416,8 +394,6 @@ static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate) + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ + nla_total_size(1) /* LWTUNNEL_IP6_TC */ - + nla_total_size(2) /* LWTUNNEL_IP6_SPORT */ - + nla_total_size(2) /* LWTUNNEL_IP6_DPORT */ + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */ } -- cgit v0.10.2 From d8aecb10115497f6cdf841df8c88ebb3ba25fa28 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 22 Sep 2015 17:01:11 -0700 Subject: net: revert "net_sched: move tp->root allocation into fw_init()" fw filter uses tp->root==NULL to check if it is the old method, so it doesn't need allocation at all in this case. This patch reverts the offending commit and adds some comments for the old method to make it obvious. Fixes: 33f8b9ecdb15 ("net_sched: move tp->root allocation into fw_init()") Reported-by: Akshat Kakkar Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 715e01e..f23a3b6 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -33,7 +33,6 @@ struct fw_head { u32 mask; - bool mask_set; struct fw_filter __rcu *ht[HTSIZE]; struct rcu_head rcu; }; @@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, } } } else { - /* old method */ + /* Old method: classify the packet using its skb mark. */ if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id ^ tp->q->handle)))) { res->classid = id; @@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle) static int fw_init(struct tcf_proto *tp) { - struct fw_head *head; - - head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); - if (head == NULL) - return -ENOBUFS; - - head->mask_set = false; - rcu_assign_pointer(tp->root, head); + /* We don't allocate fw_head here, because in the old method + * we don't need it at all. + */ return 0; } @@ -252,7 +246,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, int err; if (!opt) - return handle ? -EINVAL : 0; + return handle ? -EINVAL : 0; /* Succeed if it is old method. */ err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); if (err < 0) @@ -302,11 +296,17 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!handle) return -EINVAL; - if (!head->mask_set) { - head->mask = 0xFFFFFFFF; + if (!head) { + u32 mask = 0xFFFFFFFF; if (tb[TCA_FW_MASK]) - head->mask = nla_get_u32(tb[TCA_FW_MASK]); - head->mask_set = true; + mask = nla_get_u32(tb[TCA_FW_MASK]); + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + head->mask = mask; + + rcu_assign_pointer(tp->root, head); } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); -- cgit v0.10.2 From d682d2bdc30650a5c7ce9908ab83ab674b658744 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Sep 2015 17:04:58 -0700 Subject: bnx2x: byte swap rss_key to comply to Toeplitz specs After a good amount of debugging, I found bnx2x was byte swapping the 40 bytes of rss_key.
If we byte swap the key, then bnx2x generates hashes matching MSDN specs as documented in (Verifying the RSS Hash Calculation) https://msdn.microsoft.com/en-us/library/windows/hardware/ff571021% 28v=vs.85%29.aspx It is mostly a non issue, unless we want to mix different NIC in a host, and want consistent hashing among all of them, ie if they all use the boot time generated rss key, or if some application is choosing specific tuple(s) so that incoming traffic lands into known rx queue(s). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index c9bd7f1..ff702a7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -4319,8 +4319,16 @@ static int bnx2x_setup_rss(struct bnx2x *bp, /* RSS keys */ if (test_bit(BNX2X_RSS_SET_SRCH, &p->rss_flags)) { - memcpy(&data->rss_key[0], &p->rss_key[0], - sizeof(data->rss_key)); + u8 *dst = (u8 *)(data->rss_key) + sizeof(data->rss_key); + const u8 *src = (const u8 *)p->rss_key; + int i; + + /* Apparently, bnx2x reads this array in reverse order + * We need to byte swap rss_key to comply with Toeplitz specs. + */ + for (i = 0; i < sizeof(data->rss_key); i++) + *--dst = *src++; + caps |= ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY; } -- cgit v0.10.2 From 38ea72bdb65df2f40ec77b2c9d1413e7f5e34465 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 Sep 2015 15:08:54 -0600 Subject: PCI/MSI: Fix MSI IRQ domains for VFs on virtual buses SR-IOV creates a virtual bus where bus->self is NULL. When we add VFs and scan for an MSI domain, pci_set_bus_msi_domain() dereferences bus->self, which causes a kernel NULL pointer dereference oops. Scan up to the parent bus until we find a real bridge where we can get the MSI domain. [bhelgaas: changelog] Fixes: 44aa0c657e3e ("PCI/MSI: Add hooks to populate the msi_domain field") Tested-by: Joerg Roedel Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Marc Zyngier diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c8cc0e62..8361d27 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -676,15 +676,20 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus) static void pci_set_bus_msi_domain(struct pci_bus *bus) { struct irq_domain *d; + struct pci_bus *b; /* - * Either bus is the root, and we must obtain it from the - * firmware, or we inherit it from the bridge device. + * The bus can be a root bus, a subordinate bus, or a virtual bus + * created by an SR-IOV device. Walk up to the first bridge device + * found or derive the domain from the host bridge. */ - if (pci_is_root_bus(bus)) - d = pci_host_bridge_msi_domain(bus); - else - d = dev_get_msi_domain(&bus->self->dev); + for (b = bus, d = NULL; !d && !pci_is_root_bus(b); b = b->parent) { + if (b->self) + d = dev_get_msi_domain(&b->self->dev); + } + + if (!d) + d = pci_host_bridge_msi_domain(b); dev_set_msi_domain(&bus->dev, d); } -- cgit v0.10.2 From 9d9240756e63dd87d6cbf5da8b98ceb8f8192b55 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 15 Sep 2015 11:17:21 -0600 Subject: PCI: Fix devfn for VPD access through function 0 Commit 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") passes PCI_SLOT(devfn) for the devfn parameter of pci_get_slot(). 
Generally this works because we're fairly well guaranteed that a PCIe device is at slot address 0, but for the general case, including conventional PCI, it's incorrect. We need to get the slot and then convert it back into a devfn. Fixes: 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe Acked-by: Mark Rustad CC: stable@vger.kernel.org diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 769f7e3..5a5f0a7 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -442,7 +442,8 @@ static const struct pci_vpd_ops pci_vpd_pci22_ops = { static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, void *arg) { - struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); ssize_t ret; if (!tdev) @@ -456,7 +457,8 @@ static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, const void *arg) { - struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); ssize_t ret; if (!tdev) @@ -475,7 +477,8 @@ static const struct pci_vpd_ops pci_vpd_f0_ops = { static int pci_vpd_f0_dev_check(struct pci_dev *dev) { - struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); int ret = 0; if (!tdev) -- cgit v0.10.2 From da2d03ea27f6ed9d2005a67b20dd021ddacf1e4d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 15 Sep 2015 22:24:46 -0600 Subject: PCI: Use function 0 VPD for identical functions, regular VPD for others 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") added PCI_DEV_FLAGS_VPD_REF_F0. Previously, we set the flag on every non-zero function of quirked devices. If a function turned out to be different from function 0, i.e., it had a different class, vendor ID, or device ID, the flag remained set but we didn't make VPD accessible at all. Flip this around so we only set PCI_DEV_FLAGS_VPD_REF_F0 for functions that are identical to function 0, and allow regular VPD access for any other functions. 
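Both VPD fixes come down to the devfn encoding. A small standalone demonstration, using the macro definitions as found in the PCI uapi header:

#include <stdio.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	unsigned int devfn = PCI_DEVFN(3, 2);	/* slot 3, function 2 */

	/* Buggy: a bare slot number (3) where a devfn is expected. */
	printf("PCI_SLOT(devfn)               = %u\n", PCI_SLOT(devfn));
	/* Correct: function 0's devfn in the same slot (3 << 3 == 24). */
	printf("PCI_DEVFN(PCI_SLOT(devfn), 0) = %u\n",
	       PCI_DEVFN(PCI_SLOT(devfn), 0));
	return 0;
}

The two values only coincide when the device sits in slot 0, which is why the bug goes unnoticed on most PCIe topologies but not on conventional PCI.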
[bhelgaas: changelog, stable tag] Fixes: 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe Acked-by: Mark Rustad CC: stable@vger.kernel.org diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 5a5f0a7..59ac36f 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -475,23 +475,6 @@ static const struct pci_vpd_ops pci_vpd_f0_ops = { .release = pci_vpd_pci22_release, }; -static int pci_vpd_f0_dev_check(struct pci_dev *dev) -{ - struct pci_dev *tdev = pci_get_slot(dev->bus, - PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); - int ret = 0; - - if (!tdev) - return -ENODEV; - if (!tdev->vpd || !tdev->multifunction || - dev->class != tdev->class || dev->vendor != tdev->vendor || - dev->device != tdev->device) - ret = -ENODEV; - - pci_dev_put(tdev); - return ret; -} - int pci_vpd_pci22_init(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd; @@ -500,12 +483,7 @@ int pci_vpd_pci22_init(struct pci_dev *dev) cap = pci_find_capability(dev, PCI_CAP_ID_VPD); if (!cap) return -ENODEV; - if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) { - int ret = pci_vpd_f0_dev_check(dev); - if (ret) - return ret; - } vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC); if (!vpd) return -ENOMEM; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6a30252..b03373f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1907,11 +1907,27 @@ static void quirk_netmos(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos); +/* + * Quirk non-zero PCI functions to route VPD access through function 0 for + * devices that share VPD resources between functions. The functions are + * expected to be identical devices. + */ static void quirk_f0_vpd_link(struct pci_dev *dev) { - if (!dev->multifunction || !PCI_FUNC(dev->devfn)) + struct pci_dev *f0; + + if (!PCI_FUNC(dev->devfn)) return; - dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; + + f0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + if (!f0) + return; + + if (f0->vpd && dev->class == f0->class && + dev->vendor == f0->vendor && dev->device == f0->device) + dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; + + pci_dev_put(f0); } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET, 8, quirk_f0_vpd_link); -- cgit v0.10.2 From de24c18c0faca5ebd618e1cb87f5489745e40475 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 12 Sep 2015 02:06:09 +0300 Subject: PCI: rcar: Add R8A7794 support Add Renesas R8A7794 SoC support to the Renesas R-Car gen2 PCI driver. Signed-off-by: Sergei Shtylyov Signed-off-by: Bjorn Helgaas Acked-by: Simon Horman diff --git a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt index d8ef5bf..7fab84b 100644 --- a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt +++ b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt @@ -7,7 +7,8 @@ OHCI and EHCI controllers. Required properties: - compatible: "renesas,pci-r8a7790" for the R8A7790 SoC; - "renesas,pci-r8a7791" for the R8A7791 SoC. + "renesas,pci-r8a7791" for the R8A7791 SoC; + "renesas,pci-r8a7794" for the R8A7794 SoC. - reg: A list of physical regions to access the device: the first is the operational registers for the OHCI/EHCI controllers and the second is for the bridge configuration and control registers. 
diff --git a/drivers/pci/host/pci-rcar-gen2.c b/drivers/pci/host/pci-rcar-gen2.c index 367e28f..c4f64bf 100644 --- a/drivers/pci/host/pci-rcar-gen2.c +++ b/drivers/pci/host/pci-rcar-gen2.c @@ -362,6 +362,7 @@ static int rcar_pci_probe(struct platform_device *pdev) static struct of_device_id rcar_pci_of_match[] = { { .compatible = "renesas,pci-r8a7790", }, { .compatible = "renesas,pci-r8a7791", }, + { .compatible = "renesas,pci-r8a7794", }, { }, }; -- cgit v0.10.2 From 41fc014332d91ee90c32840bf161f9685b7fbf2b Mon Sep 17 00:00:00 2001 From: Wilson Kok Date: Tue, 22 Sep 2015 21:40:22 -0700 Subject: fib_rules: fix fib rule dumps across multiple skbs dump_rules returns skb length and not error. But when family == AF_UNSPEC, the caller of dump_rules assumes that it returns an error. Hence, when family == AF_UNSPEC, we continue trying to dump on -EMSGSIZE errors resulting in incorrect dump idx carried between skbs belonging to the same dump. This results in fib rule dump always only dumping rules that fit into the first skb. This patch fixes dump_rules to return error so that we exit correctly and idx is correctly maintained between skbs that are part of the same dump. Signed-off-by: Wilson Kok Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bf77e36..365de66 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -631,15 +631,17 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, { int idx = 0; struct fib_rule *rule; + int err = 0; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWRULE, - NLM_F_MULTI, ops) < 0) + err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops); + if (err) break; skip: idx++; @@ -648,7 +650,7 @@ skip: cb->args[1] = idx; rules_ops_put(ops); - return skb->len; + return err; } static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) @@ -664,7 +666,9 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) if (ops == NULL) return -EAFNOSUPPORT; - return dump_rules(skb, cb, ops); + dump_rules(skb, cb, ops); + + return skb->len; } rcu_read_lock(); -- cgit v0.10.2 From a46496ce38eeb401344d5623c1960dbf2f1769be Mon Sep 17 00:00:00 2001 From: Matt Bennett Date: Wed, 23 Sep 2015 16:58:31 +1200 Subject: ip6_gre: Reduce log level in ip6gre_err() to debug Currently error log messages in ip6gre_err are printed at 'warn' level. This is different to most other tunnel types which don't print any messages. These log messages don't provide any information that couldn't be deduced with networking tools. Also it can be annoying to have one end of the tunnel go down and have the logs fill with pointless messages such as "Path to destination invalid or inactive!". This patch reduces the log level of these messages to 'dbg' level to bring the visible behaviour into line with other tunnel types. Signed-off-by: Matt Bennett Signed-off-by: David S. 
Miller diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6465124..3c7b931 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -404,13 +404,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ipv6_tlv_tnl_enc_lim *tel; __u32 mtu; case ICMPV6_DEST_UNREACH: - net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", - t->parms.name); + net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); break; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { - net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", - t->parms.name); + net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); } break; case ICMPV6_PARAMPROB: @@ -421,12 +421,12 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (teli && teli == be32_to_cpu(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { - net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", - t->parms.name); + net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); } } else { - net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", - t->parms.name); + net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); } break; case ICMPV6_PKT_TOOBIG: -- cgit v0.10.2 From e7ae65ced7dd71aa3dc29bda7a94ac82d9ea7751 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 21 Sep 2015 14:57:25 +0200 Subject: gpio: mention in DT binding doc that -gpio is deprecated The gpiolib supports parsing DT properties of the form -gpio but it was only added for compatibility with older DT bindings that got it wrong and should not be used in newer bindings. The commit that added support for this was: dd34c37aa3e8 ("gpio: of: Allow -gpio suffix for property names") but didn't update the documentation to explain this so it's been a source of confusion. So let's make this clear in the GPIO DT binding doc. Signed-off-by: Javier Martinez Canillas Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt index 5788d5c..82d40e2 100644 --- a/Documentation/devicetree/bindings/gpio/gpio.txt +++ b/Documentation/devicetree/bindings/gpio/gpio.txt @@ -16,7 +16,9 @@ properties, each containing a 'gpio-list': GPIO properties should be named "[-]gpios", with being the purpose of this GPIO for the device. While a non-existent is considered valid for compatibility reasons (resolving to the "gpios" property), it is not allowed -for new bindings. +for new bindings. Also, GPIO properties named "[-]gpio" are valid and old +bindings use it, but are only supported for compatibility reasons and should not +be used for newer bindings since it has been deprecated. GPIO properties can contain one or more GPIO phandles, but only in exceptional cases should they contain more than one. If your device uses several GPIOs with -- cgit v0.10.2 From a13f18f59d2646754cda3662a9e215ff43e7a7d5 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 24 Sep 2015 15:53:56 +0100 Subject: Documentation: arm: Fix typo in the idle-states bindings examples The idle-states bindings mandate that the entry-method string in the idle-states node must be "psci" for ARM v8 64-bit systems, but the examples in the bindings report a wrong entry-method string. 
Owing to this typo, some dts in the kernel wrongly defined the entry-method property, since they likely cut and pasted the example definition without paying attention to the bindings definitions. This patch fixes the typo in the DT idle states bindings examples and respective dts in the kernel so that the bindings and related dts files are made compliant. Signed-off-by: Lorenzo Pieralisi Cc: Howard Chen Cc: Rob Herring Cc: Mark Rutland Cc: Heiko Stuebner Signed-off-by: Rob Herring diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt index a8274ea..b8e41c1 100644 --- a/Documentation/devicetree/bindings/arm/idle-states.txt +++ b/Documentation/devicetree/bindings/arm/idle-states.txt @@ -497,7 +497,7 @@ cpus { }; idle-states { - entry-method = "arm,psci"; + entry-method = "psci"; CPU_RETENTION_0_0: cpu-retention-0-0 { compatible = "arm,idle-state"; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index d18ee42..06a1564 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -81,7 +81,7 @@ }; idle-states { - entry-method = "arm,psci"; + entry-method = "psci"; CPU_SLEEP_0: cpu-sleep-0 { compatible = "arm,idle-state"; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index a712bea..cc093a4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -106,7 +106,7 @@ }; idle-states { - entry-method = "arm,psci"; + entry-method = "psci"; cpu_sleep: cpu-sleep-0 { compatible = "arm,idle-state"; -- cgit v0.10.2 From 17a10c9215b39a5ea8faeb241759c1aee6553919 Mon Sep 17 00:00:00 2001 From: Matt Bennett Date: Fri, 25 Sep 2015 11:01:47 +1200 Subject: ip6_tunnel: Reduce log level in ip6_tnl_err() to debug Currently error log messages in ip6_tnl_err are printed at 'warn' level. This is different to other tunnel types which don't print any messages. These log messages don't provide any information that couldn't be deduced with networking tools. Also it can be annoying to have one end of the tunnel go down and have the logs fill with pointless messages such as "Path to destination invalid or inactive!". This patch reduces the log level of these messages to 'dbg' level to bring the visible behaviour into line with other tunnel types. Signed-off-by: Matt Bennett Signed-off-by: David S. 
Miller diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 983f0d2..eabffbb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -569,14 +569,14 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, struct ipv6_tlv_tnl_enc_lim *tel; __u32 mtu; case ICMPV6_DEST_UNREACH: - net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", - t->parms.name); + net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); rel_msg = 1; break; case ICMPV6_TIME_EXCEED: if ((*code) == ICMPV6_EXC_HOPLIMIT) { - net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", - t->parms.name); + net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); rel_msg = 1; } break; @@ -588,13 +588,13 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { - net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", - t->parms.name); + net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); rel_msg = 1; } } else { - net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", - t->parms.name); + net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); } break; case ICMPV6_PKT_TOOBIG: -- cgit v0.10.2 From a136442131443d929d2d8d243157824de4dfbae8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:35:52 +0100 Subject: phy: fix of_mdio_find_bus() device refcount leak of_mdio_find_bus() leaks a struct device refcount, caused by using class_find_device() and not realising that the device reference has its refcount incremented: * Note, you will need to drop the reference with put_device() after use. ... while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { get_device(dev); break; } Update the comment, and arrange for the phy code to drop this refcount when disposing of a reference to it. Signed-off-by: Russell King Signed-off-by: David S. 
Miller diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 4d4d25e..280c7c3 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -113,18 +113,18 @@ int mdio_mux_init(struct device *dev, if (!parent_bus_node) return -ENODEV; - parent_bus = of_mdio_find_bus(parent_bus_node); - if (parent_bus == NULL) { - ret_val = -EPROBE_DEFER; - goto err_parent_bus; - } - pb = devm_kzalloc(dev, sizeof(*pb), GFP_KERNEL); if (pb == NULL) { ret_val = -ENOMEM; goto err_parent_bus; } + parent_bus = of_mdio_find_bus(parent_bus_node); + if (parent_bus == NULL) { + ret_val = -EPROBE_DEFER; + goto err_parent_bus; + } + pb->switch_data = data; pb->switch_fn = switch_fn; pb->current_child = -1; @@ -173,6 +173,10 @@ int mdio_mux_init(struct device *dev, dev_info(dev, "Version " DRV_VERSION "\n"); return 0; } + + /* balance the reference of_mdio_find_bus() took */ + put_device(&pb->mii_bus->dev); + err_parent_bus: of_node_put(parent_bus_node); return ret_val; @@ -189,6 +193,9 @@ void mdio_mux_uninit(void *mux_handle) mdiobus_free(cb->mii_bus); cb = cb->next; } + + /* balance the reference of_mdio_find_bus() in mdio_mux_init() took */ + put_device(&pb->mii_bus->dev); } EXPORT_SYMBOL_GPL(mdio_mux_uninit); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 02a4615..67553e1 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -167,7 +167,9 @@ static int of_mdio_bus_match(struct device *dev, const void *mdio_bus_np) * of_mdio_find_bus - Given an mii_bus node, find the mii_bus. * @mdio_bus_np: Pointer to the mii_bus. * - * Returns a pointer to the mii_bus, or NULL if none found. + * Returns a reference to the mii_bus, or NULL if none found. The + * embedded struct device will have its reference count incremented, + * and this must be put once the bus is finished with. * * Because the association of a device_node and mii_bus is made via * of_mdiobus_register(), the mii_bus cannot be found before it is -- cgit v0.10.2 From e496ae690b2faff751e1849fb97b060615e21f28 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:35:57 +0100 Subject: net: dsa: fix of_mdio_find_bus() device refcount leak Current users of of_mdio_find_bus() leak a struct device refcount, as they fail to clean up the reference obtained inside class_find_device(). Fix the DSA code to properly refcount the returned MDIO bus by: 1. taking a reference on the struct device whenever we assign it to pd->chip[x].host_dev. 2. dropping the reference when we overwrite the existing reference. 3. dropping the reference when we free the data structure. 4. dropping the initial reference we obtained after setting up the platform data structure, or on failure. In step 2 above, where we obtain a new MDIO bus, there is no need to take a reference on it as we would only have to drop it immediately after assignment again, iow: put_device(cd->host_dev); /* drop original assignment ref */ cd->host_dev = get_device(&mdio_bus_switch->dev); /* get our ref */ put_device(&mdio_bus_switch->dev); /* drop of_mdio_find_bus ref */ Signed-off-by: Russell King Signed-off-by: David S. 
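Miller

The DSA fix below follows the standard get/put pattern for class_find_device()-based lookups; as a minimal caller-side sketch of the contract (np is an assumed struct device_node for the bus, not taken from this patch):

	/* Minimal sketch of the of_mdio_find_bus() contract:
	 * the returned mii_bus has its embedded struct device refcount
	 * already incremented, so every successful lookup must
	 * eventually be balanced by a put_device().
	 */
	static int example_use_mdio_bus(struct device_node *np)
	{
		struct mii_bus *bus = of_mdio_find_bus(np);

		if (!bus)
			return -EPROBE_DEFER;	/* bus not registered yet */

		/* ... use bus ... */

		put_device(&bus->dev);		/* drop the lookup reference */
		return 0;
	}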
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 76e380076..bf4ba15 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -634,6 +634,10 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd) port_index++; } kfree(pd->chip[i].rtable); + + /* Drop our reference to the MDIO bus device */ + if (pd->chip[i].host_dev) + put_device(pd->chip[i].host_dev); } kfree(pd->chip); } @@ -661,16 +665,22 @@ static int dsa_of_probe(struct device *dev) return -EPROBE_DEFER; ethernet = of_parse_phandle(np, "dsa,ethernet", 0); - if (!ethernet) - return -EINVAL; + if (!ethernet) { + ret = -EINVAL; + goto out_put_mdio; + } ethernet_dev = of_find_net_device_by_node(ethernet); - if (!ethernet_dev) - return -EPROBE_DEFER; + if (!ethernet_dev) { + ret = -EPROBE_DEFER; + goto out_put_mdio; + } pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return -ENOMEM; + if (!pd) { + ret = -ENOMEM; + goto out_put_mdio; + } dev->platform_data = pd; pd->of_netdev = ethernet_dev; @@ -691,7 +701,9 @@ static int dsa_of_probe(struct device *dev) cd = &pd->chip[chip_index]; cd->of_node = child; - cd->host_dev = &mdio_bus->dev; + + /* When assigning the host device, increment its refcount */ + cd->host_dev = get_device(&mdio_bus->dev); sw_addr = of_get_property(child, "reg", NULL); if (!sw_addr) @@ -711,6 +723,12 @@ static int dsa_of_probe(struct device *dev) ret = -EPROBE_DEFER; goto out_free_chip; } + + /* Drop the mdio_bus device ref, replacing the host + * device with the mdio_bus_switch device, keeping + * the refcount from of_mdio_find_bus() above. + */ + put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } @@ -744,6 +762,10 @@ static int dsa_of_probe(struct device *dev) } } + /* The individual chips hold their own refcount on the mdio bus, + * so drop ours */ + put_device(&mdio_bus->dev); + return 0; out_free_chip: @@ -751,6 +773,8 @@ out_free_chip: out_free: kfree(pd); dev->platform_data = NULL; +out_put_mdio: + put_device(&mdio_bus->dev); return ret; } -- cgit v0.10.2
From 3e3aaf649416988ca8be4ad2c52dc24d8be7b46e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:02 +0100 Subject: phy: fix mdiobus module safety Re-implement the mdiobus module refcounting to ensure that the mdiobus module code does not go away while we might call into it. The old scheme using bus->dev.driver was buggy, because bus->dev is a class device which never has a struct device_driver associated with it, and hence the associated code trying to obtain a refcount did nothing useful. Instead, take the approach that other subsystems do: pass the module when calling mdiobus_register(), and record that in the mii_bus struct. When we need to increment the module use count in the phy code, use this stored pointer. When the phy is detached, drop the module refcount, remembering that the phy device might go away at that point. This doesn't stop the mii_bus going away while there are in-use phys - it merely stops the underlying code vanishing. Signed-off-by: Russell King Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 67553e1..99240662 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -244,7 +244,7 @@ static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, * * Returns 0 on success or < 0 on error.
*/ -int mdiobus_register(struct mii_bus *bus) +int __mdiobus_register(struct mii_bus *bus, struct module *owner) { int i, err; @@ -255,6 +255,7 @@ int mdiobus_register(struct mii_bus *bus) BUG_ON(bus->state != MDIOBUS_ALLOCATED && bus->state != MDIOBUS_UNREGISTERED); + bus->owner = owner; bus->dev.parent = bus->parent; bus->dev.class = &mdio_bus_class; bus->dev.groups = NULL; @@ -296,7 +297,7 @@ error: device_del(&bus->dev); return err; } -EXPORT_SYMBOL(mdiobus_register); +EXPORT_SYMBOL(__mdiobus_register); void mdiobus_unregister(struct mii_bus *bus) { diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c0f2111..03adf32 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -582,10 +582,15 @@ EXPORT_SYMBOL(phy_init_hw); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface) { + struct mii_bus *bus = phydev->bus; struct device *d = &phydev->dev; - struct module *bus_module; int err; + if (!try_module_get(bus->owner)) { + dev_err(&dev->dev, "failed to get the bus module\n"); + return -EIO; + } + /* Assume that if there is no driver, that it doesn't * exist, and we should use the genphy driver. */ @@ -600,20 +605,13 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, err = device_bind_driver(d); if (err) - return err; + goto error; } if (phydev->attached_dev) { dev_err(&dev->dev, "PHY already attached\n"); - return -EBUSY; - } - - /* Increment the bus module reference count */ - bus_module = phydev->bus->dev.driver ? - phydev->bus->dev.driver->owner : NULL; - if (!try_module_get(bus_module)) { - dev_err(&dev->dev, "failed to get the bus module\n"); - return -EIO; + err = -EBUSY; + goto error; } phydev->attached_dev = dev; @@ -636,6 +634,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phy_resume(phydev); return err; + +error: + module_put(bus->owner); + return err; } EXPORT_SYMBOL(phy_attach_direct); @@ -680,11 +682,9 @@ EXPORT_SYMBOL(phy_attach); */ void phy_detach(struct phy_device *phydev) { + struct mii_bus *bus; int i; - if (phydev->bus->dev.driver) - module_put(phydev->bus->dev.driver->owner); - phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; phy_suspend(phydev); @@ -700,6 +700,10 @@ void phy_detach(struct phy_device *phydev) break; } } + + bus = phydev->bus; + + module_put(bus->owner); } EXPORT_SYMBOL(phy_detach); diff --git a/include/linux/phy.h b/include/linux/phy.h index 962387a..11bce44 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -153,6 +154,7 @@ struct sk_buff; * PHYs should register using this structure */ struct mii_bus { + struct module *owner; const char *name; char id[MII_BUS_ID_SIZE]; void *priv; @@ -198,7 +200,8 @@ static inline struct mii_bus *mdiobus_alloc(void) return mdiobus_alloc_size(0); } -int mdiobus_register(struct mii_bus *bus); +int __mdiobus_register(struct mii_bus *bus, struct module *owner); +#define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE) void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); -- cgit v0.10.2 From 7322967bc1bd97ac9c49ecea19e5a1f681ca27ee Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:08 +0100 Subject: phy: add proper phy struct device refcounting Take a refcount on the phy struct device when the phy device is 
attached to a network device, and drop it after it's detached. This ensures that a refcount is held on the phy device while the device is being used by a network device, thereby preventing the phy_device from being unexpectedly kfree()'d by phy_device_release(). Signed-off-by: Russell King Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 03adf32..97a4f52 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -578,6 +578,7 @@ EXPORT_SYMBOL(phy_init_hw); * generic driver is used. The phy_device is given a ptr to * the attaching device, and given a callback for link status * change. The phy_device is returned to the attaching driver. + * This function takes a reference on the phy device. */ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface) @@ -591,6 +592,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, return -EIO; } + get_device(d); + /* Assume that if there is no driver, that it doesn't * exist, and we should use the genphy driver. */ @@ -636,6 +639,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phy_resume(phydev); return err; error: + put_device(d); module_put(bus->owner); return err; } EXPORT_SYMBOL(phy_attach_direct); @@ -679,6 +683,9 @@ EXPORT_SYMBOL(phy_attach); /** * phy_detach - detach a PHY device from its network device * @phydev: target phy_device struct + * + * This detaches the phy device from its network device and the phy + * driver, and drops the reference count taken in phy_attach_direct(). */ void phy_detach(struct phy_device *phydev) { @@ -701,8 +708,13 @@ void phy_detach(struct phy_device *phydev) } } + /* + * The phydev might go away on the put_device() below, so avoid + * a use-after-free bug by reading the underlying bus first. + */ bus = phydev->bus; + put_device(&phydev->dev); module_put(bus->owner); } EXPORT_SYMBOL(phy_detach); -- cgit v0.10.2
From f018ae7a8c576345d56a0cd40d86c0574a2eb360 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:13 +0100 Subject: of_mdio: fix MDIO phy device refcounting bus_find_device() is defined as: * This is similar to the bus_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. and it does indeed return a reference-counted pointer to the device: while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) ^^^^^^^^^^^^^^^ break; klist_iter_exit(&i); return dev; What that means is that when we're done with the struct device, we must drop that reference. Neither of_phy_connect() nor of_phy_attach() did this when phy_connect_direct() or phy_attach_direct() failed. With our previous patch, phy_connect_direct() and phy_attach_direct() take a new refcount on the phy device when successful, so we can drop our local reference immediately after these functions, whether or not they succeeded. Signed-off-by: Russell King Acked-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 1350fa2..a87a868 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -197,7 +197,8 @@ static int of_phy_match(struct device *dev, void *phy_np) * of_phy_find_device - Given a PHY node, find the phy_device * @phy_np: Pointer to the phy's device tree node * - * Returns a pointer to the phy_device. + * If successful, returns a pointer to the phy_device with the embedded + * struct device refcount incremented by one, or NULL on failure.
*/ struct phy_device *of_phy_find_device(struct device_node *phy_np) { @@ -217,7 +218,9 @@ EXPORT_SYMBOL(of_phy_find_device); * @hndlr: Link state callback for the network device * @iface: PHY data interface type * - * Returns a pointer to the phy_device if successful. NULL otherwise + * If successful, returns a pointer to the phy_device with the embedded + * struct device refcount incremented by one, or NULL on failure. The + * refcount must be dropped by calling phy_disconnect() or phy_detach(). */ struct phy_device *of_phy_connect(struct net_device *dev, struct device_node *phy_np, @@ -225,13 +228,19 @@ struct phy_device *of_phy_connect(struct net_device *dev, phy_interface_t iface) { struct phy_device *phy = of_phy_find_device(phy_np); + int ret; if (!phy) return NULL; phy->dev_flags = flags; - return phy_connect_direct(dev, phy, hndlr, iface) ? NULL : phy; + ret = phy_connect_direct(dev, phy, hndlr, iface); + + /* refcount is held by phy_connect_direct() on success */ + put_device(&phy->dev); + + return ret ? NULL : phy; } EXPORT_SYMBOL(of_phy_connect); @@ -241,17 +250,27 @@ EXPORT_SYMBOL(of_phy_connect); * @phy_np: Node pointer for the PHY * @flags: flags to pass to the PHY * @iface: PHY data interface type + * + * If successful, returns a pointer to the phy_device with the embedded + * struct device refcount incremented by one, or NULL on failure. The + * refcount must be dropped by calling phy_disconnect() or phy_detach(). */ struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface) { struct phy_device *phy = of_phy_find_device(phy_np); + int ret; if (!phy) return NULL; - return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy; + ret = phy_attach_direct(dev, phy, flags, iface); + + /* refcount is held by phy_attach_direct() on success */ + put_device(&phy->dev); + + return ret ? NULL : phy; } EXPORT_SYMBOL(of_phy_attach); -- cgit v0.10.2 From 04d53b20fe44afe635b3d4438b437f7a12927e9a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:18 +0100 Subject: net: fix phy refcounting in a bunch of drivers of_phy_find_device() increments the phy struct device refcount, which we need to properly balance. Add code to network drivers using this function to ensure that the struct device refcount is correctly balanced. For xgene, looking back in the history, we should be able to use of_phy_connect() with a zero flags argument for the DT case as this is how the driver used to operate prior to de7b5b3d790a ("net: eth: xgene: change APM X-Gene SoC platform ethernet to support ACPI"). This leaves the Cavium Thunder BGX unfixed; fixing this driver is a complicated task, one which the maintainers need to be involved with. Signed-off-by: Russell King Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index cfa3704..c4bb802 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -689,16 +689,24 @@ static int xgene_enet_phy_connect(struct net_device *ndev) netdev_dbg(ndev, "No phy-handle found in DT\n"); return -ENODEV; } - pdata->phy_dev = of_phy_find_device(phy_np); - } - phy_dev = pdata->phy_dev; + phy_dev = of_phy_connect(ndev, phy_np, &xgene_enet_adjust_link, + 0, pdata->phy_mode); + if (!phy_dev) { + netdev_err(ndev, "Could not connect to PHY\n"); + return -ENODEV; + } + + pdata->phy_dev = phy_dev; + } else { + phy_dev = pdata->phy_dev; - if (!phy_dev || - phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link, - pdata->phy_mode)) { - netdev_err(ndev, "Could not connect to PHY\n"); - return -ENODEV; + if (!phy_dev || + phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link, + pdata->phy_mode)) { + netdev_err(ndev, "Could not connect to PHY\n"); + return -ENODEV; + } } pdata->phy_speed = SPEED_UNKNOWN; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 803ed4c9..a5cf433 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1702,6 +1702,7 @@ static void gfar_configure_serdes(struct net_device *dev) tbiphy = of_phy_find_device(priv->tbi_node); if (!tbiphy) { dev_err(&dev->dev, "error: Could not get TBI device\n"); + put_device(&tbiphy->dev); return; } @@ -1723,6 +1724,8 @@ static void gfar_configure_serdes(struct net_device *dev) phy_write(tbiphy, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); + + put_device(&tbiphy->dev); } static int __gfar_is_rx_idle(struct gfar_private *priv) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 4dd40e0..650f788 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1384,6 +1384,8 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) value = phy_read(tbiphy, ENET_TBI_MII_CR); value &= ~0x1000; /* Turn off autonegotiation */ phy_write(tbiphy, ENET_TBI_MII_CR, value); + + put_device(&tbiphy->dev); } init_check_frame_length_mode(ug_info->lengthCheckRx, &ug_regs->maccfg2); @@ -1702,8 +1704,10 @@ static void uec_configure_serdes(struct net_device *dev) * everything for us? Resetting it takes the link down and requires * several seconds for it to come back. */ - if (phy_read(tbiphy, ENET_TBI_MII_SR) & TBISR_LSTATUS) + if (phy_read(tbiphy, ENET_TBI_MII_SR) & TBISR_LSTATUS) { + put_device(&tbiphy->dev); return; + } /* Single clk mode, mii mode off(for serdes communication) */ phy_write(tbiphy, ENET_TBI_MII_ANA, TBIANA_SETTINGS); @@ -1711,6 +1715,8 @@ static void uec_configure_serdes(struct net_device *dev) phy_write(tbiphy, ENET_TBI_MII_TBICON, TBICON_CLK_SELECT); phy_write(tbiphy, ENET_TBI_MII_CR, TBICR_SETTINGS); + + put_device(&tbiphy->dev); } /* Configure the PHY for dev. 
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 09ec32e..514df76 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3175,6 +3175,8 @@ static int mvneta_probe(struct platform_device *pdev) struct phy_device *phy = of_phy_find_device(dn); mvneta_fixed_link_update(pp, phy); + + put_device(&phy->dev); } return 0; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 6008eee..cf468c8 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -828,6 +828,8 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) if (!phydev) dev_info(dev, "MDIO of the phy is not registered yet\n"); + else + put_device(&phydev->dev); return 0; } -- cgit v0.10.2 From d618bf2bfd2a095644c852ebea16f5a981f9d875 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:23 +0100 Subject: phy: fixed-phy: properly validate phy in fixed_phy_update_state() Validate that the phy_device passed into fixed_phy_update_state() is a fixed-phy device before walking the list of phys for a fixed phy at the same address. Signed-off-by: Russell King Signed-off-by: David S. Miller diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index fb1299c..e23bf5b 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -220,7 +220,7 @@ int fixed_phy_update_state(struct phy_device *phydev, struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp; - if (!phydev || !phydev->bus) + if (!phydev || phydev->bus != fmb->mii_bus) return -EINVAL; list_for_each_entry(fp, &fmb->phys, node) { -- cgit v0.10.2 From 38737e490d4ea91660d3cec83ef88c4e6d360ae4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:28 +0100 Subject: phy: add phy_device_remove() Add a phy_device_remove() function to complement phy_device_register(), which undoes the effects of phy_device_register() by removing the phy device from visibility, but not freeing it. This allows these details to be moved out of the mdio bus code into the phy code where this action belongs. Signed-off-by: Russell King Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index a5cf433..710715f 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1702,7 +1702,6 @@ static void gfar_configure_serdes(struct net_device *dev) tbiphy = of_phy_find_device(priv->tbi_node); if (!tbiphy) { dev_err(&dev->dev, "error: Could not get TBI device\n"); - put_device(&tbiphy->dev); return; } @@ -1711,8 +1710,10 @@ static void gfar_configure_serdes(struct net_device *dev) * everything for us? Resetting it takes the link down and requires * several seconds for it to come back. 
*/ - if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) + if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) { + put_device(&tbiphy->dev); return; + } /* Single clk mode, mii mode off(for serdes communication) */ phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 99240662..c340e41 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -291,8 +291,11 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) error: while (--i >= 0) { - if (bus->phy_map[i]) - device_unregister(&bus->phy_map[i]->dev); + struct phy_device *phydev = bus->phy_map[i]; + if (phydev) { + phy_device_remove(phydev); + phy_device_free(phydev); + } } device_del(&bus->dev); return err; @@ -307,9 +310,11 @@ void mdiobus_unregister(struct mii_bus *bus) bus->state = MDIOBUS_UNREGISTERED; for (i = 0; i < PHY_MAX_ADDR; i++) { - if (bus->phy_map[i]) - device_unregister(&bus->phy_map[i]->dev); - bus->phy_map[i] = NULL; + struct phy_device *phydev = bus->phy_map[i]; + if (phydev) { + phy_device_remove(phydev); + phy_device_free(phydev); + } } device_del(&bus->dev); } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 97a4f52..f761288 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -384,6 +384,24 @@ int phy_device_register(struct phy_device *phydev) EXPORT_SYMBOL(phy_device_register); /** + * phy_device_remove - Remove a previously registered phy device from the MDIO bus + * @phydev: phy_device structure to remove + * + * This doesn't free the phy_device itself, it merely reverses the effects + * of phy_device_register(). Use phy_device_free() to free the device + * after calling this function. + */ +void phy_device_remove(struct phy_device *phydev) +{ + struct mii_bus *bus = phydev->bus; + int addr = phydev->addr; + + device_del(&phydev->dev); + bus->phy_map[addr] = NULL; +} +EXPORT_SYMBOL(phy_device_remove); + +/** * phy_find_first - finds the first PHY device on the bus * @bus: the target MII bus */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 11bce44..4a4e3a0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -745,6 +745,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, struct phy_c45_device_ids *c45_ids); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); +void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); -- cgit v0.10.2 From 9861f72074c77a8a065622c1be7e9c4277e600eb Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:33 +0100 Subject: net: fix net_device refcounting of_find_net_device_by_node() uses class_find_device() internally to lookup the corresponding network device. class_find_device() returns a reference to the embedded struct device, with its refcount incremented. Add a comment to the definition in net/core/net-sysfs.c indicating the need to drop this refcount, and fix the DSA code to drop this refcount when the OF-generated platform data is cleaned up and freed. Also arrange for the ref to be dropped when handling errors. Signed-off-by: Russell King Signed-off-by: David S. 
Miller diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b279077..805a95a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1481,6 +1481,15 @@ static int of_dev_node_match(struct device *dev, const void *data) return ret == 0 ? dev->of_node == data : ret; } +/* + * of_find_net_device_by_node - lookup the net device for the device node + * @np: OF device node + * + * Looks up the net_device structure corresponding with the device node. + * If successful, returns a pointer to the net_device with the embedded + * struct device refcount incremented by one, or NULL on failure. The + * refcount must be dropped when done with the net_device. + */ struct net_device *of_find_net_device_by_node(struct device_node *np) { struct device *dev; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index bf4ba15..c59fa5d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -679,7 +679,7 @@ static int dsa_of_probe(struct device *dev) pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { ret = -ENOMEM; - goto out_put_mdio; + goto out_put_ethernet; } dev->platform_data = pd; @@ -773,6 +773,8 @@ out_free_chip: out_free: kfree(pd); dev->platform_data = NULL; +out_put_ethernet: + put_device(ðernet_dev->dev); out_put_mdio: put_device(&mdio_bus->dev); return ret; @@ -786,6 +788,7 @@ static void dsa_of_remove(struct device *dev) return; dsa_of_free_platform_data(pd); + put_device(&pd->of_netdev->dev); kfree(pd); } #else -- cgit v0.10.2 From 79a8059d244e99454e474902e4325ee9b50e9178 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Sep 2015 07:46:55 +0200 Subject: KVM: svm: do not call kvm_set_cr0 from init_vmcb kvm_set_cr0 may want to call kvm_zap_gfn_range and thus access the memslots array (SRCU protected). Using a mini SRCU critical section is ugly, and adding it to kvm_arch_vcpu_create doesn't work because the VMX vcpu_create callback calls synchronize_srcu. Fixes this lockdep splat: =============================== [ INFO: suspicious RCU usage. ] 4.3.0-rc1+ #1 Not tainted ------------------------------- include/linux/kvm_host.h:488 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by qemu-system-i38/17000: #0: (&(&kvm->mmu_lock)->rlock){+.+...}, at: kvm_zap_gfn_range+0x24/0x1a0 [kvm] [...] Call Trace: dump_stack+0x4e/0x84 lockdep_rcu_suspicious+0xfd/0x130 kvm_zap_gfn_range+0x188/0x1a0 [kvm] kvm_set_cr0+0xde/0x1e0 [kvm] init_vmcb+0x760/0xad0 [kvm_amd] svm_create_vcpu+0x197/0x250 [kvm_amd] kvm_arch_vcpu_create+0x47/0x70 [kvm] kvm_vm_ioctl+0x302/0x7e0 [kvm] ? __lock_is_held+0x51/0x70 ? __fget+0x101/0x210 do_vfs_ioctl+0x2f4/0x560 ? __fget_light+0x29/0x90 SyS_ioctl+0x4c/0x90 entry_SYSCALL_64_fastpath+0x16/0x73 Reported-by: Borislav Petkov Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 89173af..94b7d15 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -202,6 +202,7 @@ module_param(npt, int, S_IRUGO); static int nested = true; module_param(nested, int, S_IRUGO); +static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1263,7 +1264,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. * It also updates the guest-visible cr0 value. 
*/ - (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); kvm_mmu_reset_context(&svm->vcpu); save->cr4 = X86_CR4_PAE; -- cgit v0.10.2 From 6fec21449a62702a582cecbb0b351363e039c95e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Sep 2015 23:02:14 +0200 Subject: KVM: x86: use correct page table format to check nested page table reserved bits Intel CPUID on AMD host or vice versa is a weird case, but it can happen. Handle it by checking the host CPU vendor instead of the guest's in reset_tdp_shadow_zero_bits_mask. For speed, the check uses the fact that Intel EPT has an X (executable) bit while AMD NPT has NX. Reported-by: Borislav Petkov Tested-by: Borislav Petkov Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 69088a1..6f20763 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3614,7 +3614,7 @@ static void __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct rsvd_bits_validate *rsvd_check, int maxphyaddr, int level, bool nx, bool gbpages, - bool pse) + bool pse, bool amd) { u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; @@ -3631,7 +3631,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for * leaf entries) on AMD CPUs only. */ - if (guest_cpuid_is_amd(vcpu)) + if (amd) nonleaf_bit8_rsvd = rsvd_bits(8, 8); switch (level) { @@ -3699,7 +3699,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, cpuid_maxphyaddr(vcpu), context->root_level, context->nx, guest_cpuid_has_gbpages(vcpu), - is_pse(vcpu)); + is_pse(vcpu), guest_cpuid_is_amd(vcpu)); } static void @@ -3749,13 +3749,24 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + /* + * Passing "true" to the last argument is okay; it adds a check + * on bit 8 of the SPTEs which KVM doesn't use anyway. + */ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, context->nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu)); + guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), + true); } EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); +static inline bool boot_cpu_is_amd(void) +{ + WARN_ON_ONCE(!tdp_enabled); + return shadow_x_mask == 0; +} + /* * the direct page table on host, use as much mmu features as * possible, however, kvm currently does not do execution-protection. @@ -3764,11 +3775,11 @@ static void reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - if (guest_cpuid_is_amd(vcpu)) + if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true); + cpu_has_gbpages, true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, -- cgit v0.10.2 From 58c95070da3a504fbeca7939435bbb062cb96ea3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Sep 2015 10:15:59 +0200 Subject: KVM: x86: fix off-by-one in reserved bits check 29ecd6601904 ("KVM: x86: avoid uninitialized variable warning", 2015-09-06) introduced a not-so-subtle problem, which probably escaped review because it was not part of the patch context. Before the patch, leaf was always equal to iterator.level. 
After, it is equal to iterator.level - 1 in the call to is_shadow_zero_bits_set, and when is_shadow_zero_bits_set does another "-1" the check on reserved bits becomes incorrect. Using "iterator.level" in the call fixes this call trace: WARNING: CPU: 2 PID: 17000 at arch/x86/kvm/mmu.c:3385 handle_mmio_page_fault.part.93+0x1a/0x20 [kvm]() Modules linked in: tun sha256_ssse3 sha256_generic drbg binfmt_misc ipv6 vfat fat fuse dm_crypt dm_mod kvm_amd kvm crc32_pclmul aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd fam15h_power amd64_edac_mod k10temp edac_core amdkfd amd_iommu_v2 radeon acpi_cpufreq [...] Call Trace: dump_stack+0x4e/0x84 warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 handle_mmio_page_fault.part.93+0x1a/0x20 [kvm] tdp_page_fault+0x231/0x290 [kvm] ? emulator_pio_in_out+0x6e/0xf0 [kvm] kvm_mmu_page_fault+0x36/0x240 [kvm] ? svm_set_cr0+0x95/0xc0 [kvm_amd] pf_interception+0xde/0x1d0 [kvm_amd] handle_exit+0x181/0xa70 [kvm_amd] ? kvm_arch_vcpu_ioctl_run+0x68b/0x1730 [kvm] kvm_arch_vcpu_ioctl_run+0x6f6/0x1730 [kvm] ? kvm_arch_vcpu_ioctl_run+0x68b/0x1730 [kvm] ? preempt_count_sub+0x9b/0xf0 ? mutex_lock_killable_nested+0x26f/0x490 ? preempt_count_sub+0x9b/0xf0 kvm_vcpu_ioctl+0x358/0x710 [kvm] ? __fget+0x5/0x210 ? __fget+0x101/0x210 do_vfs_ioctl+0x2f4/0x560 ? __fget_light+0x29/0x90 SyS_ioctl+0x4c/0x90 entry_SYSCALL_64_fastpath+0x16/0x73 ---[ end trace 37901c8686d84de6 ]--- Reported-by: Borislav Petkov Tested-by: Borislav Petkov Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6f20763..ff606f5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) break; reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, - leaf); + iterator.level); } walk_shadow_page_lockless_end(vcpu); -- cgit v0.10.2 From 920552b213e3dc832a874b4e7ba29ecddbab31bc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Sep 2015 12:34:53 +0200 Subject: KVM: disable halt_poll_ns as default for s390x We observed some performance degradation on s390x with dynamic halt polling. Until we can provide a proper fix, let's enable halt_poll_ns as default only for supported architectures. Architectures are now free to set their own halt_poll_ns default value. 
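Note that halt_poll_ns remains a runtime-writable module parameter (it is registered with S_IRUGO | S_IWUSR, as the kvm_main.c hunk below shows), so the per-architecture defines are only boot-time defaults; assuming KVM is loaded as the kvm module, the value can still be tuned afterwards through /sys/module/kvm/parameters/halt_poll_ns.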
Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 3df1e97..c4072d9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -33,6 +33,7 @@ #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG +#define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_VCPU_MAX_FEATURES 2 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4562459..ed03968 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -33,6 +33,7 @@ #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_HALT_POLL_NS_DEFAULT 500000 #include #include diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 3a54dbc..5a1a882 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -61,6 +61,7 @@ #define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 195886a..827a38d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,7 @@ #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +#define KVM_HALT_POLL_NS_DEFAULT 500000 /* These values are internal and can be increased later */ #define KVM_NR_IRQCHIPS 1 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 6ce4a0b..8ced426 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -35,6 +35,7 @@ */ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 4096 +#define KVM_HALT_POLL_NS_DEFAULT 0 #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 349f80a..2beee03 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -40,6 +40,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04146a2..8db1d93 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,8 +66,8 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -/* halt polling only reduces halt latency by 5-7 us, 500us is enough */ -static unsigned int halt_poll_ns = 500000; +/* Architectures should define their poll value according to the halt latency */ +static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* Default doubles per-vcpu halt_poll_ns. */ -- cgit v0.10.2
From 357cd64c18404309aabefb82019b12773de31a12 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 00:07:17 +0100 Subject: phy: marvell: add link partner advertised modes Read the standard link partner advertisement registers and store the result in phydev->lp_advertising, so ethtool can report this information to userspace. Zero it, as genphy does, if autonegotiation is disabled. Tested with a Marvell 88E1512 PHY. Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller
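As a sketch of the register-to-ethtool mapping the hunks below rely on (the helpers come from include/linux/mii.h; variable names mirror the driver):

	/* Combine the 1000BASE-T (MII_STAT1000) and 10/100 (MII_LPA)
	 * link-partner ability registers into the ethtool ADVERTISED_*
	 * mask stored in phydev->lp_advertising.
	 */
	int lpa   = phy_read(phydev, MII_LPA);
	int lpagb = phy_read(phydev, MII_STAT1000);

	phydev->lp_advertising = mii_stat1000_to_ethtool_lpa_t(lpagb) |
				 mii_lpa_to_ethtool_lpa_t(lpa);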
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index e6897b6..5de8d58 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -785,6 +785,7 @@ static int marvell_read_status(struct phy_device *phydev) int adv; int err; int lpa; + int lpagb; int status = 0; /* Update the link, but return if there @@ -802,10 +803,17 @@ static int marvell_read_status(struct phy_device *phydev) if (lpa < 0) return lpa; + lpagb = phy_read(phydev, MII_STAT1000); + if (lpagb < 0) + return lpagb; + adv = phy_read(phydev, MII_ADVERTISE); if (adv < 0) return adv; + phydev->lp_advertising = mii_stat1000_to_ethtool_lpa_t(lpagb) | + mii_lpa_to_ethtool_lpa_t(lpa); + lpa &= adv; if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) @@ -853,6 +861,7 @@ static int marvell_read_status(struct phy_device *phydev) phydev->speed = SPEED_10; phydev->pause = phydev->asym_pause = 0; + phydev->lp_advertising = 0; } return 0; -- cgit v0.10.2
From 21343ac21ec7d871e94e98e288f3398a4207d9c0 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 24 Sep 2015 15:46:53 +0530 Subject: net: via/Kconfig: GENERIC_PCI_IOMAP required if PCI not selected The avr32 allmodconfig builds are failing with: drivers/net/ethernet/via/via-rhine.c:1098:2: error: implicit declaration of function 'pci_iomap' [-Werror=implicit-function-declaration] drivers/net/ethernet/via/via-rhine.c:1119:2: error: implicit declaration of function 'pci_iounmap' [-Werror=implicit-function-declaration] The generic empty pci_iomap and pci_iounmap are used only if CONFIG_PCI is not defined and CONFIG_GENERIC_PCI_IOMAP is defined. Add GENERIC_PCI_IOMAP to the dependency list for VIA_RHINE, as we get a build failure when CONFIG_PCI and CONFIG_GENERIC_PCI_IOMAP are both not defined. Signed-off-by: Sudip Mukherjee Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig index 2f1264b..d3d0947 100644 --- a/drivers/net/ethernet/via/Kconfig +++ b/drivers/net/ethernet/via/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_VIA config VIA_RHINE tristate "VIA Rhine support" - depends on (PCI || OF_IRQ) + depends on PCI || (OF_IRQ && GENERIC_PCI_IOMAP) depends on HAS_DMA select CRC32 select MII -- cgit v0.10.2
From 58a89ecaca53736aa465170530acea4f8be34ab4 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 24 Sep 2015 12:54:01 +0200 Subject: ppp: fix lockdep splat in ppp_dev_uninit() ppp_dev_uninit() locks all_ppp_mutex while under rtnl mutex protection. ppp_create_interface() must then lock these mutexes in that same order to avoid possible deadlock. [ 120.880011] ====================================================== [ 120.880011] [ INFO: possible circular locking dependency detected ] [ 120.880011] 4.2.0 #1 Not tainted [ 120.880011] ------------------------------------------------------- [ 120.880011] ppp-apitest/15827 is trying to acquire lock: [ 120.880011] (&pn->all_ppp_mutex){+.+.+.}, at: [] ppp_dev_uninit+0x64/0xb0 [ppp_generic] [ 120.880011] [ 120.880011] but task is already holding lock: [ 120.880011] (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x12/0x14 [ 120.880011] [ 120.880011] which lock already depends on the new lock.
[ 120.880011] [ 120.880011] [ 120.880011] the existing dependency chain (in reverse order) is: [ 120.880011] [ 120.880011] -> #1 (rtnl_mutex){+.+.+.}: [ 120.880011] [] lock_acquire+0xcf/0x10e [ 120.880011] [] mutex_lock_nested+0x56/0x341 [ 120.880011] [] rtnl_lock+0x12/0x14 [ 120.880011] [] register_netdev+0x11/0x27 [ 120.880011] [] ppp_ioctl+0x289/0xc98 [ppp_generic] [ 120.880011] [] do_vfs_ioctl+0x4ea/0x532 [ 120.880011] [] SyS_ioctl+0x4e/0x7d [ 120.880011] [] entry_SYSCALL_64_fastpath+0x12/0x6f [ 120.880011] [ 120.880011] -> #0 (&pn->all_ppp_mutex){+.+.+.}: [ 120.880011] [] __lock_acquire+0xb07/0xe76 [ 120.880011] [] lock_acquire+0xcf/0x10e [ 120.880011] [] mutex_lock_nested+0x56/0x341 [ 120.880011] [] ppp_dev_uninit+0x64/0xb0 [ppp_generic] [ 120.880011] [] rollback_registered_many+0x19e/0x252 [ 120.880011] [] rollback_registered+0x29/0x38 [ 120.880011] [] unregister_netdevice_queue+0x6a/0x77 [ 120.880011] [] ppp_release+0x42/0x79 [ppp_generic] [ 120.880011] [] __fput+0xec/0x192 [ 120.880011] [] ____fput+0x9/0xb [ 120.880011] [] task_work_run+0x66/0x80 [ 120.880011] [] prepare_exit_to_usermode+0x8c/0xa7 [ 120.880011] [] syscall_return_slowpath+0xe4/0x104 [ 120.880011] [] int_ret_from_sys_call+0x25/0x9f [ 120.880011] [ 120.880011] other info that might help us debug this: [ 120.880011] [ 120.880011] Possible unsafe locking scenario: [ 120.880011] [ 120.880011] CPU0 CPU1 [ 120.880011] ---- ---- [ 120.880011] lock(rtnl_mutex); [ 120.880011] lock(&pn->all_ppp_mutex); [ 120.880011] lock(rtnl_mutex); [ 120.880011] lock(&pn->all_ppp_mutex); [ 120.880011] [ 120.880011] *** DEADLOCK *** Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0481daf..ed00446 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2755,6 +2755,7 @@ static struct ppp *ppp_create_interface(struct net *net, int unit, */ dev_net_set(dev, net); + rtnl_lock(); mutex_lock(&pn->all_ppp_mutex); if (unit < 0) { @@ -2785,7 +2786,7 @@ static struct ppp *ppp_create_interface(struct net *net, int unit, ppp->file.index = unit; sprintf(dev->name, "ppp%d", unit); - ret = register_netdev(dev); + ret = register_netdevice(dev); if (ret != 0) { unit_put(&pn->units_idr, unit); netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n", @@ -2797,6 +2798,7 @@ static struct ppp *ppp_create_interface(struct net *net, int unit, atomic_inc(&ppp_unit_count); mutex_unlock(&pn->all_ppp_mutex); + rtnl_unlock(); *retp = 0; return ppp; -- cgit v0.10.2 From 59f069789c98678710ed30a4be0daa3546ec82c7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Sep 2015 11:56:56 +0100 Subject: net: update docbook comment for __mdiobus_register() Update the docbook comment for __mdiobus_register() to include the new module owner argument. This resolves a warning found by the 0-day builder. Signed-off-by: Russell King Signed-off-by: David S. 
Miller diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c340e41..12f44c5 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -236,11 +236,14 @@ static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, #endif /** - * mdiobus_register - bring up all the PHYs on a given bus and attach them to bus + * __mdiobus_register - bring up all the PHYs on a given bus and attach them to bus * @bus: target mii_bus + * @owner: module containing bus accessor functions * * Description: Called by a bus driver to bring up all the PHYs - * on a given bus, and attach them to the bus. + * on a given bus, and attach them to the bus. Drivers should use + * mdiobus_register() rather than __mdiobus_register() unless they + * need to pass a specific owner module. * * Returns 0 on success or < 0 on error. */ -- cgit v0.10.2 From bdb06cbf77cb01911694cc9076ffa8196b7b9b61 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 24 Sep 2015 15:31:29 -0600 Subject: net: Fix panic in icmp_route_lookup Andrey reported a panic: [ 7249.865507] BUG: unable to handle kernel pointer dereference at 000000b4 [ 7249.865559] IP: [] icmp_route_lookup+0xaa/0x320 [ 7249.865598] *pdpt = 0000000030f7f001 *pde = 0000000000000000 [ 7249.865637] Oops: 0000 [#1] ... [ 7249.866811] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.3.0-999-generic #201509220155 [ 7249.866876] Hardware name: MSI MS-7250/MS-7250, BIOS 080014 08/02/2006 [ 7249.866916] task: c1a5ab00 ti: c1a52000 task.ti: c1a52000 [ 7249.866949] EIP: 0060:[] EFLAGS: 00210246 CPU: 0 [ 7249.866981] EIP is at icmp_route_lookup+0xaa/0x320 [ 7249.867012] EAX: 00000000 EBX: f483ba48 ECX: 00000000 EDX: f2e18a00 [ 7249.867045] ESI: 000000c0 EDI: f483ba70 EBP: f483b9ec ESP: f483b974 [ 7249.867077] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [ 7249.867108] CR0: 8005003b CR2: 000000b4 CR3: 36ee07c0 CR4: 000006f0 [ 7249.867141] Stack: [ 7249.867165] 320310ee 00000000 00000042 320310ee 00000000 c1aeca00 f3920240 f0c69180 [ 7249.867268] f483ba04 f855058b a89b66cd f483ba44 f8962f4b 00000000 e659266c f483ba54 [ 7249.867361] 8004753c f483ba5c f8962f4b f2031140 000003c1 ffbd8fa0 c16b0e00 00000064 [ 7249.867448] Call Trace: [ 7249.867494] [] ? e1000_xmit_frame+0x87b/0xdc0 [e1000e] [ 7249.867534] [] ? tcp_in_window+0xeb/0xb10 [nf_conntrack] [ 7249.867576] [] ? tcp_in_window+0xeb/0xb10 [nf_conntrack] [ 7249.867615] [] ? icmp_send+0xa0/0x380 [ 7249.867648] [] icmp_send+0x2cf/0x380 [ 7249.867681] [] nf_send_unreach+0xa6/0xc0 [nf_reject_ipv4] [ 7249.867714] [] reject_tg+0x7a/0x9f [ipt_REJECT] [ 7249.867746] [] ipt_do_table+0x317/0x70c [ip_tables] [ 7249.867780] [] ? __nf_conntrack_find_get+0x166/0x3b0 [nf_conntrack] [ 7249.867838] [] ? nf_conntrack_in+0x398/0x600 [nf_conntrack] [ 7249.867889] [] iptable_filter_hook+0x35/0x80 [iptable_filter] [ 7249.867933] [] nf_iterate+0x71/0x80 [ 7249.867970] [] nf_hook_slow+0x65/0xc0 [ 7249.868002] [] __ip_local_out_sk+0xc1/0xd0 [ 7249.868034] [] ? ip_forward_options+0x1a0/0x1a0 [ 7249.868066] [] ip_local_out_sk+0x16/0x30 [ 7249.868097] [] ip_send_skb+0x14/0x80 [ 7249.868129] [] ip_push_pending_frames+0x34/0x40 [ 7249.868163] [] ip_send_unicast_reply+0x282/0x310 [ 7249.868196] [] tcp_v4_send_reset+0x1b3/0x380 [ 7249.868227] [] tcp_v4_rcv+0x323/0x990 [ 7249.868257] [] ? nf_iterate+0x71/0x80 [ 7249.868289] [] ip_local_deliver_finish+0x8b/0x230 [ 7249.868322] [] ip_local_deliver+0x4c/0xa0 [ 7249.868353] [] ? ip_rcv_finish+0x390/0x390 [ 7249.868384] [] ip_rcv_finish+0x7c/0x390 [ 7249.868415] [] ip_rcv+0x2e0/0x420 ... 
Prior to the VRF change the oif was not set in the flow struct, so the VRF support should really have only added the vrf_master_ifindex lookup. Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") Cc: Andrey Melnikov Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 79fe05b..e5eb8ac 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; - fl4.flowi4_oif = vrf_master_ifindex(skb->dev) ? : skb->dev->ifindex; + fl4.flowi4_oif = vrf_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -461,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex; + fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key(net, fl4); -- cgit v0.10.2
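A minimal sketch (not kernel code) of the oif semantics this last fix restores, assuming vrf_master_ifindex() returns the VRF master's ifindex for an enslaved device and 0 otherwise:

	/* Route key setup for the ICMP error/reply lookup after the fix. */
	fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev);	/* VRF master, or 0 */
	/* An oif of 0 leaves the FIB lookup unconstrained, matching the
	 * pre-VRF behaviour; the removed "?: skb_in->dev->ifindex" fallback
	 * wrongly pinned the lookup to the ingress device.
	 */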