From 55a8d02bbb25edf9589b2f11afec410d053da497 Mon Sep 17 00:00:00 2001 From: Nan Jia Date: Sun, 31 May 2015 19:53:28 +1000 Subject: jfs: removed a prohibited space after opening parenthesis Fixed a coding style issue. Signed-off-by: Nan Jia Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/file.c b/fs/jfs/file.c index e98d39d..b9dc23c 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -76,7 +76,7 @@ static int jfs_open(struct inode *inode, struct file *file) if (ji->active_ag == -1) { struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); - atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); + atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } -- cgit v0.10.2 From f7f31adf07d1c9e66c160e9bdd77e12531cd6996 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Jun 2015 17:57:03 +0100 Subject: jfs: fix indentation on if statement The if statement and closing brace are indented by 1 extra space, so remove this extra spacing. Cosmetic change only. Signed-off-by: Colin Ian King Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 070dc4b..28d69fa 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -133,11 +133,11 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) * It has been committed since the last change, but was still * on the dirty inode list. */ - if (!test_cflag(COMMIT_Dirty, inode)) { + if (!test_cflag(COMMIT_Dirty, inode)) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); return 0; - } + } if (jfs_commit_inode(inode, wait)) { jfs_err("jfs_write_inode: jfs_commit_inode failed!"); -- cgit v0.10.2 From 7c258670ce655659a4c8d1013676c55e74d09ee7 Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Thu, 25 Jun 2015 23:27:55 -0700 Subject: Bluetooth: hidp: Initialize list header of hidp session user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When new hidp session is created, list header in l2cap_user is not initialized and this causes list_empty() to fail in l2cap_register_user() even if l2cap_user list is empty. Signed-off-by: Tedd Ho-Jeong An Tested-by: Jörg Otte Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 9070dfd..f1a117f 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, session->conn = l2cap_conn_get(conn); session->user.probe = hidp_session_probe; session->user.remove = hidp_session_remove; + INIT_LIST_HEAD(&session->user.list); session->ctrl_sock = ctrl_sock; session->intr_sock = intr_sock; skb_queue_head_init(&session->ctrl_transmit); -- cgit v0.10.2 From ab944c83f6690df0c7f67e6bcc29fc0c82ef6021 Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Tue, 30 Jun 2015 11:43:40 -0700 Subject: Bluetooth: Reinitialize the list after deletion for session user list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user->list is deleted with list_del(), it doesn't initialize the entry which can cause the issue with list_empty(). According to the comment from the list.h, list_empty() returns false even if the list is empty and put the entry in an undefined state. /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ Because of this behavior, list_empty() returns false even if list is empty when the device is reconnected. So, user->list needs to be re-initialized after list_del(). list.h already have a macro list_del_init() which deletes the entry and initailze it again. Signed-off-by: Tedd Ho-Jeong An Tested-by: Jörg Otte Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 51594fb..45fffa4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1634,7 +1634,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) if (list_empty(&user->list)) goto out_unlock; - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); out_unlock: @@ -1648,7 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn) while (!list_empty(&conn->users)) { user = list_first_entry(&conn->users, struct l2cap_user, list); - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); } } -- cgit v0.10.2 From db490cb9aeb77b392f4bdaad3ebe96ea1d5c7bfe Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Tue, 17 Mar 2015 16:02:20 +0100 Subject: s390/zcrypt: enable s390 hwrng to seed kernel entropy Set the 'quality' property in the zcrypt rng device structure to enable the zcrypt hwrng device to take part in the kernel entropy seeding process. A module parameter named hwrng_seed will be introduced to disable the participation. By default this parameter is set to 1 (enabled). Signed-off-by: Ingo Tuchscherer Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 08f1830..01bf1f5 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -54,6 +54,10 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor interface, " \ "Copyright IBM Corp. 2001, 2012"); MODULE_LICENSE("GPL"); +static int zcrypt_hwrng_seed = 1; +module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, S_IRUSR|S_IRGRP); +MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on)."); + static DEFINE_SPINLOCK(zcrypt_device_lock); static LIST_HEAD(zcrypt_device_list); static int zcrypt_device_count = 0; @@ -1373,6 +1377,7 @@ static int zcrypt_rng_data_read(struct hwrng *rng, u32 *data) static struct hwrng zcrypt_rng_dev = { .name = "zcrypt", .data_read = zcrypt_rng_data_read, + .quality = 990, }; static int zcrypt_rng_device_add(void) @@ -1387,6 +1392,8 @@ static int zcrypt_rng_device_add(void) goto out; } zcrypt_rng_buffer_index = 0; + if (!zcrypt_hwrng_seed) + zcrypt_rng_dev.quality = 0; rc = hwrng_register(&zcrypt_rng_dev); if (rc) goto out_free; -- cgit v0.10.2 From a313bdc5310dd807655d3ca3eb2219cd65dfe45a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Jun 2015 09:32:22 +0200 Subject: s390/sclp: fix compile error Fix this error when compiling with CONFIG_SMP=n and CONFIG_DYNAMIC_DEBUG=y: drivers/s390/char/sclp_early.c: In function 'sclp_read_info_early': drivers/s390/char/sclp_early.c:87:19: error: 'EBUSY' undeclared (first use in this function) } while (rc == -EBUSY); ^ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index aeed796..7bc6df3 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "sclp_early" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include #include #include -- cgit v0.10.2 From a215c8fbde8600d133f122691d0f8c30de6dda02 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Jun 2015 14:52:13 +0200 Subject: s390/oprofile: fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix these errors when compiling with CONFIG_OPROFILE=y and CONFIG_PERF_EVENTS=n: arch/s390/oprofile/init.c: In function ‘oprofile_hwsampler_start’: arch/s390/oprofile/init.c:93:2: error: implicit declaration of function 'perf_reserve_sampling' [-Werror=implicit-function-declaration] retval = perf_reserve_sampling(); ^ arch/s390/oprofile/init.c:99:3: error: implicit declaration of function 'perf_release_sampling' [-Werror=implicit-function-declaration] perf_release_sampling(); ^ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 4cb19fe..f897ec7 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -87,7 +87,15 @@ struct sf_raw_sample { } __packed; /* Perf hardware reserve and release functions */ +#ifdef CONFIG_PERF_EVENTS int perf_reserve_sampling(void); void perf_release_sampling(void); +#else /* CONFIG_PERF_EVENTS */ +static inline int perf_reserve_sampling(void) +{ + return 0; +} +static inline void perf_release_sampling(void) {} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index bc927a0..9cfa2ff 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "../../../drivers/oprofile/oprof.h" -- cgit v0.10.2 From f307170d6e591a48529425b1ed6ca835790995a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 17:23:37 -0500 Subject: netfilter: nf_queue: Don't recompute the hook_list head If someone sends packets from one of the netdevice ingress hooks to the a userspace queue, and then userspace later accepts the packet, the netfilter code can enter an infinite loop as the list head will never be found. Pass in the saved list_head to avoid this. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index cd60d39..8a8b2ab 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -213,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], + verdict = nf_iterate(entry->state.hook_list, skb, &entry->state, &elem); } -- cgit v0.10.2 From a1bc1b356a9d21bf29bc7c873718b5cacdf119b4 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 20 Jun 2015 00:17:50 +0200 Subject: netfilter: bridge: fix CONFIG_NF_DEFRAG_IPV4/6 related warnings/errors br_nf_ip_fragment() is not needed when neither CONFIG_NF_DEFRAG_IPV4 nor CONFIG_NF_DEFRAG_IPV6 is set. struct brnf_frag_data must be available if either CONFIG_NF_DEFRAG_IPV4 or CONFIG_NF_DEFRAG_IPV6 is set. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d89f4fa..8a394bd 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -111,7 +111,7 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; @@ -694,6 +694,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) { @@ -712,6 +713,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +#endif static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { -- cgit v0.10.2 From 3bd229976f64bea64c60803f9fc8d9f0059ba2f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:21:00 +0200 Subject: netfilter: arptables: use percpu jumpstack commit 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Unlike ip and ip6tables, arp tables were never converted to use the percpu jump stack. It still uses the rule blob to store return address, which isn't safe anymore since we now share this blob among all processors. Because there is no TEE support for arptables, we don't need to cope with reentrancy, so we can use loocal variable to hold stack offset. Fixes: 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 95c9b6e..92305a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; - struct arpt_entry *e, *back; + struct arpt_entry *e, **jumpstack; const char *indev, *outdev; const void *table_base; + unsigned int cpu, stackidx = 0; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + cpu = smp_processor_id(); /* * Ensure we load private-> members after we've fetched the base * pointer. */ smp_read_barrier_depends(); table_base = private->entries; + jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; e = get_entry(table_base, private->hook_entry[hook]); - back = get_entry(table_base, private->underflow[hook]); acpar.in = state->in; acpar.out = state->out; @@ -312,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = arpt_next_entry(e); + } continue; } if (table_base + v != arpt_next_entry(e)) { - /* Save old back ptr in next entry */ - struct arpt_entry *next = arpt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (stackidx >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; } e = get_entry(table_base, v); -- cgit v0.10.2 From dd302b59bde0149c20df7278c0d36c765e66afbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:27:51 +0200 Subject: netfilter: bridge: don't leak skb in error paths br_nf_dev_queue_xmit must free skb in its error path. NF_DROP is misleading -- its an okfn, not a netfilter hook. Fixes: 462fb2af9788a ("bridge : Sanitize skb before it enters the IP stack") Fixes: efb6de9b4ba00 ("netfilter: bridge: forward IPv6 fragmented packets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8a394bd..c8b9bcf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -744,7 +744,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv4(skb)) - return NF_DROP; + goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv6(skb)) - return NF_DROP; + goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -784,12 +784,16 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (v6ops) return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); - else - return -EMSGSIZE; + + kfree_skb(skb); + return -EMSGSIZE; } #endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); + drop: + kfree_skb(skb); + return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ -- cgit v0.10.2 From 6742b9e310bcf511b876532846e5302b07b7fedc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Jul 2015 16:14:25 +0200 Subject: netfilter: nfnetlink: keep going batch handling on missing modules After a fresh boot with no modules in place at all and a large rulesets, the existing nfnetlink_rcv_batch() funcion can take long time to commit the ruleset due to the many abort path. This is specifically a problem for the existing client of this code, ie. nf_tables, since it results in several synchronize_rcu() call in a row. This patch changes the policy to keep full batch processing on missing modules errors so we abort only once. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c9..0c0e8ec 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) } } +enum { + NFNL_BATCH_FAILURE = (1 << 0), + NFNL_BATCH_DONE = (1 << 1), + NFNL_BATCH_REPLAY = (1 << 2), +}; + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; - bool success = true, done = false; static LIST_HEAD(err_list); + u32 status; int err; if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: + status = 0; + skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); @@ -336,10 +344,10 @@ replay: if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ nfnl_err_reset(&err_list); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } else if (type == NFNL_MSG_BATCH_END) { - done = true; + status |= NFNL_BATCH_DONE; goto done; } else if (type < NLMSG_MIN_TYPE) { err = -EINVAL; @@ -382,11 +390,8 @@ replay: * original skb. */ if (err == -EAGAIN) { - nfnl_err_reset(&err_list); - ss->abort(oskb); - nfnl_unlock(subsys_id); - kfree_skb(skb); - goto replay; + status |= NFNL_BATCH_REPLAY; + goto next; } } ack: @@ -402,7 +407,7 @@ ack: */ nfnl_err_reset(&err_list); netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } /* We don't stop processing the batch on errors, thus, @@ -410,19 +415,26 @@ ack: * triggers. */ if (err) - success = false; + status |= NFNL_BATCH_FAILURE; } - +next: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } done: - if (success && done) + if (status & NFNL_BATCH_REPLAY) { + ss->abort(oskb); + nfnl_err_reset(&err_list); + nfnl_unlock(subsys_id); + kfree_skb(skb); + goto replay; + } else if (status == NFNL_BATCH_DONE) { ss->commit(oskb); - else + } else { ss->abort(oskb); + } nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); -- cgit v0.10.2 From 25c14ef86a0d5ec9320e833685c15bc96f504864 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Wed, 1 Jul 2015 14:21:57 +0530 Subject: enic: fix issues in enic_poll In enic_poll, we clean tx and rx queues, when low latency busy socket polling is happening, enic_poll will only clean tx queue. After cleaning tx, it should return total budget for re-poll. There is a small window between vnic_intr_unmask() and enic_poll_unlock_napi(). In this window if an irq occurs and napi is scheduled on different cpu, it tries to acquire enic_poll_lock_napi() and fails. Unlock napi_poll before unmasking the interrupt. v2: Do not change tx wonk done behaviour. Consider only rx work done for completing napi. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index da2004e..918a8e4 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1170,7 +1170,7 @@ static int enic_poll(struct napi_struct *napi, int budget) wq_work_done, 0 /* dont unmask intr */, 0 /* dont reset intr timer */); - return rq_work_done; + return budget; } if (budget > 0) @@ -1191,6 +1191,7 @@ static int enic_poll(struct napi_struct *napi, int budget) 0 /* don't reset intr timer */); err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); + enic_poll_unlock_napi(&enic->rq[cq_rq], napi); /* Buffer allocation failed. Stay in polling * mode so we can try to fill the ring again. @@ -1208,7 +1209,6 @@ static int enic_poll(struct napi_struct *napi, int budget) napi_complete(napi); vnic_intr_unmask(&enic->intr[intr]); } - enic_poll_unlock_napi(&enic->rq[cq_rq], napi); return rq_work_done; } -- cgit v0.10.2 From 462e1ead9296a8452499fb10cf3b51903ffe24ac Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 2 Jul 2015 05:48:17 -0700 Subject: bridge: vlan: fix usage of vlan 0 and 4095 again Vlan ids 0 and 4095 were disallowed by commit: 8adff41c3d25 ("bridge: Don't use VID 0 and 4095 in vlan filtering") but then the check was removed when vlan ranges were introduced by: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") So reintroduce the vlan range check. Before patch: [root@testvm ~]# bridge vlan add vid 0 dev eth0 master (succeeds) After Patch: [root@testvm ~]# bridge vlan add vid 0 dev eth0 master RTNETLINK answers: Invalid argument Signed-off-by: Nikolay Aleksandrov Fixes: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") Acked-by: Toshiaki Makita Signed-off-by: David S. Miller diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b67ed3..364bdc9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -457,6 +457,8 @@ static int br_afspec(struct net_bridge *br, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (vinfo_start) return -EINVAL; -- cgit v0.10.2 From 87775312a86bcf213e3b21f6f7c79e2e00d96f7b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 16:30:24 +0200 Subject: net-ipv6: Delete an unnecessary check before the function call "free_percpu" The free_percpu() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1a1122a..6090969 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -369,10 +369,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - - if (rt->rt6i_pcpu) - free_percpu(rt->rt6i_pcpu); - + free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; -- cgit v0.10.2 From f6e1c91666990d06ba37c76129495c724202144d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 17:58:21 +0200 Subject: net-RDS: Delete an unnecessary check before the function call "module_put" The module_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller diff --git a/net/rds/transport.c b/net/rds/transport.c index 8b4a6cd..83498e1 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister); void rds_trans_put(struct rds_transport *trans) { - if (trans && trans->t_owner) + if (trans) module_put(trans->t_owner); } -- cgit v0.10.2 From 92b80eb33c52583b48ed289bd993579b87b52d9a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 18:38:12 +0200 Subject: netlink: Delete an unnecessary check before the function call "module_put" The module_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dea9253..9a0ae71 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -158,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt) out: spin_unlock(&netlink_tap_lock); - if (found && nt->module) + if (found) module_put(nt->module); return found ? 0 : -ENODEV; -- cgit v0.10.2 From 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 Mon Sep 17 00:00:00 2001 From: Angga Date: Fri, 3 Jul 2015 14:40:52 +1200 Subject: ipv6: Make MLD packets to only be processed locally Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") MLD packets were only processed locally. After the change, a copy of MLD packet goes through ip6_mr_input, causing MRT6MSG_NOCACHE message to be generated to user space. Make MLD packet only processed locally. Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") Signed-off-by: Hermin Anggawijaya Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464e..57990c9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } -- cgit v0.10.2 From fda8b18c515a5e2caf821887ceafb42c35094eaf Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 3 Jul 2015 16:10:51 +0530 Subject: cxgb4: Fix incorrect sequence numbers shown in devlog Part of commit 49aa284fe64c4c1 ("cxgb4: Add support for devlog") change introduced a real bug where the Device Log Sequence Numbers are no longer being converted from firmware Big-Endian to local CPU-Endian format. This patch moves all of the translation into the devlog_show() routine. The only endianness code now in devlog_open() is the small loop to find the earliest (lowest Sequence Number) Device Log entry in the circular buffer. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 484eb8c..a11485f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -952,16 +952,23 @@ static int devlog_show(struct seq_file *seq, void *v) * eventually have to put a format interpreter in here ... */ seq_printf(seq, "%10d %15llu %8s %8s ", - e->seqno, e->timestamp, + be32_to_cpu(e->seqno), + be64_to_cpu(e->timestamp), (e->level < ARRAY_SIZE(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < ARRAY_SIZE(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); - seq_printf(seq, e->fmt, e->params[0], e->params[1], - e->params[2], e->params[3], e->params[4], - e->params[5], e->params[6], e->params[7]); + seq_printf(seq, e->fmt, + be32_to_cpu(e->params[0]), + be32_to_cpu(e->params[1]), + be32_to_cpu(e->params[2]), + be32_to_cpu(e->params[3]), + be32_to_cpu(e->params[4]), + be32_to_cpu(e->params[5]), + be32_to_cpu(e->params[6]), + be32_to_cpu(e->params[7])); } return 0; } @@ -1043,23 +1050,17 @@ static int devlog_open(struct inode *inode, struct file *file) return ret; } - /* Translate log multi-byte integral elements into host native format - * and determine where the first entry in the log is. + /* Find the earliest (lowest Sequence Number) log entry in the + * circular Device Log. */ for (fseqno = ~((u32)0), index = 0; index < dinfo->nentries; index++) { struct fw_devlog_e *e = &dinfo->log[index]; - int i; __u32 seqno; if (e->timestamp == 0) continue; - e->timestamp = (__force __be64)be64_to_cpu(e->timestamp); seqno = be32_to_cpu(e->seqno); - for (i = 0; i < 8; i++) - e->params[i] = - (__force __be32)be32_to_cpu(e->params[i]); - if (seqno < fseqno) { fseqno = seqno; dinfo->first = index; -- cgit v0.10.2 From 0fd972a7d91d6e15393c449492a04d94c0b89351 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:13:42 -0400 Subject: module: relocate module_init from init.h to module.h Modular users will always be users of init functionality, but users of init functionality are not necessarily always modules. Hence any functionality like module_init and module_exit would be more at home in the module.h file. And module.h should explicitly include init.h to make the dependency clear. We've already done all the legwork needed to ensure that this move does not cause any build regressions due to implicit header file include assumptions about where module_init lives. Cc: Rusty Russell Acked-by: Rusty Russell Signed-off-by: Paul Gortmaker diff --git a/include/linux/init.h b/include/linux/init.h index 7c68c36..b449f37 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -282,68 +282,8 @@ void __init parse_early_param(void); void __init parse_early_options(char *cmdline); #endif /* __ASSEMBLY__ */ -/** - * module_init() - driver initialization entry point - * @x: function to be run at kernel boot time or module insertion - * - * module_init() will either be called during do_initcalls() (if - * builtin) or at module insertion time (if a module). There can only - * be one per module. - */ -#define module_init(x) __initcall(x); - -/** - * module_exit() - driver exit entry point - * @x: function to be run when driver is removed - * - * module_exit() will wrap the driver clean-up code - * with cleanup_module() when used with rmmod when - * the driver is a module. If the driver is statically - * compiled into the kernel, module_exit() has no effect. - * There can only be one per module. - */ -#define module_exit(x) __exitcall(x); - #else /* MODULE */ -/* - * In most cases loadable modules do not need custom - * initcall levels. There are still some valid cases where - * a driver may be needed early if built in, and does not - * matter when built as a loadable module. Like bus - * snooping debug drivers. - */ -#define early_initcall(fn) module_init(fn) -#define core_initcall(fn) module_init(fn) -#define core_initcall_sync(fn) module_init(fn) -#define postcore_initcall(fn) module_init(fn) -#define postcore_initcall_sync(fn) module_init(fn) -#define arch_initcall(fn) module_init(fn) -#define subsys_initcall(fn) module_init(fn) -#define subsys_initcall_sync(fn) module_init(fn) -#define fs_initcall(fn) module_init(fn) -#define fs_initcall_sync(fn) module_init(fn) -#define rootfs_initcall(fn) module_init(fn) -#define device_initcall(fn) module_init(fn) -#define device_initcall_sync(fn) module_init(fn) -#define late_initcall(fn) module_init(fn) -#define late_initcall_sync(fn) module_init(fn) - -#define console_initcall(fn) module_init(fn) -#define security_initcall(fn) module_init(fn) - -/* Each module must use one module_init(). */ -#define module_init(initfn) \ - static inline initcall_t __inittest(void) \ - { return initfn; } \ - int init_module(void) __attribute__((alias(#initfn))); - -/* This is only required if you want to be unloadable. */ -#define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ - { return exitfn; } \ - void cleanup_module(void) __attribute__((alias(#exitfn))); - #define __setup_param(str, unique_id, fn) /* nothing */ #define __setup(str, func) /* nothing */ #endif @@ -351,24 +291,6 @@ void __init parse_early_options(char *cmdline); /* Data marked not to be saved by software suspend */ #define __nosavedata __section(.data..nosave) -/* This means "can be init if no module support, otherwise module load - may call it." */ -#ifdef CONFIG_MODULES -#define __init_or_module -#define __initdata_or_module -#define __initconst_or_module -#define __INIT_OR_MODULE .text -#define __INITDATA_OR_MODULE .data -#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits -#else -#define __init_or_module __init -#define __initdata_or_module __initdata -#define __initconst_or_module __initconst -#define __INIT_OR_MODULE __INIT -#define __INITDATA_OR_MODULE __INITDATA -#define __INITRODATA_OR_MODULE __INITRODATA -#endif /*CONFIG_MODULES*/ - #ifdef MODULE #define __exit_p(x) x #else diff --git a/include/linux/module.h b/include/linux/module.h index d67b193..3a19c79 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,89 @@ extern struct module_attribute module_uevent; extern int init_module(void); extern void cleanup_module(void); +#ifndef MODULE +/** + * module_init() - driver initialization entry point + * @x: function to be run at kernel boot time or module insertion + * + * module_init() will either be called during do_initcalls() (if + * builtin) or at module insertion time (if a module). There can only + * be one per module. + */ +#define module_init(x) __initcall(x); + +/** + * module_exit() - driver exit entry point + * @x: function to be run when driver is removed + * + * module_exit() will wrap the driver clean-up code + * with cleanup_module() when used with rmmod when + * the driver is a module. If the driver is statically + * compiled into the kernel, module_exit() has no effect. + * There can only be one per module. + */ +#define module_exit(x) __exitcall(x); + +#else /* MODULE */ + +/* + * In most cases loadable modules do not need custom + * initcall levels. There are still some valid cases where + * a driver may be needed early if built in, and does not + * matter when built as a loadable module. Like bus + * snooping debug drivers. + */ +#define early_initcall(fn) module_init(fn) +#define core_initcall(fn) module_init(fn) +#define core_initcall_sync(fn) module_init(fn) +#define postcore_initcall(fn) module_init(fn) +#define postcore_initcall_sync(fn) module_init(fn) +#define arch_initcall(fn) module_init(fn) +#define subsys_initcall(fn) module_init(fn) +#define subsys_initcall_sync(fn) module_init(fn) +#define fs_initcall(fn) module_init(fn) +#define fs_initcall_sync(fn) module_init(fn) +#define rootfs_initcall(fn) module_init(fn) +#define device_initcall(fn) module_init(fn) +#define device_initcall_sync(fn) module_init(fn) +#define late_initcall(fn) module_init(fn) +#define late_initcall_sync(fn) module_init(fn) + +#define console_initcall(fn) module_init(fn) +#define security_initcall(fn) module_init(fn) + +/* Each module must use one module_init(). */ +#define module_init(initfn) \ + static inline initcall_t __inittest(void) \ + { return initfn; } \ + int init_module(void) __attribute__((alias(#initfn))); + +/* This is only required if you want to be unloadable. */ +#define module_exit(exitfn) \ + static inline exitcall_t __exittest(void) \ + { return exitfn; } \ + void cleanup_module(void) __attribute__((alias(#exitfn))); + +#endif + +/* This means "can be init if no module support, otherwise module load + may call it." */ +#ifdef CONFIG_MODULES +#define __init_or_module +#define __initdata_or_module +#define __initconst_or_module +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits +#else +#define __init_or_module __init +#define __initdata_or_module __initdata +#define __initconst_or_module __initconst +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#define __INITRODATA_OR_MODULE __INITRODATA +#endif /*CONFIG_MODULES*/ + /* Archs provide a method of finding the correct exception table. */ struct exception_table_entry; -- cgit v0.10.2 From 14a0abfc4ab0b49c7e48eca5c6b31288ea457dee Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 26 Jun 2015 12:42:53 +0530 Subject: ARC: Kconfig: better way to disable ARC_HAS_LLSC for ARC_CPU_750D Signed-off-by: Vineet Gupta diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e7cee0a..91cf405 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -115,6 +115,7 @@ if ISA_ARCOMPACT config ARC_CPU_750D bool "ARC750D" + select ARC_CANT_LLSC help Support for ARC750 core @@ -362,7 +363,7 @@ config ARC_CANT_LLSC config ARC_HAS_LLSC bool "Insn: LLOCK/SCOND (efficient atomic ops)" default y - depends on !ARC_CPU_750D && !ARC_CANT_LLSC + depends on !ARC_CANT_LLSC config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" -- cgit v0.10.2 From b607eddd7122595bbcf49c00192faf3e49b142d3 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 29 Jun 2015 15:24:37 +0300 Subject: ARCv2: guard SLC DMA ops with spinlock SLC maintenance ops need to be serialized by software as there is no inherent buffering / quequing of aux commands. It can silently ignore a new aux operation if previous one is still ongoing (SLC_CTRL_BUSY) So gaurd the SLC op using a spin lock The spin lock doesn't seem to be contended even in heavy workloads such as iperf. On FPGA @ 75 MHz. [1] Before this change: ============================================================ # iperf -c 10.42.0.1 ------------------------------------------------------------ Client connecting to 10.42.0.1, TCP port 5001 TCP window size: 43.8 KByte (default) ------------------------------------------------------------ [ 3] local 10.42.0.110 port 38935 connected with 10.42.0.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 48.4 MBytes 40.6 Mbits/sec ============================================================ [2] After this change: ============================================================ # iperf -c 10.42.0.1 ------------------------------------------------------------ Client connecting to 10.42.0.1, TCP port 5001 TCP window size: 43.8 KByte (default) ------------------------------------------------------------ [ 3] local 10.42.0.243 port 60248 connected with 10.42.0.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 47.5 MBytes 39.8 Mbits/sec # iperf -c 10.42.0.1 ------------------------------------------------------------ Client connecting to 10.42.0.1, TCP port 5001 TCP window size: 43.8 KByte (default) ------------------------------------------------------------ [ 3] local 10.42.0.243 port 60249 connected with 10.42.0.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 54.9 MBytes 46.0 Mbits/sec ============================================================ Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com Signed-off-by: Vineet Gupta diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index b29d62e..1cd6695 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -468,10 +468,18 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned int ctrl; - local_irq_save(flags); + spin_lock_irqsave(&lock, flags); /* * The Region Flush operation is specified by CTRL.RGN_OP[11..9] @@ -504,7 +512,7 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); - local_irq_restore(flags); + spin_unlock_irqrestore(&lock, flags); #endif } -- cgit v0.10.2 From 61754c18752ffb78145671e94f053fb202fff041 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 1 Jul 2015 17:19:30 +0200 Subject: kbuild: Allow arch Makefiles to override {cpp,ld,c}flags Since commit a1c48bb1 (Makefile: Fix unrecognized cross-compiler command line options), the arch Makefile is included earlier by the main Makefile, preventing the arc architecture to set its -O3 compiler option. Since there might be more use cases for an arch Makefile to fine-tune the options, add support for ARCH_CPPFLAGS, ARCH_AFLAGS and ARCH_CFLAGS variables that are appended to the respective kbuild variables. The user still has the final say via the KCPPFLAGS, KAFLAGS and KCFLAGS variables. Reported-by: Vineet Gupta Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Michal Marek diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index e63b446..13f888a 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -952,6 +952,14 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic mode) if this option is supported by $(AR). + ARCH_CPPFLAGS, ARCH_AFLAGS, ARCH_CFLAGS Overrides the kbuild defaults + + These variables are appended to the KBUILD_CPPFLAGS, + KBUILD_AFLAGS, and KBUILD_CFLAGS, respectively, after the + top-level Makefile has set any other flags. This provides a + means for an architecture to override the defaults. + + --- 6.2 Add prerequisites to archheaders: The archheaders: rule is used to generate header files that diff --git a/Makefile b/Makefile index 13270c0..8c31fbc 100644 --- a/Makefile +++ b/Makefile @@ -780,10 +780,11 @@ endif include scripts/Makefile.kasan include scripts/Makefile.extrawarn -# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments -KBUILD_CPPFLAGS += $(KCPPFLAGS) -KBUILD_AFLAGS += $(KAFLAGS) -KBUILD_CFLAGS += $(KCFLAGS) +# Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the +# last assignments +KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS) +KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS) +KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS) # Use --build-id when available. LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ -- cgit v0.10.2 From 97709069214eb75312c14946803b9da4d3814203 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 18 Jun 2015 13:54:01 +0530 Subject: ARC: Override toplevel default -O2 with -O3 ARC kernels have historically been built with -O3, despite top level Makefile defaulting to -O2. This was facilitated by implicitly ordering of arch makefile include AFTER top level assigned -O2. An upstream fix to top level a1c48bb160f ("Makefile: Fix unrecognized cross-compiler command line options") changed the ordering, making ARC -O3 defunct. Fix that by NOT relying on any ordering whatsoever and use the proper arch override facility now present in kbuild (ARCH_*FLAGS) Depends-on: ("kbuild: Allow arch Makefiles to override {cpp,ld,c}flags") Suggested-by: Michal Marek Cc: Geert Uytterhoeven Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Vineet Gupta diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 6107062..46d8731 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -49,7 +49,8 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 -cflags-y += -O3 +# Note: No need to add to cflags-y as that happens anyways +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it -- cgit v0.10.2 From f718c2efff0b0460e5335607a1c6caf620847680 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Jul 2015 10:40:43 +0530 Subject: ARC: Don't memzero twice in dma_alloc_coherent for __GFP_ZERO alloc_pages_exact() get gfp flags and handle zero'ing already And while it, fix the case where ioremap fails: return rightaway. Signed-off-by: Vineet Gupta diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 74a637a..57706a9 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -60,8 +60,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, /* This is kernel Virtual address (0x7000_0000 based) */ kvaddr = ioremap_nocache((unsigned long)paddr, size); - if (kvaddr != NULL) - memset(kvaddr, 0, size); + if (kvaddr == NULL) + return NULL; /* This is bus address, platform dependent */ *dma_handle = (dma_addr_t)paddr; -- cgit v0.10.2 From bccea41ec02301a439a26e43fa49521ae8da4352 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Jul 2015 10:25:01 +0530 Subject: ARC: fix unused var wanring Fixes: 9bf39ab2adaf ("vfs: add file_path() helper") Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 807f7d6..a6f91e8 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -58,7 +58,6 @@ static void show_callee_regs(struct callee_regs *cregs) static void print_task_path_n_nm(struct task_struct *tsk, char *buf) { - struct path path; char *path_nm = NULL; struct mm_struct *mm; struct file *exe_file; -- cgit v0.10.2 From 83ce3e6fcc16b4d36d40765618777b5a6a30d75b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 30 Jun 2015 13:37:28 +0530 Subject: ARCv2: intc: IDU: support irq affinity With this nsim standlone / OSCI have working irq affinity - AXS103 still needs some work as IDU is not visible in intc hierarchy yet ! Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 30284e8..dfeea22 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -218,11 +218,28 @@ static void idu_irq_unmask(struct irq_data *data) raw_spin_unlock_irqrestore(&mcip_lock, flags); } +#ifdef CONFIG_SMP static int -idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f) +idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, + bool force) { + unsigned long flags; + cpumask_t online; + + /* errout if no online cpu per @cpumask */ + if (!cpumask_and(&online, cpumask, cpu_online_mask)) + return -EINVAL; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + idu_set_dest(data->hwirq, cpumask_bits(&online)[0]); + idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + return IRQ_SET_MASK_OK; } +#endif static struct irq_chip idu_irq_chip = { .name = "MCIP IDU Intc", -- cgit v0.10.2 From 6b12ec177c410ef984d2b97717df77c9269eaeac Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 2 Jul 2015 14:02:54 +0530 Subject: ARCv2: intc: IDU: Fix potential race in installing a chained IRQ handler Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index dfeea22..6fb0a2f 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -347,8 +347,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent) if (!i) idu_first_irq = irq; - irq_set_handler_data(irq, domain); - irq_set_chained_handler(irq, idu_cascade_isr); + irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain); } __mcip_cmd(CMD_IDU_ENABLE, 0); -- cgit v0.10.2 From acb33cc541d7a5495b16a133702d4c401ea4e294 Mon Sep 17 00:00:00 2001 From: "Vutla, Lokesh" Date: Thu, 2 Jul 2015 18:33:28 +0530 Subject: crypto: omap-des - Fix unmapping of dma channels dma_unmap_sg() is being called twice after completing the task. Looks like this is a copy paste error when creating des driver. With this the following warn appears during boot: [ 4.210457] ------------[ cut here ]------------ [ 4.215114] WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1080 check_unmap+0x710/0x9a0() [ 4.222899] omap-des 480a5000.des: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000000ab2ce000] [size=8 bytes] [ 4.236785] Modules linked in: [ 4.239860] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.39-02999-g1bc045a-dirty #182 [ 4.247918] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 4.255710] [] (show_stack) from [] (dump_stack+0x84/0xb8) [ 4.262977] [] (dump_stack) from [] (warn_slowpath_common+0x68/0x8c) [ 4.271107] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 4.279854] [] (warn_slowpath_fmt) from [] (check_unmap+0x710/0x9a0) [ 4.287991] [] (check_unmap) from [] (debug_dma_unmap_sg+0x90/0x19c) [ 4.296128] [] (debug_dma_unmap_sg) from [] (omap_des_done_task+0x1cc/0x3e4) [ 4.304963] [] (omap_des_done_task) from [] (tasklet_action+0x84/0x124) [ 4.313370] [] (tasklet_action) from [] (__do_softirq+0xf0/0x20c) [ 4.321235] [] (__do_softirq) from [] (irq_exit+0x98/0xec) [ 4.328500] [] (irq_exit) from [] (handle_IRQ+0x50/0xb0) [ 4.335589] [] (handle_IRQ) from [] (gic_handle_irq+0x28/0x5c) Removing the duplicate call to dma_unmap_sg(). Cc: stable@vger.kernel.org Reported-by: Tomi Valkeinen Signed-off-by: Lokesh Vutla Signed-off-by: Herbert Xu diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 4630709..0a70e46 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -536,9 +536,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) dmaengine_terminate_all(dd->dma_lch_in); dmaengine_terminate_all(dd->dma_lch_out); - dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); - dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); - return err; } -- cgit v0.10.2 From b78fb51b68884ba9b8541a2f1914e8b6d054474c Mon Sep 17 00:00:00 2001 From: "qipeng.zha" Date: Tue, 7 Jul 2015 00:04:45 +0800 Subject: intel_pmc_ipc: Fix compiler casting warnings Avoid casting variables to different sizes due to different compilers and settings. Reported-by: Fengguang Wu Signed-off-by: qipeng.zha Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index d734763..69d43b8 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -96,18 +96,18 @@ static struct intel_pmc_ipc_dev { struct completion cmd_complete; /* The following PMC BARs share the same ACPI device with the IPC */ - void *acpi_io_base; + resource_size_t acpi_io_base; int acpi_io_size; struct platform_device *tco_dev; /* gcr */ - void *gcr_base; + resource_size_t gcr_base; int gcr_size; /* punit */ - void *punit_base; + resource_size_t punit_base; int punit_size; - void *punit_base2; + resource_size_t punit_base2; int punit_size2; struct platform_device *punit_dev; } ipcdev; @@ -480,11 +480,11 @@ static int ipc_create_punit_device(void) pdev->dev.parent = ipcdev.dev; res = punit_res; - res->start = (resource_size_t)ipcdev.punit_base; + res->start = ipcdev.punit_base; res->end = res->start + ipcdev.punit_size - 1; res = punit_res + PUNIT_RESOURCE_INTER; - res->start = (resource_size_t)ipcdev.punit_base2; + res->start = ipcdev.punit_base2; res->end = res->start + ipcdev.punit_size2 - 1; ret = platform_device_add_resources(pdev, punit_res, @@ -522,15 +522,15 @@ static int ipc_create_tco_device(void) pdev->dev.parent = ipcdev.dev; res = tco_res + TCO_RESOURCE_ACPI_IO; - res->start = (resource_size_t)ipcdev.acpi_io_base + TCO_BASE_OFFSET; + res->start = ipcdev.acpi_io_base + TCO_BASE_OFFSET; res->end = res->start + TCO_REGS_SIZE - 1; res = tco_res + TCO_RESOURCE_SMI_EN_IO; - res->start = (resource_size_t)ipcdev.acpi_io_base + SMI_EN_OFFSET; + res->start = ipcdev.acpi_io_base + SMI_EN_OFFSET; res->end = res->start + SMI_EN_SIZE - 1; res = tco_res + TCO_RESOURCE_GCR_MEM; - res->start = (resource_size_t)ipcdev.gcr_base; + res->start = ipcdev.gcr_base; res->end = res->start + ipcdev.gcr_size - 1; ret = platform_device_add_resources(pdev, tco_res, ARRAY_SIZE(tco_res)); @@ -589,7 +589,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = resource_size(res); - ipcdev.acpi_io_base = (void *)res->start; + ipcdev.acpi_io_base = res->start; ipcdev.acpi_io_size = size; dev_info(&pdev->dev, "io res: %llx %x\n", (long long)res->start, (int)resource_size(res)); @@ -601,7 +601,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = resource_size(res); - ipcdev.punit_base = (void *)res->start; + ipcdev.punit_base = res->start; ipcdev.punit_size = size; dev_info(&pdev->dev, "punit data res: %llx %x\n", (long long)res->start, (int)resource_size(res)); @@ -613,7 +613,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = resource_size(res); - ipcdev.punit_base2 = (void *)res->start; + ipcdev.punit_base2 = res->start; ipcdev.punit_size2 = size; dev_info(&pdev->dev, "punit interface res: %llx %x\n", (long long)res->start, (int)resource_size(res)); @@ -637,7 +637,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) } ipcdev.ipc_base = addr; - ipcdev.gcr_base = (void *)(res->start + size); + ipcdev.gcr_base = res->start + size; ipcdev.gcr_size = PLAT_RESOURCE_GCR_SIZE; dev_info(&pdev->dev, "ipc res: %llx %x\n", (long long)res->start, (int)resource_size(res)); -- cgit v0.10.2 From ced53f6d12e497ba210a518e7e76178da987f932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 6 Jul 2015 12:08:55 +0200 Subject: dell-laptop: Clear buffer before each SMBIOS call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that before initializing SMBIOS call, the input buffer does not contain any garbage (e.g. values from previous SMBIOS call). This fixes problems with passing undefined/random parameters to SMBIOS functions. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index ed317cc..85fbe7c 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -311,10 +311,15 @@ static DEFINE_MUTEX(buffer_mutex); static int hwswitch_state; +static void clear_buffer(void) +{ + memset(buffer, 0, sizeof(struct calling_interface_buffer)); +} + static void get_buffer(void) { mutex_lock(&buffer_mutex); - memset(buffer, 0, sizeof(struct calling_interface_buffer)); + clear_buffer(); } static void release_buffer(void) @@ -558,6 +563,8 @@ static int dell_rfkill_set(void *data, bool blocked) !(buffer->output[1] & BIT(16))) disable = 1; + clear_buffer(); + buffer->input[0] = (1 | (radio<<8) | (disable << 16)); dell_send_request(buffer, 17, 11); @@ -572,6 +579,7 @@ static void dell_rfkill_update_sw_state(struct rfkill *rfkill, int radio, if (status & BIT(0)) { /* Has hw-switch, sync sw_state to BIOS */ int block = rfkill_blocked(rfkill); + clear_buffer(); buffer->input[0] = (1 | (radio << 8) | (block << 16)); dell_send_request(buffer, 17, 11); } else { @@ -774,6 +782,7 @@ static int __init dell_setup_rfkill(void) get_buffer(); dell_send_request(buffer, 17, 11); status = buffer->output[1]; + clear_buffer(); buffer->input[0] = 0x2; dell_send_request(buffer, 17, 11); hwswitch_state = buffer->output[1]; -- cgit v0.10.2 From 715d0cdd5a459908c2b2eb1cfa30d6f6d1d7d710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 6 Jul 2015 12:08:56 +0200 Subject: dell-laptop: Check return value of each SMBIOS call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that return value of each SMBIOS call is properly checked and do not continue processing output if the call failed. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 85fbe7c..efcfc49 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -553,9 +553,15 @@ static int dell_rfkill_set(void *data, bool blocked) int disable = blocked ? 1 : 0; unsigned long radio = (unsigned long)data; int hwswitch_bit = (unsigned long)data - 1; + int ret; get_buffer(); + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + + if (ret != 0) + goto out; /* If the hardware switch controls this radio, and the hardware switch is disabled, always disable the radio */ @@ -567,9 +573,11 @@ static int dell_rfkill_set(void *data, bool blocked) buffer->input[0] = (1 | (radio<<8) | (disable << 16)); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + out: release_buffer(); - return 0; + return dell_smi_error(ret); } /* Must be called with the buffer held */ @@ -598,14 +606,18 @@ static void dell_rfkill_update_hw_state(struct rfkill *rfkill, int radio, static void dell_rfkill_query(struct rfkill *rfkill, void *data) { int status; + int ret; get_buffer(); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; + release_buffer(); - dell_rfkill_update_hw_state(rfkill, (unsigned long)data, status); + if (ret != 0) + return; - release_buffer(); + dell_rfkill_update_hw_state(rfkill, (unsigned long)data, status); } static const struct rfkill_ops dell_rfkill_ops = { @@ -618,12 +630,15 @@ static struct dentry *dell_laptop_dir; static int dell_debugfs_show(struct seq_file *s, void *data) { int status; + int ret; get_buffer(); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; release_buffer(); + seq_printf(s, "return:\t%d\n", ret); seq_printf(s, "status:\t0x%X\n", status); seq_printf(s, "Bit 0 : Hardware switch supported: %lu\n", status & BIT(0)); @@ -702,11 +717,17 @@ static const struct file_operations dell_debugfs_fops = { static void dell_update_rfkill(struct work_struct *ignored) { int status; + int ret; get_buffer(); + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; + if (ret != 0) + goto out; + if (wifi_rfkill) { dell_rfkill_update_hw_state(wifi_rfkill, 1, status); dell_rfkill_update_sw_state(wifi_rfkill, 1, status); @@ -720,6 +741,7 @@ static void dell_update_rfkill(struct work_struct *ignored) dell_rfkill_update_sw_state(wwan_rfkill, 3, status); } + out: release_buffer(); } static DECLARE_DELAYED_WORK(dell_rfkill_work, dell_update_rfkill); @@ -781,6 +803,7 @@ static int __init dell_setup_rfkill(void) get_buffer(); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; clear_buffer(); buffer->input[0] = 0x2; @@ -788,6 +811,10 @@ static int __init dell_setup_rfkill(void) hwswitch_state = buffer->output[1]; release_buffer(); + /* dell wireless info smbios call is not supported */ + if (ret != 0) + return 0; + if (!(status & BIT(0))) { if (force_rfkill) { /* No hwsitch, clear all hw-controlled bits */ @@ -941,47 +968,50 @@ static void dell_cleanup_rfkill(void) static int dell_send_intensity(struct backlight_device *bd) { - int ret = 0; + int token; + int ret; + + token = find_token_location(BRIGHTNESS_TOKEN); + if (token == -1) + return -ENODEV; get_buffer(); - buffer->input[0] = find_token_location(BRIGHTNESS_TOKEN); + buffer->input[0] = token; buffer->input[1] = bd->props.brightness; - if (buffer->input[0] == -1) { - ret = -ENODEV; - goto out; - } - if (power_supply_is_system_supplied() > 0) dell_send_request(buffer, 1, 2); else dell_send_request(buffer, 1, 1); - out: + ret = dell_smi_error(buffer->output[0]); + release_buffer(); return ret; } static int dell_get_intensity(struct backlight_device *bd) { - int ret = 0; + int token; + int ret; - get_buffer(); - buffer->input[0] = find_token_location(BRIGHTNESS_TOKEN); + token = find_token_location(BRIGHTNESS_TOKEN); + if (token == -1) + return -ENODEV; - if (buffer->input[0] == -1) { - ret = -ENODEV; - goto out; - } + get_buffer(); + buffer->input[0] = token; if (power_supply_is_system_supplied() > 0) dell_send_request(buffer, 0, 2); else dell_send_request(buffer, 0, 1); - ret = buffer->output[1]; + if (buffer->output[0]) + ret = dell_smi_error(buffer->output[0]); + else + ret = buffer->output[1]; - out: release_buffer(); return ret; } @@ -2045,6 +2075,7 @@ static void kbd_led_exit(void) static int __init dell_init(void) { int max_intensity = 0; + int token; int ret; if (!dmi_check_system(dell_device_table)) @@ -2103,13 +2134,15 @@ static int __init dell_init(void) if (acpi_video_get_backlight_type() != acpi_backlight_vendor) return 0; - get_buffer(); - buffer->input[0] = find_token_location(BRIGHTNESS_TOKEN); - if (buffer->input[0] != -1) { + token = find_token_location(BRIGHTNESS_TOKEN); + if (token != -1) { + get_buffer(); + buffer->input[0] = token; dell_send_request(buffer, 0, 2); - max_intensity = buffer->output[3]; + if (buffer->output[0] == 0) + max_intensity = buffer->output[3]; + release_buffer(); } - release_buffer(); if (max_intensity) { struct backlight_properties props; -- cgit v0.10.2 From 22565ba0bf231eb4267b1d2ebad300d55b28a427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 6 Jul 2015 12:08:57 +0200 Subject: dell-laptop: Do not cache hwswitch state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hwswitch state can be changed at runtime, so make sure dell-laptop always knows the current state. It can be modified by the userspace utility smbios-wireless-ctl. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index efcfc49..aaeeae8 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -309,8 +309,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = { static struct calling_interface_buffer *buffer; static DEFINE_MUTEX(buffer_mutex); -static int hwswitch_state; - static void clear_buffer(void) { memset(buffer, 0, sizeof(struct calling_interface_buffer)); @@ -553,20 +551,30 @@ static int dell_rfkill_set(void *data, bool blocked) int disable = blocked ? 1 : 0; unsigned long radio = (unsigned long)data; int hwswitch_bit = (unsigned long)data - 1; + int hwswitch; + int status; int ret; get_buffer(); dell_send_request(buffer, 17, 11); ret = buffer->output[0]; + status = buffer->output[1]; if (ret != 0) goto out; + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + hwswitch = buffer->output[1]; + /* If the hardware switch controls this radio, and the hardware switch is disabled, always disable the radio */ - if ((hwswitch_state & BIT(hwswitch_bit)) && - !(buffer->output[1] & BIT(16))) + if (ret == 0 && (hwswitch & BIT(hwswitch_bit)) && + (status & BIT(0)) && !(status & BIT(16))) disable = 1; clear_buffer(); @@ -597,27 +605,43 @@ static void dell_rfkill_update_sw_state(struct rfkill *rfkill, int radio, } static void dell_rfkill_update_hw_state(struct rfkill *rfkill, int radio, - int status) + int status, int hwswitch) { - if (hwswitch_state & (BIT(radio - 1))) + if (hwswitch & (BIT(radio - 1))) rfkill_set_hw_state(rfkill, !(status & BIT(16))); } static void dell_rfkill_query(struct rfkill *rfkill, void *data) { + int radio = ((unsigned long)data & 0xF); + int hwswitch; int status; int ret; get_buffer(); + dell_send_request(buffer, 17, 11); ret = buffer->output[0]; status = buffer->output[1]; + + if (ret != 0 || !(status & BIT(0))) { + release_buffer(); + return; + } + + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + hwswitch = buffer->output[1]; + release_buffer(); if (ret != 0) return; - dell_rfkill_update_hw_state(rfkill, (unsigned long)data, status); + dell_rfkill_update_hw_state(rfkill, radio, status, hwswitch); } static const struct rfkill_ops dell_rfkill_ops = { @@ -629,13 +653,24 @@ static struct dentry *dell_laptop_dir; static int dell_debugfs_show(struct seq_file *s, void *data) { + int hwswitch_state; + int hwswitch_ret; int status; int ret; get_buffer(); + dell_send_request(buffer, 17, 11); ret = buffer->output[0]; status = buffer->output[1]; + + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + hwswitch_ret = buffer->output[0]; + hwswitch_state = buffer->output[1]; + release_buffer(); seq_printf(s, "return:\t%d\n", ret); @@ -680,7 +715,8 @@ static int dell_debugfs_show(struct seq_file *s, void *data) seq_printf(s, "Bit 21: WiGig is blocked: %lu\n", (status & BIT(21)) >> 21); - seq_printf(s, "\nhwswitch_state:\t0x%X\n", hwswitch_state); + seq_printf(s, "\nhwswitch_return:\t%d\n", hwswitch_ret); + seq_printf(s, "hwswitch_state:\t0x%X\n", hwswitch_state); seq_printf(s, "Bit 0 : Wifi controlled by switch: %lu\n", hwswitch_state & BIT(0)); seq_printf(s, "Bit 1 : Bluetooth controlled by switch: %lu\n", @@ -716,6 +752,7 @@ static const struct file_operations dell_debugfs_fops = { static void dell_update_rfkill(struct work_struct *ignored) { + int hwswitch = 0; int status; int ret; @@ -728,16 +765,26 @@ static void dell_update_rfkill(struct work_struct *ignored) if (ret != 0) goto out; + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + + if (ret == 0 && (status & BIT(0))) + hwswitch = buffer->output[1]; + if (wifi_rfkill) { - dell_rfkill_update_hw_state(wifi_rfkill, 1, status); + dell_rfkill_update_hw_state(wifi_rfkill, 1, status, hwswitch); dell_rfkill_update_sw_state(wifi_rfkill, 1, status); } if (bluetooth_rfkill) { - dell_rfkill_update_hw_state(bluetooth_rfkill, 2, status); + dell_rfkill_update_hw_state(bluetooth_rfkill, 2, status, + hwswitch); dell_rfkill_update_sw_state(bluetooth_rfkill, 2, status); } if (wwan_rfkill) { - dell_rfkill_update_hw_state(wwan_rfkill, 3, status); + dell_rfkill_update_hw_state(wwan_rfkill, 3, status, hwswitch); dell_rfkill_update_sw_state(wwan_rfkill, 3, status); } @@ -805,25 +852,15 @@ static int __init dell_setup_rfkill(void) dell_send_request(buffer, 17, 11); ret = buffer->output[0]; status = buffer->output[1]; - clear_buffer(); - buffer->input[0] = 0x2; - dell_send_request(buffer, 17, 11); - hwswitch_state = buffer->output[1]; release_buffer(); /* dell wireless info smbios call is not supported */ if (ret != 0) return 0; - if (!(status & BIT(0))) { - if (force_rfkill) { - /* No hwsitch, clear all hw-controlled bits */ - hwswitch_state &= ~7; - } else { - /* rfkill is only tested on laptops with a hwswitch */ - return 0; - } - } + /* rfkill is only tested on laptops with a hwswitch */ + if (!(status & BIT(0)) && !force_rfkill) + return 0; if ((status & (1<<2|1<<8)) == (1<<2|1<<8)) { wifi_rfkill = rfkill_alloc("dell-wifi", &platform_device->dev, -- cgit v0.10.2 From f9c87a6f46d508eae0d9ae640be98d50f237f827 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Jul 2015 17:58:19 +0200 Subject: s390/sclp: clear upper register halves in _sclp_print_early If the kernel is compiled with gcc 5.1 and the XZ compression option the decompress_kernel function calls _sclp_print_early in 64-bit mode while the content of the upper register half of %r6 is non-zero. This causes a specification exception on the servc instruction in _sclp_servc. The _sclp_print_early function saves and restores the upper registers halves but it fails to clear them for the 31-bit code of the mini sclp driver. Cc: Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index 43c3169..ada0c07 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -270,6 +270,8 @@ ENTRY(_sclp_print_early) jno .Lesa2 ahi %r15,-80 stmh %r6,%r15,96(%r15) # store upper register halves + basr %r13,0 + lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves .Lesa2: lr %r10,%r2 # save string pointer lhi %r2,0 @@ -291,6 +293,8 @@ ENTRY(_sclp_print_early) .Lesa3: lm %r6,%r15,120(%r15) # restore registers br %r14 +.Lzeroes: + .fill 64,4,0 .LwritedataS4: .long 0x00760005 # SCLP command for write data -- cgit v0.10.2 From bb8bd38b9a1685334b73e8c62e128cbedb875867 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 1 Jul 2015 12:57:40 -0400 Subject: bio integrity: do not assume bio_integrity_pool exists if bioset exists bio_integrity_alloc() and bio_integrity_free() assume that if a bio was allocated from a bioset that that bioset also had its bio_integrity_pool allocated using bioset_integrity_create(). This is a very bad assumption given that bioset_create() and bioset_integrity_create() are completely disjoint. Not all callers of bioset_create() have been trained to also call bioset_integrity_create() -- and they may not care to be. Fix this by falling back to kmalloc'ing 'struct bio_integrity_payload' rather than force all bioset consumers to (wastefully) preallocate a bio_integrity_pool that they very likely won't actually need (given the niche nature of the current block integrity support). Otherwise, a NULL pointer "Kernel BUG" with a trace like the following will be observed (as seen on s390x using zfcp storage) because dm-io doesn't use bioset_integrity_create() when creating its bioset: [ 791.643338] Call Trace: [ 791.643339] ([<00000003df98b848>] 0x3df98b848) [ 791.643341] [<00000000002c5de8>] bio_integrity_alloc+0x48/0xf8 [ 791.643348] [<00000000002c6486>] bio_integrity_prep+0xae/0x2f0 [ 791.643349] [<0000000000371e38>] blk_queue_bio+0x1c8/0x3d8 [ 791.643355] [<000000000036f8d0>] generic_make_request+0xc0/0x100 [ 791.643357] [<000000000036f9b2>] submit_bio+0xa2/0x198 [ 791.643406] [<000003ff801f9774>] dispatch_io+0x15c/0x3b0 [dm_mod] [ 791.643419] [<000003ff801f9b3e>] dm_io+0x176/0x2f0 [dm_mod] [ 791.643423] [<000003ff8074b28a>] do_reads+0x13a/0x1a8 [dm_mirror] [ 791.643425] [<000003ff8074b43a>] do_mirror+0x142/0x298 [dm_mirror] [ 791.643428] [<0000000000154fca>] process_one_work+0x18a/0x3f8 [ 791.643432] [<000000000015598a>] worker_thread+0x132/0x3b0 [ 791.643435] [<000000000015d49a>] kthread+0xd2/0xd8 [ 791.643438] [<00000000005bc0ca>] kernel_thread_starter+0x6/0xc [ 791.643446] [<00000000005bc0c4>] kernel_thread_starter+0x0/0xc Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 0436c21..719b715 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -51,7 +51,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; - if (!bs) { + if (!bs || !bs->bio_integrity_pool) { bip = kmalloc(sizeof(struct bio_integrity_payload) + sizeof(struct bio_vec) * nr_vecs, gfp_mask); inline_vecs = nr_vecs; @@ -104,7 +104,7 @@ void bio_integrity_free(struct bio *bio) kfree(page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset); - if (bs) { + if (bs && bs->bio_integrity_pool) { if (bip->bip_slab != BIO_POOL_NONE) bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); -- cgit v0.10.2 From 0762b23d23c1f23beab91a3af0fa89749b75f03c Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Tue, 7 Jul 2015 12:41:07 +0530 Subject: block: use FIELD_SIZEOF to calculate size of a field use FIELD_SIZEOF instead of open coding Signed-off-by: Maninder Singh Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 82819e6..627ed0c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3370,7 +3370,7 @@ EXPORT_SYMBOL(blk_post_runtime_resume); int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * - sizeof(((struct request *)0)->cmd_flags)); + FIELD_SIZEOF(struct request, cmd_flags)); /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", -- cgit v0.10.2 From a322baad1003798312741b0cb97bd2c7511ccf61 Mon Sep 17 00:00:00 2001 From: Arianna Avanzini Date: Tue, 7 Jul 2015 03:08:15 +0200 Subject: block/blk-cgroup.c: free per-blkcg data when freeing the blkcg Currently, per-blkcg data is freed each time a policy is deactivated, that is also upon scheduler switch. However, when switching from a scheduler implementing a policy which requires per-blkcg data to another one, that same policy might be active on other devices, and therefore those same per-blkcg data could be still in use. This commit lets per-blkcg data be freed when the blkcg is freed instead of on policy deactivation. Signed-off-by: Arianna Avanzini Reported-and-tested-by: Michael Kaminsky Fixes: e48453c3 ("block, cgroup: implement policy-specific per-blkcg data") Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9f97da5..5e2723f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -822,8 +822,13 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); - if (blkcg != &blkcg_root) + if (blkcg != &blkcg_root) { + int i; + + for (i = 0; i < BLKCG_MAX_POLS; i++) + kfree(blkcg->pd[i]); kfree(blkcg); + } } static struct cgroup_subsys_state * @@ -1162,8 +1167,6 @@ void blkcg_deactivate_policy(struct request_queue *q, kfree(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; - kfree(blkg->blkcg->pd[pol->plid]); - blkg->blkcg->pd[pol->plid] = NULL; spin_unlock(&blkg->blkcg->lock); } -- cgit v0.10.2 From 030f4e968741d65aea9cd5f7814d1164967801ef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2015 17:30:25 +0800 Subject: crypto: nx - Fix reentrancy bugs This patch fixes a host of reentrancy bugs in the nx driver. The following algorithms are affected: * CCM * GCM * CTR * XCBC * SHA256 * SHA512 The crypto API allows a single transform to be used by multiple threads simultaneously. For example, IPsec will use a single tfm to process packets for a given SA. As packets may arrive on multiple CPUs that tfm must be reentrant. The nx driver does try to deal with this by using a spin lock. Unfortunately only the basic AES/CBC/ECB algorithms do this in the correct way. The symptom of these bugs may range from the generation of incorrect output to memory corruption. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 67f8081..e4311ce 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -494,8 +494,9 @@ out: static int ccm4309_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); @@ -525,8 +526,9 @@ static int ccm_aes_nx_encrypt(struct aead_request *req) static int ccm4309_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 2617cd4..dd7e9f3 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -72,7 +72,7 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm, if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; - memcpy(nx_ctx->priv.ctr.iv, + memcpy(nx_ctx->priv.ctr.nonce, in_key + key_len - CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); @@ -131,14 +131,15 @@ static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc, unsigned int nbytes) { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *iv = nx_ctx->priv.ctr.iv; + u8 iv[16]; + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, desc->info, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; - desc->info = nx_ctx->priv.ctr.iv; + desc->info = iv; return ctr_aes_nx_crypt(desc, dst, src, nbytes); } diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 08ac6d4..92c993f 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -317,6 +317,7 @@ out: static int gcm_aes_nx_crypt(struct aead_request *req, int enc) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; @@ -326,7 +327,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) spin_lock_irqsave(&nx_ctx->lock, irq_flags); - desc.info = nx_ctx->priv.gcm.iv; + desc.info = rctx->iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; @@ -424,8 +425,8 @@ out: static int gcm_aes_nx_encrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -434,8 +435,8 @@ static int gcm_aes_nx_encrypt(struct aead_request *req) static int gcm_aes_nx_decrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -445,7 +446,8 @@ static int gcm_aes_nx_decrypt(struct aead_request *req) static int gcm4106_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); @@ -457,7 +459,8 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req) static int gcm4106_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 8c2faff..c2f7d4b 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -42,6 +42,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, unsigned int key_len) { struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; switch (key_len) { case AES_KEYSIZE_128: @@ -51,7 +52,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return -EINVAL; } - memcpy(nx_ctx->priv.xcbc.key, in_key, key_len); + memcpy(csbcpb->cpb.aes_xcbc.key, in_key, key_len); return 0; } @@ -148,32 +149,29 @@ out: return rc; } -static int nx_xcbc_init(struct shash_desc *desc) +static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm) { - struct xcbc_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; - struct nx_sg *out_sg; - int len; + int err; - nx_ctx_init(nx_ctx, HCOP_FC_AES); + err = nx_crypto_ctx_aes_xcbc_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_AES); NX_CPB_SET_KEY_SIZE(csbcpb, NX_KS_AES_128); csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; - memcpy(csbcpb->cpb.aes_xcbc.key, nx_ctx->priv.xcbc.key, AES_BLOCK_SIZE); - memset(nx_ctx->priv.xcbc.key, 0, sizeof *nx_ctx->priv.xcbc.key); - - len = AES_BLOCK_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, nx_ctx->ap->sglen); + return 0; +} - if (len != AES_BLOCK_SIZE) - return -EINVAL; +static int nx_xcbc_init(struct shash_desc *desc) +{ + struct xcbc_state *sctx = shash_desc_ctx(desc); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + memset(sctx, 0, sizeof *sctx); return 0; } @@ -186,6 +184,7 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u32 to_process = 0, leftover, total; unsigned int max_sg_len; unsigned long irq_flags; @@ -213,6 +212,17 @@ static int nx_xcbc_update(struct shash_desc *desc, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = AES_BLOCK_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, nx_ctx->ap->sglen); + + if (data_len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } + + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + do { to_process = total - to_process; to_process = to_process & ~(AES_BLOCK_SIZE - 1); @@ -235,8 +245,10 @@ static int nx_xcbc_update(struct shash_desc *desc, (u8 *) sctx->buffer, &data_len, max_sg_len); - if (data_len != sctx->count) - return -EINVAL; + if (data_len != sctx->count) { + rc = -EINVAL; + goto out; + } } data_len = to_process - sctx->count; @@ -245,8 +257,10 @@ static int nx_xcbc_update(struct shash_desc *desc, &data_len, max_sg_len); - if (data_len != to_process - sctx->count) - return -EINVAL; + if (data_len != to_process - sctx->count) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); @@ -325,15 +339,19 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer, &len, nx_ctx->ap->sglen); - if (len != sctx->count) - return -EINVAL; + if (len != sctx->count) { + rc = -EINVAL; + goto out; + } len = AES_BLOCK_SIZE; out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, nx_ctx->ap->sglen); - if (len != AES_BLOCK_SIZE) - return -EINVAL; + if (len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -372,7 +390,7 @@ struct shash_alg nx_shash_aes_xcbc_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_aes_xcbc_init, + .cra_init = nx_crypto_ctx_aes_xcbc_init2, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 4e91bdb..08f8d5c 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -29,34 +29,28 @@ #include "nx.h" -static int nx_sha256_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm) { - struct sha256_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA256]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA256_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha256_init(struct shash_desc *desc) { + struct sha256_state *sctx = shash_desc_ctx(desc); - if (len != SHA256_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -78,6 +72,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; @@ -108,6 +103,16 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA256_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -282,7 +287,7 @@ struct shash_alg nx_shash_sha256_alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha256_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index e6a58d2..aff0fe5 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -28,34 +28,29 @@ #include "nx.h" -static int nx_sha512_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm) { - struct sha512_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA512]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA512_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha512_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); - if (len != SHA512_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -77,6 +72,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; @@ -107,6 +103,16 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA512_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA512_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -288,7 +294,7 @@ struct shash_alg nx_shash_sha512_alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha512_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index f6198f2..4369713 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -713,12 +713,15 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode) /* entry points from the crypto tfm initializers */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) { + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct nx_ccm_rctx)); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_CCM); } int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm) { + crypto_aead_set_reqsize(tfm, sizeof(struct nx_gcm_rctx)); return nx_crypto_ctx_init(crypto_aead_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index de3ea87..cdff03a 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -2,6 +2,8 @@ #ifndef __NX_H__ #define __NX_H__ +#include + #define NX_NAME "nx-crypto" #define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver" #define NX_VERSION "1.0" @@ -91,8 +93,11 @@ struct nx_crypto_driver { #define NX_GCM4106_NONCE_LEN (4) #define NX_GCM_CTR_OFFSET (12) -struct nx_gcm_priv { +struct nx_gcm_rctx { u8 iv[16]; +}; + +struct nx_gcm_priv { u8 iauth_tag[16]; u8 nonce[NX_GCM4106_NONCE_LEN]; }; @@ -100,8 +105,11 @@ struct nx_gcm_priv { #define NX_CCM_AES_KEY_LEN (16) #define NX_CCM4309_AES_KEY_LEN (19) #define NX_CCM4309_NONCE_LEN (3) -struct nx_ccm_priv { +struct nx_ccm_rctx { u8 iv[16]; +}; + +struct nx_ccm_priv { u8 b0[16]; u8 iauth_tag[16]; u8 oauth_tag[16]; @@ -113,7 +121,7 @@ struct nx_xcbc_priv { }; struct nx_ctr_priv { - u8 iv[16]; + u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct nx_crypto_ctx { -- cgit v0.10.2 From 91c269a0d3eadd63f7112411ee812fbc170dc488 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 6 Jul 2015 20:55:35 +0200 Subject: MAINTAINER: add bridge netfilter So scripts/get_maintainer.pl shows the Netfilter mailing lists. Reported-by: Julien Grall Signed-off-by: Pablo Neira Ayuso diff --git a/MAINTAINERS b/MAINTAINERS index 993d4cf..8183b465 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6996,6 +6996,7 @@ F: include/uapi/linux/netfilter/ F: net/*/netfilter.c F: net/*/netfilter/ F: net/netfilter/ +F: net/bridge/br_netfilter*.c NETLABEL M: Paul Moore -- cgit v0.10.2 From 86e8971800381c3a8d8d9327f83b1f97ccb04a4f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 7 Jul 2015 15:55:21 +0100 Subject: netfilter: bridge: Use __in6_dev_get rather than in6_dev_get in br_validate_ipv6 The commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd "netfilter: bridge: forward IPv6 fragmented packets" introduced a new function br_validate_ipv6 which take a reference on the inet6 device. Although, the reference is not released at the end. This will result to the impossibility to destroy any netdevice using ipv6 and bridge. It's possible to directly retrieve the inet6 device without taking a reference as all netfilter hooks are protected by rcu_read_lock via nf_hook_slow. Spotted while trying to destroy a Xen guest on the upstream Linux: "unregister_netdevice: waiting for vif1.0 to become free. Usage count = 1" Signed-off-by: Julien Grall Cc: Bernhard Thaler Cc: Pablo Neira Ayuso Cc: fw@strlen.de Cc: ian.campbell@citrix.com Cc: wei.liu2@citrix.com Cc: Bob Liu Acked-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6d12d26..13b7d1e 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -104,7 +104,7 @@ int br_validate_ipv6(struct sk_buff *skb) { const struct ipv6hdr *hdr; struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); -- cgit v0.10.2 From 6224beb12e190ff11f3c7d4bf50cb2922878f600 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 7 Jul 2015 15:05:03 -0400 Subject: tracing: Have branch tracer use recursive field of task struct Fengguang Wu's tests triggered a bug in the branch tracer's start up test when CONFIG_DEBUG_PREEMPT set. This was because that config adds some debug logic in the per cpu field, which calls back into the branch tracer. The branch tracer has its own recursive checks, but uses a per cpu variable to implement it. If retrieving the per cpu variable calls back into the branch tracer, you can see how things will break. Instead of using a per cpu variable, use the trace_recursion field of the current task struct. Simply set a bit when entering the branch tracing and clear it when leaving. If the bit is set on entry, just don't do the tracing. There's also the case with lockdep, as the local_irq_save() called before the recursion can also trigger code that can call back into the function. Changing that to a raw_local_irq_save() will protect that as well. This prevents the recursion and the inevitable crash that follows. Link: http://lkml.kernel.org/r/20150630141803.GA28071@wfg-t540p.sh.intel.com Cc: stable@vger.kernel.org # 3.10+ Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f060716..74bde81 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,7 @@ enum { TRACE_CONTROL_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index a87b43f..e2e12ad 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_branch *entry; struct ring_buffer *buffer; unsigned long flags; - int cpu, pc; + int pc; const char *p; + if (current->trace_recursion & TRACE_BRANCH_BIT) + return; + /* * I would love to save just the ftrace_likely_data pointer, but * this code can also be used by modules. Ugly things can happen @@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); - if (atomic_inc_return(&data->disabled) != 1) + raw_local_irq_save(flags); + current->trace_recursion |= TRACE_BRANCH_BIT; + data = this_cpu_ptr(tr->trace_buffer.data); + if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); @@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) __buffer_unlock_commit(buffer, event); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + current->trace_recursion &= ~TRACE_BRANCH_BIT; + raw_local_irq_restore(flags); } static inline -- cgit v0.10.2 From 32f675bbc9be14a40d972820e190ac56341ce198 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jul 2015 15:57:19 +0200 Subject: net_sched: gen_estimator: extend pps limit rate estimators are limited to 4 Mpps, which was fine years ago, but too small with current hardware generation. Lets use 2^5 scaling instead of 2^10 to get 128 Mpps new limit. On 64bit arch, use an "unsigned long" for temp storage and remove limit. (We do not expect 32bit arches to be able to reach this point) Tested: tc -s -d filter sh dev eth0 parent ffff: filter protocol ip pref 1 u32 filter protocol ip pref 1 u32 fh 800: ht divisor 1 filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 match 07000000/ff000000 at 12 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 166 sec Action statistics: Sent 39734251496 bytes 863788076 pkt (dropped 863788117, overlimits 0 requeues 0) rate 4067Mbit 11053596pps backlog 0b 0p requeues 0 Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9dfb88a..92d886f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,7 +66,7 @@ NOTES. - * avbps is scaled by 2^5, avpps is scaled by 2^10. + * avbps and avpps are scaled by 2^5. * both values are reported as 32 bit unsigned values. bps can overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor @@ -85,10 +85,10 @@ struct gen_estimator struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; + u32 last_packets; + unsigned long avpps; u64 last_bytes; u64 avbps; - u32 last_packets; - u32 avpps; struct rcu_head e_rcu; struct rb_node node; struct gnet_stats_basic_cpu __percpu *cpu_bstats; @@ -118,8 +118,8 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { struct gnet_stats_basic_packed b = {0}; + unsigned long rate; u64 brate; - u32 rate; spin_lock(e->stats_lock); read_lock(&est_lock); @@ -133,10 +133,11 @@ static void est_timer(unsigned long arg) e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); e->rate_est->bps = (e->avbps+0xF)>>5; - rate = (b.packets - e->last_packets)<<(12 - idx); + rate = b.packets - e->last_packets; + rate <<= (7 - idx); e->last_packets = b.packets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - e->rate_est->pps = (e->avpps+0x1FF)>>10; + e->rate_est->pps = (e->avpps + 0xF) >> 5; skip: read_unlock(&est_lock); spin_unlock(e->stats_lock); -- cgit v0.10.2 From aa43c5ff737dac5dcfccd12d23abeefbdf0579c7 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 4 Jul 2015 11:22:30 +0200 Subject: NET: hamradio: Fix IP over bpq encapsulation. Since 1d5da757da860a6916adbf68b09e868062b4b3b8 (ax25: Stop using magic neighbour cache operations.) any attempt to transmit IP packets over a bpqether device will result in a message like "Dead loop on virtual device bpq0, fix it urgently!" Fix suggested by Eric W. Biederman . Signed-off-by: Ralf Baechle Cc: # 4.1 Signed-off-by: David S. Miller diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 7856b6c..d95a50a 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -482,6 +482,7 @@ static void bpq_setup(struct net_device *dev) memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); dev->flags = 0; + dev->features = NETIF_F_LLTX; /* Allow recursion */ #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) dev->header_ops = &ax25_header_ops; -- cgit v0.10.2 From cfa520056407acaa29f3aa7aee10c929b371c7cf Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 5 Jul 2015 12:16:27 -0500 Subject: net: phy: add dependency on HAS_IOMEM to MDIO_BUS_MUX_MMIOREG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On UML builds, mdio-mux-mmioreg.c fails to compile: drivers/net/phy/mdio-mux-mmioreg.c:50:3: error: implicit declaration of function ‘ioremap’ [-Werror=implicit-function-declaration] drivers/net/phy/mdio-mux-mmioreg.c:63:3: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] This is due to CONFIG_OF now being user selectable. Add a dependency on HAS_IOMEM to fix this. Signed-off-by: Rob Herring Cc: Florian Fainelli Cc: David S. Miller Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index cf18940..cb86d7a 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -191,7 +191,7 @@ config MDIO_BUS_MUX_GPIO config MDIO_BUS_MUX_MMIOREG tristate "Support for MMIO device-controlled MDIO bus multiplexers" - depends on OF_MDIO + depends on OF_MDIO && HAS_IOMEM select MDIO_BUS_MUX help This module provides a driver for MDIO bus multiplexers that -- cgit v0.10.2 From f7e2965db17dd3b60f05fad88e7afc79ea75b48f Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Mon, 6 Jul 2015 05:53:35 -0700 Subject: bridge: mdb: start delete timer for temp static entries Start the delete timer when adding temp static entries so they can expire. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Fixes: ccb1c31a7a87 ("bridge: add flags to distinguish permanent mdb entires") Signed-off-by: David S. Miller diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70..3bfc675 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_htable *mdb; + unsigned long now = jiffies; int err; mdb = mlock_dereference(br->mdb, br); @@ -347,6 +348,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + if (state == MDB_TEMPORARY) + mod_timer(&p->timer, now + br->multicast_membership_interval); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; -- cgit v0.10.2 From 142b942a75cb10ede1b42bf85368d41449ab4e3b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 6 Jul 2015 15:51:20 +0200 Subject: rhashtable: fix for resize events during table walk If rhashtable_walk_next detects a resize operation in progress, it jumps to the new table and continues walking that one. But it misses to drop the reference to it's current item, leading it to continue traversing the new table's bucket in which the current item is sorted into, and after reaching that bucket's end continues traversing the new table's second bucket instead of the first one, thereby potentially missing items. This fixes the rhashtable runtime test for me. Bug probably introduced by Herbert Xu's patch eddee5ba ("rhashtable: Fix walker behaviour during rehash") although not explicitly tested. Fixes: eddee5ba ("rhashtable: Fix walker behaviour during rehash") Signed-off-by: Phil Sutter Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a60a6d3..cc0c697 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -610,6 +610,8 @@ next: iter->skip = 0; } + iter->p = NULL; + /* Ensure we see any new tables. */ smp_rmb(); @@ -620,8 +622,6 @@ next: return ERR_PTR(-EAGAIN); } - iter->p = NULL; - return NULL; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); -- cgit v0.10.2 From d339727c2b1a10f25e6636670ab6e1841170e328 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 17:13:26 +0200 Subject: net: graceful exit from netif_alloc_netdev_queues() User space can crash kernel with ip link add ifb10 numtxqueues 100000 type ifb We must replace a BUG_ON() by proper test and return -EINVAL for crazy values. Fixes: 60877a32bce00 ("net: allow large number of tx queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 6778a99..0ad6262 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { -- cgit v0.10.2 From 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 6 Jul 2015 17:25:10 +0200 Subject: Revert "dev: set iflink to 0 for virtual interfaces" This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c. The side effect of this commit is to add a '@NONE' after each virtual interface name with a 'ip link'. It may break existing scripts. Reported-by: Olivier Hartkopp Signed-off-by: Nicolas Dichtel Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 0ad6262..1e57efd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); -- cgit v0.10.2 From cfbfd86bfde15020bccde377e11586ee5c8b701d Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Mon, 6 Jul 2015 11:57:37 -0500 Subject: amd-xgbe: Fix DMA API debug warning When running a kernel configured with CONFIG_DMA_API_DEBUG=y a warning is issued: DMA-API: device driver tries to sync DMA memory it has not allocated This warning is the result of mapping the full range of the Rx buffer pages allocated and then performing a dma_sync_single_for_cpu against a calculated DMA address. The proper thing to do is to use the dma_sync_single_range_for_cpu with a base DMA address and an offset. Reported-by: Kim Phillips Signed-off-by: Tom Lendacky Tested-by: Kim Phillips Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 661cdaa..b3bc87f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -303,7 +303,8 @@ static void xgbe_set_buffer_data(struct xgbe_buffer_data *bd, get_page(pa->pages); bd->pa = *pa; - bd->dma = pa->pages_dma + pa->pages_offset; + bd->dma_base = pa->pages_dma; + bd->dma_off = pa->pages_offset; bd->dma_len = len; pa->pages_offset += len; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 506e832..a4473d8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1110,6 +1110,7 @@ static void xgbe_rx_desc_reset(struct xgbe_prv_data *pdata, unsigned int rx_usecs = pdata->rx_usecs; unsigned int rx_frames = pdata->rx_frames; unsigned int inte; + dma_addr_t hdr_dma, buf_dma; if (!rx_usecs && !rx_frames) { /* No coalescing, interrupt for every descriptor */ @@ -1129,10 +1130,12 @@ static void xgbe_rx_desc_reset(struct xgbe_prv_data *pdata, * Set buffer 2 (hi) address to buffer dma address (hi) and * set control bits OWN and INTE */ - rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx.hdr.dma)); - rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx.hdr.dma)); - rdesc->desc2 = cpu_to_le32(lower_32_bits(rdata->rx.buf.dma)); - rdesc->desc3 = cpu_to_le32(upper_32_bits(rdata->rx.buf.dma)); + hdr_dma = rdata->rx.hdr.dma_base + rdata->rx.hdr.dma_off; + buf_dma = rdata->rx.buf.dma_base + rdata->rx.buf.dma_off; + rdesc->desc0 = cpu_to_le32(lower_32_bits(hdr_dma)); + rdesc->desc1 = cpu_to_le32(upper_32_bits(hdr_dma)); + rdesc->desc2 = cpu_to_le32(lower_32_bits(buf_dma)); + rdesc->desc3 = cpu_to_le32(upper_32_bits(buf_dma)); XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, inte); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 1e9c28d..aae9d5e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1765,8 +1765,9 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, /* Start with the header buffer which may contain just the header * or the header plus data */ - dma_sync_single_for_cpu(pdata->dev, rdata->rx.hdr.dma, - rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base, + rdata->rx.hdr.dma_off, + rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; @@ -1778,8 +1779,11 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, len -= copy_len; if (len) { /* Add the remaining data as a frag */ - dma_sync_single_for_cpu(pdata->dev, rdata->rx.buf.dma, - rdata->rx.buf.dma_len, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(pdata->dev, + rdata->rx.buf.dma_base, + rdata->rx.buf.dma_off, + rdata->rx.buf.dma_len, + DMA_FROM_DEVICE); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, @@ -1945,8 +1949,9 @@ read_again: if (!skb) error = 1; } else if (rdesc_len) { - dma_sync_single_for_cpu(pdata->dev, - rdata->rx.buf.dma, + dma_sync_single_range_for_cpu(pdata->dev, + rdata->rx.buf.dma_base, + rdata->rx.buf.dma_off, rdata->rx.buf.dma_len, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 63d72a1..717ce21 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -337,7 +337,8 @@ struct xgbe_buffer_data { struct xgbe_page_alloc pa; struct xgbe_page_alloc pa_unmap; - dma_addr_t dma; + dma_addr_t dma_base; + unsigned long dma_off; unsigned int dma_len; }; -- cgit v0.10.2 From 6c3e921b18edca290099adfddde8a50236bf2d80 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 6 Jul 2015 20:34:55 +0200 Subject: net: fec: Ensure clocks are enabled while using mdio bus When a switch is attached to the mdio bus, the mdio bus can be used while the interface is not open. If the IPG clock is not enabled, MDIO reads/writes will simply time out. Add support for runtime PM to control this clock. Enable/disable this clock using runtime PM, with open()/close() and mdio read()/write() function triggering runtime PM operations. Since PM is optional, the IPG clock is enabled at probe and is no longer modified by fec_enet_clk_enable(), thus if PM is not enabled in the kernel, it is guaranteed the clock is running when MDIO operations are performed. Signed-off-by: Andrew Lunn Acked-by: Fugang Duan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1f89c59..42e20e5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAEM_V 0x8 #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 +#define FEC_MDIO_PM_TIMEOUT 100 /* ms */ static struct platform_device_id fec_devtype[] = { { @@ -1767,7 +1769,13 @@ static void fec_enet_adjust_link(struct net_device *ndev) static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1783,18 +1791,30 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO read timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } - /* return value */ - return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + +out: + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1811,10 +1831,13 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO write timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; } - return 0; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_clk_enable(struct net_device *ndev, bool enable) @@ -1826,9 +1849,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; - ret = clk_prepare_enable(fep->clk_ipg); - if (ret) - goto failed_clk_ipg; if (fep->clk_enet_out) { ret = clk_prepare_enable(fep->clk_enet_out); if (ret) @@ -1852,7 +1872,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } } else { clk_disable_unprepare(fep->clk_ahb); - clk_disable_unprepare(fep->clk_ipg); if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1874,8 +1893,6 @@ failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ipg); -failed_clk_ipg: clk_disable_unprepare(fep->clk_ahb); return ret; @@ -2847,10 +2864,14 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; + ret = pm_runtime_get_sync(&fep->pdev->dev); + if (IS_ERR_VALUE(ret)) + return ret; + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) - return ret; + goto clk_enable; /* I should reset the ring buffers here, but I don't yet know * a simple way to do that. @@ -2881,6 +2902,9 @@ err_enet_mii_probe: fec_enet_free_buffers(ndev); err_enet_alloc: fec_enet_clk_enable(ndev, false); +clk_enable: + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; } @@ -2903,6 +2927,9 @@ fec_enet_close(struct net_device *ndev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&fep->pdev->dev); + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); + fec_enet_free_buffers(ndev); return 0; @@ -3388,6 +3415,10 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); @@ -3434,6 +3465,8 @@ fec_probe(struct platform_device *pdev) netif_carrier_off(ndev); fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); ret = register_netdev(ndev); if (ret) @@ -3447,6 +3480,12 @@ fec_probe(struct platform_device *pdev) fep->rx_copybreak = COPYBREAK_DEFAULT; INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); + + pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; failed_register: @@ -3457,6 +3496,8 @@ failed_init: if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_regulator: + clk_disable_unprepare(fep->clk_ipg); +failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: failed_phy: @@ -3568,7 +3609,28 @@ failed_clk: return ret; } -static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); +static int __maybe_unused fec_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + clk_disable_unprepare(fep->clk_ipg); + + return 0; +} + +static int __maybe_unused fec_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + return clk_prepare_enable(fep->clk_ipg); +} + +static const struct dev_pm_ops fec_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) + SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) +}; static struct platform_driver fec_driver = { .driver = { -- cgit v0.10.2 From 4f7d2cdfdde71ffe962399b7020c674050329423 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jul 2015 00:07:52 +0200 Subject: rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes anymore with respect to their policy, that is, ifla_vfinfo_policy[]. Before, they were part of ifla_policy[], but they have been nested since placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO, which is another nested attribute for the actual VF attributes such as IFLA_VF_MAC, IFLA_VF_VLAN, etc. Despite the policy being split out from ifla_policy[] in this commit, it's never applied anywhere. nla_for_each_nested() only does basic nla_ok() testing for struct nlattr, but it doesn't know about the data context and their requirements. Fix, on top of Jason's initial work, does 1) parsing of the attributes with the right policy, and 2) using the resulting parsed attribute table from 1) instead of the nla_for_each_nested() loop (just like we used to do when still part of ifla_policy[]). Reference: http://thread.gmane.org/gmane.linux.network/368913 Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric") Reported-by: Jason Gunthorpe Cc: Chris Wright Cc: Sucheta Chakraborty Cc: Greg Rose Cc: Jeff Kirsher Cc: Rony Efraim Cc: Vlad Zolotarov Cc: Nicolas Dichtel Cc: Thomas Graf Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Borkmann Acked-by: Vlad Zolotarov Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 01ced4a..9e433d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; -- cgit v0.10.2 From fc24f2b2094366da8786f59f2606307e934cea17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 7 Jul 2015 08:34:13 +0300 Subject: ip_tunnel: fix ipv4 pmtu check to honor inner ip header df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frag needed should be sent only if the inner header asked to not fragment. Currently fragmentation is broken if the tunnel has df set, but df was not asked in the original packet. The tunnel's df needs to be still checked to update internally the pmtu cache. Commit 23a3647bc4f93bac broke it, and this commit fixes the ipv4 df check back to the way it was. Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba..626d9e5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } -- cgit v0.10.2 From b5a983f3141239b5b0b4a4e66efa31f8a26354b8 Mon Sep 17 00:00:00 2001 From: Mazhar Rana Date: Tue, 7 Jul 2015 15:04:50 +0530 Subject: bonding: "primary_reselect" with "failure" is not working properly When "primary_reselect" is set to "failure", primary interface should not become active until current active slave is down. But if we set first member of bond device as a "primary" interface and "primary_reselect" is set to "failure" then whenever primary interface's link get back(up) it become active slave even if current active slave is still up. With this patch, "bond_find_best_slave" will not traverse members if primary interface is not candidate for failover/reselection and current active slave is still up. Signed-off-by: Mazhar Rana Signed-off-by: Jay Vosburgh Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 19eb990..317a494 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -689,40 +689,57 @@ out: } -static bool bond_should_change_active(struct bonding *bond) +static struct slave *bond_choose_primary_or_current(struct bonding *bond) { struct slave *prim = rtnl_dereference(bond->primary_slave); struct slave *curr = rtnl_dereference(bond->curr_active_slave); - if (!prim || !curr || curr->link != BOND_LINK_UP) - return true; + if (!prim || prim->link != BOND_LINK_UP) { + if (!curr || curr->link != BOND_LINK_UP) + return NULL; + return curr; + } + if (bond->force_primary) { bond->force_primary = false; - return true; + return prim; + } + + if (!curr || curr->link != BOND_LINK_UP) + return prim; + + /* At this point, prim and curr are both up */ + switch (bond->params.primary_reselect) { + case BOND_PRI_RESELECT_ALWAYS: + return prim; + case BOND_PRI_RESELECT_BETTER: + if (prim->speed < curr->speed) + return curr; + if (prim->speed == curr->speed && prim->duplex <= curr->duplex) + return curr; + return prim; + case BOND_PRI_RESELECT_FAILURE: + return curr; + default: + netdev_err(bond->dev, "impossible primary_reselect %d\n", + bond->params.primary_reselect); + return curr; } - if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && - (prim->speed < curr->speed || - (prim->speed == curr->speed && prim->duplex <= curr->duplex))) - return false; - if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) - return false; - return true; } /** - * find_best_interface - select the best available slave to be the active one + * bond_find_best_slave - select the best available slave to be the active one * @bond: our bonding struct */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *slave, *bestslave = NULL, *primary; + struct slave *slave, *bestslave = NULL; struct list_head *iter; int mintime = bond->params.updelay; - primary = rtnl_dereference(bond->primary_slave); - if (primary && primary->link == BOND_LINK_UP && - bond_should_change_active(bond)) - return primary; + slave = bond_choose_primary_or_current(bond); + if (slave) + return slave; bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) -- cgit v0.10.2 From 9e9f665a18008999e749bd41394efcbf5c37a726 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:00 +0100 Subject: sfc: refactor code in efx_ef10_set_mac_address() Re-organize the structure of error handling to avoid having to duplicate the netif_err() around the ifdefs. The only change to the behaviour of the error-handling is that the PF's data structure to record VF details should only be updated if the original command succeeded. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 8476434..9740cd0 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3829,38 +3829,27 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx_net_open(efx->net_dev); netif_device_attach(efx->net_dev); -#if !defined(CONFIG_SFC_SRIOV) - if (rc == -EPERM) - netif_err(efx, drv, efx->net_dev, - "Cannot change MAC address; use sfboot to enable mac-spoofing" - " on this interface\n"); -#else - if (rc == -EPERM) { +#ifdef CONFIG_SFC_SRIOV + if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - /* Switch to PF and change MAC address on vport */ - if (efx->pci_dev->is_virtfn && pci_dev_pf) { - struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); + if (rc == -EPERM) { + struct efx_nic *efx_pf; - if (!efx_ef10_sriov_set_vf_mac(efx_pf, - nic_data->vf_index, - efx->net_dev->dev_addr)) - return 0; - } - netif_err(efx, drv, efx->net_dev, - "Cannot change MAC address; use sfboot to enable mac-spoofing" - " on this interface\n"); - } else if (efx->pci_dev->is_virtfn) { - /* Successfully changed by VF (with MAC spoofing), so update the - * parent PF if possible. - */ - struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; + /* Switch to PF and change MAC address on vport */ + efx_pf = pci_get_drvdata(pci_dev_pf); - if (pci_dev_pf) { + rc = efx_ef10_sriov_set_vf_mac(efx_pf, + nic_data->vf_index, + efx->net_dev->dev_addr); + } else if (!rc) { struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; unsigned int i; + /* MAC address successfully changed by VF (with MAC + * spoofing) so update the parent PF if possible. + */ for (i = 0; i < efx_pf->vf_count; ++i) { struct ef10_vf *vf = nic_data->vf + i; @@ -3871,8 +3860,14 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) } } } - } + } else #endif + if (rc == -EPERM) { + netif_err(efx, drv, efx->net_dev, + "Cannot change MAC address; use sfboot to enable" + " mac-spoofing on this interface\n"); + } + return rc; } -- cgit v0.10.2 From 7a186f4703de6f4b7feb5d09735b096145b8918c Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:19 +0100 Subject: sfc: add legacy method for changing a PF's MAC address Some versions of MCFW do not support the MC_CMD_VADAPTOR_SET_MAC command, and ENOSYS will be returned. If the PF created its own vport, the function's datapath must be stopped and the vport can be reconfigured to reflect the new MAC address. If the MCFW created the vport for the PF (which is the case when the nic_data->vport_mac is blank), nothing further needs to be done as the vport is not under the control of the PF. This only applies to PFs because the MCFW in question does not support VFs. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9740cd0..e0cb361 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -101,6 +101,11 @@ static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) return resource_size(&efx->pci_dev->resource[bar]); } +static bool efx_ef10_is_vf(struct efx_nic *efx) +{ + return efx->type->is_vf; +} + static int efx_ef10_get_pf_index(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); @@ -677,6 +682,48 @@ static int efx_ef10_probe_pf(struct efx_nic *efx) return efx_ef10_probe(efx); } +int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + +int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + #ifdef CONFIG_SFC_SRIOV static int efx_ef10_probe_vf(struct efx_nic *efx) { @@ -3804,6 +3851,72 @@ static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) WARN_ON(remove_failed); } +static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 mac_old[ETH_ALEN]; + int rc, rc2; + + /* Only reconfigure a PF-created vport */ + if (is_zero_ether_addr(nic_data->vport_mac)) + return 0; + + efx_device_detach_sync(efx); + efx_net_stop(efx->net_dev); + down_write(&efx->filter_sem); + efx_ef10_filter_table_remove(efx); + up_write(&efx->filter_sem); + + rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id); + if (rc) + goto restore_filters; + + ether_addr_copy(mac_old, nic_data->vport_mac); + rc = efx_ef10_vport_del_mac(efx, nic_data->vport_id, + nic_data->vport_mac); + if (rc) + goto restore_vadaptor; + + rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id, + efx->net_dev->dev_addr); + if (!rc) { + ether_addr_copy(nic_data->vport_mac, efx->net_dev->dev_addr); + } else { + rc2 = efx_ef10_vport_add_mac(efx, nic_data->vport_id, mac_old); + if (rc2) { + /* Failed to add original MAC, so clear vport_mac */ + eth_zero_addr(nic_data->vport_mac); + goto reset_nic; + } + } + +restore_vadaptor: + rc2 = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + if (rc2) + goto reset_nic; +restore_filters: + down_write(&efx->filter_sem); + rc2 = efx_ef10_filter_table_probe(efx); + up_write(&efx->filter_sem); + if (rc2) + goto reset_nic; + + rc2 = efx_net_open(efx->net_dev); + if (rc2) + goto reset_nic; + + netif_device_attach(efx->net_dev); + + return rc; + +reset_nic: + netif_err(efx, drv, efx->net_dev, + "Failed to restore when changing MAC address - scheduling reset\n"); + efx_schedule_reset(efx, RESET_TYPE_DATAPATH); + + return rc ? rc : rc2; +} + static int efx_ef10_set_mac_address(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); @@ -3866,6 +3979,13 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) netif_err(efx, drv, efx->net_dev, "Cannot change MAC address; use sfboot to enable" " mac-spoofing on this interface\n"); + } else if (rc == -ENOSYS && !efx_ef10_is_vf(efx)) { + /* If the active MCFW does not support MC_CMD_VADAPTOR_SET_MAC + * fall-back to the method of changing the MAC address on the + * vport. This only applies to PFs because such versions of + * MCFW do not support VFs. + */ + rc = efx_ef10_vport_set_mac_address(efx); } return rc; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 6c9b6e4..7485f71 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -29,30 +29,6 @@ static int efx_ef10_evb_port_assign(struct efx_nic *efx, unsigned int port_id, NULL, 0, NULL); } -static int efx_ef10_vport_add_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); - - MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); - ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); - - return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, - sizeof(inbuf), NULL, 0, NULL); -} - -static int efx_ef10_vport_del_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); - - MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); - ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); - - return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, - sizeof(inbuf), NULL, 0, NULL); -} - static int efx_ef10_vswitch_alloc(struct efx_nic *efx, unsigned int port_id, unsigned int vswitch_type) { @@ -136,24 +112,6 @@ static int efx_ef10_vport_free(struct efx_nic *efx, unsigned int port_id) NULL, 0, NULL); } -static int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); - - MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); - return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - -static int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); - - MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); - return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h index db4ef53..6d25b92 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.h +++ b/drivers/net/ethernet/sfc/ef10_sriov.h @@ -65,5 +65,11 @@ int efx_ef10_vswitching_restore_pf(struct efx_nic *efx); int efx_ef10_vswitching_restore_vf(struct efx_nic *efx); void efx_ef10_vswitching_remove_pf(struct efx_nic *efx); void efx_ef10_vswitching_remove_vf(struct efx_nic *efx); +int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac); +int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac); +int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id); +int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id); #endif /* EF10_SRIOV_H */ -- cgit v0.10.2 From 535a61777f44eebcb71f2a6866f95b17ee0f13c7 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:33 +0100 Subject: sfc: suppress handled MCDI failures when changing the MAC address Signed-off-by: Shradha Shah Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e0cb361..605cc89 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3933,8 +3933,8 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx->net_dev->dev_addr); MCDI_SET_DWORD(inbuf, VADAPTOR_SET_MAC_IN_UPSTREAM_PORT_ID, nic_data->vport_id); - rc = efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, - sizeof(inbuf), NULL, 0, NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, + sizeof(inbuf), NULL, 0, NULL); efx_ef10_filter_table_probe(efx); up_write(&efx->filter_sem); @@ -3986,6 +3986,9 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); + } else { + efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, + sizeof(inbuf), NULL, 0, rc); } return rc; -- cgit v0.10.2 From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: net/tipc: initialize security state for new connection socket Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed5..3a7567f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); -- cgit v0.10.2 From 1973db0df7c3bd69de2a1041d3364567287771d9 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 7 Jul 2015 18:30:39 +0530 Subject: drivers: net: cpsw: fix crash while accessing second slave ethernet interface When cpsw's number of slave is set to 1 in device tree and while accessing second slave ndev and priv in cpsw_tx_interrupt(), there is a kernel crash. This is due to cpsw_get_slave_priv() not verifying number of slaves while retriving netdev priv and returns a invalid memory region. Fixing the issue by introducing number of slave check in cpsw_get_slave_priv() and cpsw_get_slave_ndev(). [ 15.879589] Unable to handle kernel paging request at virtual address 0f0e142c [ 15.888540] pgd = ed374000 [ 15.891359] [0f0e142c] *pgd=00000000 [ 15.895105] Internal error: Oops: 5 [#1] SMP ARM [ 15.899936] Modules linked in: [ 15.903139] CPU: 0 PID: 593 Comm: udhcpc Tainted: G W 4.1.0-12205-gfda8b18-dirty #10 [ 15.912386] Hardware name: Generic AM43 (Flattened Device Tree) [ 15.918557] task: ed2a2e00 ti: ed3fe000 task.ti: ed3fe000 [ 15.924187] PC is at cpsw_tx_interrupt+0x30/0x44 [ 15.929008] LR is at _raw_spin_unlock_irqrestore+0x40/0x44 [ 15.934726] pc : [] lr : [] psr: 20000193 [ 15.934726] sp : ed3ffc08 ip : ed2a2e40 fp : 00000000 [ 15.946685] r10: c0969ce8 r9 : c0969cfc r8 : 00000000 [ 15.952129] r7 : 000000c6 r6 : ee54ab00 r5 : ee169c64 r4 : ee534e00 [ 15.958932] r3 : 0f0e0d0c r2 : 00000000 r1 : ed3ffbc0 r0 : 00000001 [ 15.965735] Flags: nzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 15.973261] Control: 10c5387d Table: ad374059 DAC: 00000015 [ 15.979246] Process udhcpc (pid: 593, stack limit = 0xed3fe218) [ 15.985414] Stack: (0xed3ffc08 to 0xed400000) [ 15.989954] fc00: ee54ab00 c009928c c0a9e648 60000193 000032e4 ee169c00 [ 15.998478] fc20: ee169c64 ee169c00 ee169c64 ee54ab00 00000001 00000001 ee67e268 ee008800 [ 16.006995] fc40: ee534800 c009946c ee169c00 ee169c64 c08bd660 c009c370 c009c2a4 000000c6 [ 16.015513] fc60: c08b75c4 c08b0854 00000000 c0098b3c 000000c6 c0098c50 ed3ffcb0 0000003a [ 16.024033] fc80: ed3ffcb0 fa24010c c08b7800 fa240100 ee7e9880 c00094c4 c05ef4e8 60000013 [ 16.032556] fca0: ffffffff ed3ffce4 ee7e9880 c05ef964 00000001 ed2a33d8 00000000 ed2a2e00 [ 16.041080] fcc0: 60000013 ee536bf8 60000013 ee51b800 ee7e9880 ee67e268 ee7e9880 ee534800 [ 16.049603] fce0: c0ad0768 ed3ffcf8 c008e910 c05ef4e8 60000013 ffffffff 00000001 00000001 [ 16.058121] fd00: ee536bf8 c0487a04 00000000 00000000 ee534800 00000000 00000156 c048c990 [ 16.066645] fd20: 00000000 00000000 c0969f40 00000000 00000000 c05000e8 00000001 00000000 [ 16.075167] fd40: 00000000 c051eefc 00000000 ee67e268 00000000 00000000 ee51b800 ed3ffd9c [ 16.083690] fd60: 00000000 ee67e200 ee51b800 ee7e9880 ee67e268 00000000 00000000 ee67e200 [ 16.092211] fd80: ee51b800 ee7e9880 ee67e268 ee534800 ee67e200 c051eedc ee67e268 00000010 [ 16.100727] fda0: 00000000 00000000 ee7e9880 ee534800 00000000 ee67e268 ee51b800 c05006fc [ 16.109247] fdc0: ee67e268 00000001 c0500488 00000156 ee7e9880 00000000 ed3fe000 fffffff4 [ 16.117771] fde0: ed3fff1c ee7e9880 ee534800 00000148 00000000 ed1f8340 00000000 00000000 [ 16.126289] fe00: 00000000 c05a9054 00000000 00000000 00000156 c0ab62a8 00000010 ed3e7000 [ 16.134812] fe20: 00000000 00000008 edcfb700 ed3fff1c c0fb5f94 ed2a2e00 c0fb5f64 000005d8 [ 16.143336] fe40: c0a9b3b8 00000000 ed3e7070 00000000 00000000 00000000 00009f40 00000000 [ 16.151858] fe60: 00000000 00020022 00110008 00000000 00000000 43004400 00000000 ffffffff [ 16.160374] fe80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 16.168898] fea0: edcfb700 bee5f380 00000014 00000000 ed3fe000 00000000 00004400 c04e2b64 [ 16.177415] fec0: 00000002 c04e3b00 ed3ffeec 00000001 0000011a 00000000 00000000 bee5f394 [ 16.185937] fee0: 00000148 ed3fff10 00000014 00000001 00000000 00000000 ed3ffee4 00000000 [ 16.194459] ff00: 00000000 00000000 00000000 c04e3664 00080011 00000002 06000000 ffffffff [ 16.202980] ff20: 0000ffff ffffffff 0000ffff c008dd54 ee5a6f08 ee636e80 c096972d c0089c14 [ 16.211499] ff40: 00000000 60000013 ee5a6f40 60000013 00000000 ee5a6f40 00000002 00000006 [ 16.220023] ff60: 00000000 edcfb700 00000001 ed2a2e00 c000f60c 00000001 0000011a c008ea34 [ 16.228540] ff80: 00000006 00000000 bee5f380 00000014 bee5f380 00000014 bee5f380 00000122 [ 16.237059] ffa0: c000f7c4 c000f5e0 bee5f380 00000014 00000006 bee5f394 00000148 00000000 [ 16.245581] ffc0: bee5f380 00000014 bee5f380 00000122 fffffd6e 00004300 00004800 00004400 [ 16.254104] ffe0: bee5f378 bee5f36c 000307ec b6f39044 40000010 00000006 ed36fa40 00000000 [ 16.262642] [] (cpsw_tx_interrupt) from [] (handle_irq_event_percpu+0x64/0x204) [ 16.272076] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x40/0x64) [ 16.281330] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xcc/0x1a8) [ 16.290220] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x20/0x30) [ 16.299197] [] (generic_handle_irq) from [] (__handle_domain_irq+0x64/0xdc) [ 16.308273] [] (__handle_domain_irq) from [] (gic_handle_irq+0x20/0x60) [ 16.316987] [] (gic_handle_irq) from [] (__irq_svc+0x44/0x5c) [ 16.324779] Exception stack(0xed3ffcb0 to 0xed3ffcf8) [ 16.330044] fca0: 00000001 ed2a33d8 00000000 ed2a2e00 [ 16.338567] fcc0: 60000013 ee536bf8 60000013 ee51b800 ee7e9880 ee67e268 ee7e9880 ee534800 [ 16.347090] fce0: c0ad0768 ed3ffcf8 c008e910 c05ef4e8 60000013 ffffffff [ 16.353987] [] (__irq_svc) from [] (_raw_spin_unlock_irqrestore+0x34/0x44) [ 16.362973] [] (_raw_spin_unlock_irqrestore) from [] (cpdma_check_free_tx_desc+0x60/0x6c) [ 16.373311] [] (cpdma_check_free_tx_desc) from [] (cpsw_ndo_start_xmit+0xb4/0x1ac) [ 16.383017] [] (cpsw_ndo_start_xmit) from [] (dev_hard_start_xmit+0x2a4/0x4c0) [ 16.392364] [] (dev_hard_start_xmit) from [] (sch_direct_xmit+0xf4/0x210) [ 16.401246] [] (sch_direct_xmit) from [] (__dev_queue_xmit+0x2ac/0x7bc) [ 16.409960] [] (__dev_queue_xmit) from [] (packet_sendmsg+0xc68/0xeb4) [ 16.418585] [] (packet_sendmsg) from [] (sock_sendmsg+0x14/0x24) [ 16.426663] [] (sock_sendmsg) from [] (SyS_sendto+0xb4/0xe0) [ 16.434377] [] (SyS_sendto) from [] (ret_fast_syscall+0x0/0x54) [ 16.442360] Code: e5943118 e593303c e3530000 0a000002 (e5930720) [ 16.448716] ---[ end trace a68159f094d85ba6 ]--- [ 16.453526] Kernel panic - not syncing: Fatal exception in interrupt [ 16.460149] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Mugunthan V N Cc: # v3.8+ Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4628205..e778703 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -509,9 +509,11 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { (func)(slave++, ##arg); \ } while (0) #define cpsw_get_slave_ndev(priv, __slave_no__) \ - (priv->slaves[__slave_no__].ndev) + ((__slave_no__ < priv->data.slaves) ? \ + priv->slaves[__slave_no__].ndev : NULL) #define cpsw_get_slave_priv(priv, __slave_no__) \ - ((priv->slaves[__slave_no__].ndev) ? \ + (((__slave_no__ < priv->data.slaves) && \ + (priv->slaves[__slave_no__].ndev)) ? \ netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \ #define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \ -- cgit v0.10.2 From f1158b74e54f2e2462ba5e2f45a118246d9d5b43 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 7 Jul 2015 15:55:56 +0200 Subject: bridge: mdb: zero out the local br_ip variable before use Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups") there's a check in br_ip_equal() for a matching vlan id, but the mdb functions were not modified to use (or at least zero it) so when an entry was added it would have a garbage vlan id (from the local br_ip variable in __br_mdb_add/del) and this would prevent it from being matched and also deleted. So zero out the whole local ip var to protect ourselves from future changes and also to fix the current bug, since there's no vlan id support in the mdb uapi - use always vlan id 0. Example before patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent RTNETLINK answers: Invalid argument After patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb Signed-off-by: Nikolay Aleksandrov Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups") Signed-off-by: David S. Miller diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 3bfc675..60868c2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -374,6 +374,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -420,6 +421,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_other_query.timer)) -- cgit v0.10.2 From 671b53eec2edcbfac3e53d02cf3d0c6d9ecc07de Mon Sep 17 00:00:00 2001 From: Shradha Shah Date: Wed, 8 Jul 2015 10:12:45 +0100 Subject: sfc: Ensure down_write(&filter_sem) and up_write() are matched before calling efx_net_open() This patch avoids the double up_write to filter_sem if efx_net_open() fails. Resolves: 2d432f20d27c1813a2746008e16dd6ce12a14dc1 Signed-off-by: Shradha Shah Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 7485f71..3c17f27 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -598,21 +598,21 @@ int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, vf->vlan, &vf->vport_id); if (rc) - goto reset_nic; + goto reset_nic_up_write; restore_mac: if (!is_zero_ether_addr(vf->mac)) { rc2 = efx_ef10_vport_add_mac(efx, vf->vport_id, vf->mac); if (rc2) { eth_zero_addr(vf->mac); - goto reset_nic; + goto reset_nic_up_write; } } restore_evb_port: rc2 = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); if (rc2) - goto reset_nic; + goto reset_nic_up_write; else vf->vport_assigned = 1; @@ -620,14 +620,16 @@ restore_vadaptor: if (vf->efx) { rc2 = efx_ef10_vadaptor_alloc(vf->efx, EVB_PORT_ID_ASSIGNED); if (rc2) - goto reset_nic; + goto reset_nic_up_write; } restore_filters: if (vf->efx) { rc2 = vf->efx->type->filter_table_probe(vf->efx); if (rc2) - goto reset_nic; + goto reset_nic_up_write; + + up_write(&vf->efx->filter_sem); up_write(&vf->efx->filter_sem); @@ -639,9 +641,12 @@ restore_filters: } return rc; +reset_nic_up_write: + if (vf->efx) + up_write(&vf->efx->filter_sem); + reset_nic: if (vf->efx) { - up_write(&vf->efx->filter_sem); netif_err(efx, drv, efx->net_dev, "Failed to restore VF - scheduling reset.\n"); efx_schedule_reset(vf->efx, RESET_TYPE_DATAPATH); -- cgit v0.10.2 From 974d7af5fcc295dcf8315255142b2fe44fd74b0c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 7 Jul 2015 13:56:57 -0400 Subject: ipv4: add support for linkdown sysctl to netconf This kernel patch exports the value of the new ignore_routes_with_linkdown via netconf. v2: changes to notify userspace via netlink when sysctl values change and proposed for 'net' since this could be considered a bugfix Signed-off-by: Andy Gospodarek Suggested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 669a1f0..23cbd34 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -15,6 +15,7 @@ enum { NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, NETCONFA_PROXY_NEIGH, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7498716..e813196 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + size += nla_total_size(4); return size; } @@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, + [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, ifindex, cnf); } + if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + ifindex, cnf); + } } return ret; -- cgit v0.10.2 From 0769636cb5b95665ebadcd1a41c46f331f5a397d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 7 Jul 2015 14:02:18 -0400 Subject: vmxnet3: prevent receive getting out of sequence on napi poll vmxnet3's current napi path is built to count every rx descriptor we recieve, and use that as a count of the napi budget. That means its possible to return from a napi poll halfway through recieving a fragmented packet accross multiple dma descriptors. If that happens, the next napi poll will start with the descriptor ring in an improper state (e.g. the first descriptor we look at may have the end-of-packet bit set), which will cause a BUG halt in the driver. Fix the issue by only counting whole received packets in the napi poll and returning that value, rather than the descriptor count. Tested by the reporter and myself, successfully Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "David S. Miller" Acked-by: Andy Gospodarek Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index da11bb5..46f4cad 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1216,7 +1216,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, static const u32 rxprod_reg[2] = { VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 }; - u32 num_rxd = 0; + u32 num_pkts = 0; bool skip_page_frags = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; @@ -1235,13 +1235,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxDesc *rxd; u32 idx, ring_idx; struct vmxnet3_cmd_ring *ring = NULL; - if (num_rxd >= quota) { + if (num_pkts >= quota) { /* we may stop even before we see the EOP desc of * the current pkt */ break; } - num_rxd++; BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2); idx = rcd->rxdIdx; ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1; @@ -1413,6 +1412,7 @@ not_lro: napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + num_pkts++; } rcd_done: @@ -1443,7 +1443,7 @@ rcd_done: &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp); } - return num_rxd; + return num_pkts; } -- cgit v0.10.2 From 4eed4d8ff984abcb983ada5b3dbf56fce35f1068 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 7 Jul 2015 20:48:55 +0200 Subject: 3c59x: Fix shared IRQ handling As its first order of business, boomerang_interrupt() checks whether the device really has any pending interrupts. If it does not, it does nothing and returns, but it still returns IRQ_HANDLED. This is wrong: interrupt was not handled, IRQ handlers of other devices sharing this IRQ line need to be called. vortex_interrupt() has it right: it returns IRQ_NONE in this case via IRQ_RETVAL(0). Do the same in boomerang_interrupt(). Signed-off-by: Denys Vlasenko CC: David S. Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 41095eb..2d1ce3c 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2382,6 +2382,7 @@ boomerang_interrupt(int irq, void *dev_id) void __iomem *ioaddr; int status; int work_done = max_interrupt_work; + int handled = 0; ioaddr = vp->ioaddr; @@ -2400,6 +2401,7 @@ boomerang_interrupt(int irq, void *dev_id) if ((status & IntLatch) == 0) goto handler_exit; /* No interrupt: shared IRQs can cause this */ + handled = 1; if (status == 0xffff) { /* h/w no longer present (hotplug)? */ if (vortex_debug > 1) @@ -2501,7 +2503,7 @@ boomerang_interrupt(int irq, void *dev_id) handler_exit: vp->handling_irq = 0; spin_unlock(&vp->lock); - return IRQ_HANDLED; + return IRQ_RETVAL(handled); } static int vortex_rx(struct net_device *dev) -- cgit v0.10.2 From d065c3c17dae95832badf6329512dd057c905890 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Tue, 7 Jul 2015 13:54:12 -0700 Subject: drivers/net/usb: add device id for NVIDIA Tegra USB 3.0 Ethernet This device is sold as 'NVIDIA Tegra USB 3.0 Ethernet'. Chipset is RTL8153 and works with r8152. Signed-off-by: Zheng Liu Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 4545e78..35a2bff 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -523,6 +523,7 @@ static const struct driver_info wwan_info = { #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef +#define NVIDIA_VENDOR_ID 0x0955 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -710,6 +711,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index aafa1a1..7f6419e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -494,6 +494,7 @@ enum rtl8152_flags { #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_NVIDIA 0x0955 #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -4117,6 +4118,7 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {} }; -- cgit v0.10.2 From c936835c1ec6f871f32c9b87a7708700320075b3 Mon Sep 17 00:00:00 2001 From: Peter Dunning Date: Wed, 8 Jul 2015 10:05:10 +0100 Subject: sfc: Report TX completions to BQL after all TX events in interrupt The limit for BQL is updated each time we call netdev_tx_completed_queue. Without this patch the BQL limit was updated for every TX event we see. The issue was that this only updated the limit to handle the data we complete in two events as the first event wouldn't show that enough traffic had been processed between them. This was OK when interrupt moderation was off but not when it was on as more data had to be completed in a single interrupt. The patch changes this so that we do report the completion to BQL only when all the TX events in the interrupt have been processed. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 804b9ad..03bc03b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -245,11 +245,17 @@ static int efx_check_disabled(struct efx_nic *efx) */ static int efx_process_channel(struct efx_channel *channel, int budget) { + struct efx_tx_queue *tx_queue; int spent; if (unlikely(!channel->enabled)) return 0; + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->pkts_compl = 0; + tx_queue->bytes_compl = 0; + } + spent = efx_nic_process_eventq(channel, budget); if (spent && efx_channel_has_rx_queue(channel)) { struct efx_rx_queue *rx_queue = @@ -259,6 +265,14 @@ static int efx_process_channel(struct efx_channel *channel, int budget) efx_fast_push_rx_descriptors(rx_queue, true); } + /* Update BQL */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + if (tx_queue->bytes_compl) { + netdev_tx_completed_queue(tx_queue->core_txq, + tx_queue->pkts_compl, tx_queue->bytes_compl); + } + } + return spent; } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index d72f522..47d1e3a 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -241,6 +241,8 @@ struct efx_tx_queue { unsigned int read_count ____cacheline_aligned_in_smp; unsigned int old_write_count; unsigned int merge_events; + unsigned int bytes_compl; + unsigned int pkts_compl; /* Members used only on the xmit path */ unsigned int insert_count ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index aaf2987..1833a01 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -617,7 +617,8 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); - netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; if (pkts_compl > 1) ++tx_queue->merge_events; -- cgit v0.10.2 From d5de1987853a7778bb048a9e52b3486eb9aceb17 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:16:49 +0200 Subject: macvtap: Destroy minor_idr on module_exit Destroy minor_idr on module_exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: David S. Miller diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6a64197f..b4b6191 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1294,6 +1294,7 @@ static void macvtap_exit(void) class_unregister(macvtap_class); cdev_del(&macvtap_cdev); unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + idr_destroy(&minor_idr); } module_exit(macvtap_exit); -- cgit v0.10.2 From e2fc61f384c9224b55990a644bbcf68c25e20203 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 29 Jun 2015 19:15:03 +0300 Subject: ARCv2: [axs103] bump CPU frequency from 75 to 90 MHZ With up-to-date FPGA builds ARC cores are supposed to correctly operate even with 90 MHz clock (which is a target frequency for AXS103 release). Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 15c8d62..1cd5e82 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -12,7 +12,7 @@ / { compatible = "snps,arc"; - clock-frequency = <75000000>; + clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 199d428..2f0b332 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -12,7 +12,7 @@ / { compatible = "snps,arc"; - clock-frequency = <75000000>; + clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; -- cgit v0.10.2 From 80f420842ff42ad61f84584716d74ef635f13892 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Jul 2015 11:26:22 +0530 Subject: ARC: Make ARC bitops "safer" (add anti-optimization) ARCompact/ARCv2 ISA provide that any instructions which deals with bitpos/count operand ASL, LSL, BSET, BCLR, BMSK .... will only consider lower 5 bits. i.e. auto-clamp the pos to 0-31. ARC Linux bitops exploited this fact by NOT explicitly masking out upper bits for @nr operand in general, saving a bunch of AND/BMSK instructions in generated code around bitops. While this micro-optimization has worked well over years it is NOT safe as shifting a number with a value, greater than native size is "undefined" per "C" spec. So as it turns outm EZChip ran into this eventually, in their massive muti-core SMP build with 64 cpus. There was a test_bit() inside a loop from 63 to 0 and gcc was weirdly optimizing away the first iteration (so it was really adhering to standard by implementing undefined behaviour vs. removing all the iterations which were phony i.e. (1 << [63..32]) | for i = 63 to 0 | X = ( 1 << i ) | if X == 0 | continue So fix the code to do the explicit masking at the expense of generating additional instructions. Fortunately, this can be mitigated to a large extent as gcc has SHIFT_COUNT_TRUNCATED which allows combiner to fold masking into shift operation itself. It is currently not enabled in ARC gcc backend, but could be done after a bit of testing. Fixes STAR 9000866918 ("unsafe "undefined behavior" code in kernel") Reported-by: Noam Camus Cc: Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 99fe118..57c1f33 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -50,8 +50,7 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ * done for const @nr, but no code is generated due to gcc \ * const prop. \ */ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ @@ -82,8 +81,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -129,16 +127,13 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ unsigned long temp, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ /* \ * spin lock/unlock provide the needed smp_mb() before/after \ */ \ bitops_lock(flags); \ \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ } @@ -149,17 +144,14 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * unsigned long old, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ bitops_lock(flags); \ \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -174,11 +166,8 @@ static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ unsigned long temp; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ } #define __TEST_N_BIT_OP(op, c_op, asm_op) \ @@ -187,13 +176,10 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long unsigned long old; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #define BIT_OPS(op, c_op, asm_op) \ @@ -224,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) addr += nr >> 5; - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - mask = 1 << nr; + mask = 1UL << (nr & 0x1f); return ((mask & *addr) != 0); } -- cgit v0.10.2 From 70d93d89416562c32adc9444a15677bdf25a72ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=ABl=20Porquet?= Date: Tue, 7 Jul 2015 17:14:56 -0400 Subject: arc:irqchip: prepare for drivers/irqchip/irqchip.h removal The IRQCHIP_DECLARE macro migrated to 'include/linux/irqchip.h'. See commit 91e20b5040c67c51aad88cf87db4305c5bd7f79d ("irqchip: Move IRQCHIP_DECLARE macro to include/linux/irqchip.h"). This patch removes the inclusions of private header 'drivers/irqchip/irqchip.h' and if necessary replaces them with inclusions of 'include/linux/irqchip.h'. Signed-off-by: Joel Porquet Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 6208c63..26c1568 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -12,7 +12,6 @@ #include #include #include -#include "../../drivers/irqchip/irqchip.h" #include /* diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index fcdddb6..039fac3 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -12,7 +12,6 @@ #include #include #include -#include "../../drivers/irqchip/irqchip.h" #include /* diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 6fb0a2f..2fb8658 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -175,7 +175,6 @@ void mcip_init_early_smp(void) #include #include #include -#include "../../drivers/irqchip/irqchip.h" /* * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core) -- cgit v0.10.2 From 9138d4138d8e1f0acf734d2fb3455108bf43380f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 25 Jun 2015 13:46:57 +0530 Subject: ARC: Add llock/scond to futex backend Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h index 05b5aaf..70cfe16 100644 --- a/arch/arc/include/asm/futex.h +++ b/arch/arc/include/asm/futex.h @@ -16,12 +16,40 @@ #include #include +#ifdef CONFIG_ARC_HAS_LLSC + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ + \ + __asm__ __volatile__( \ + "1: llock %1, [%2] \n" \ + insn "\n" \ + "2: scond %0, [%2] \n" \ + " bnz 1b \n" \ + " mov %0, 0 \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + "4: mov %0, %4 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .align 4 \n" \ + " .word 1b, 4b \n" \ + " .word 2b, 4b \n" \ + " .previous \n" \ + \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ + : "cc", "memory") + +#else /* !CONFIG_ARC_HAS_LLSC */ + #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ \ __asm__ __volatile__( \ - "1: ld %1, [%2] \n" \ + "1: ld %1, [%2] \n" \ insn "\n" \ - "2: st %0, [%2] \n" \ + "2: st %0, [%2] \n" \ " mov %0, 0 \n" \ "3: \n" \ " .section .fixup,\"ax\" \n" \ @@ -39,6 +67,8 @@ : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ : "cc", "memory") +#endif + static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; @@ -123,11 +153,17 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, pagefault_disable(); - /* TBD : can use llock/scond */ __asm__ __volatile__( - "1: ld %0, [%3] \n" - " brne %0, %1, 3f \n" - "2: st %2, [%3] \n" +#ifdef CONFIG_ARC_HAS_LLSC + "1: llock %0, [%3] \n" + " brne %0, %1, 3f \n" + "2: scond %2, [%3] \n" + " bnz 1b \n" +#else + "1: ld %0, [%3] \n" + " brne %0, %1, 3f \n" + "2: st %2, [%3] \n" +#endif "3: \n" " .section .fixup,\"ax\" \n" "4: mov %0, %4 \n" -- cgit v0.10.2 From b631788ab42ea4ba97dc30b3f47bad0edb0ddb4b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Jul 2015 15:25:21 +0530 Subject: ARC: slightly refactor macros for boot logging Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a3d1862..08d98ee 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -150,9 +150,10 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x00, NULL } } }; -#define IS_AVAIL1(v, str) ((v) ? str : "") -#define IS_USED(cfg) (IS_ENABLED(cfg) ? "" : "(not used) ") -#define IS_AVAIL2(v, str, cfg) IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg)) +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { @@ -226,7 +227,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); } n += scnprintf(buf + n, len - n, "%s", - IS_USED(CONFIG_ARC_HAS_HW_MPY)); + IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY)); } n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", -- cgit v0.10.2 From 02941007f59ce015233d4c0f7047776960bf0c17 Mon Sep 17 00:00:00 2001 From: "qipeng.zha" Date: Thu, 9 Jul 2015 00:14:15 +0800 Subject: intel_pmc_ipc: Update kerneldoc formatting Update kerneldoc formatting per Documentation/kernel-dec-nano-HOWTO.txt. Signed-off-by: qipeng.zha Signed-off-by: Darren Hart diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 200ec2e..cd0310e 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -25,36 +25,9 @@ #if IS_ENABLED(CONFIG_INTEL_PMC_IPC) -/* - * intel_pmc_ipc_simple_command - * @cmd: command - * @sub: sub type - */ int intel_pmc_ipc_simple_command(int cmd, int sub); - -/* - * intel_pmc_ipc_raw_cmd - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - * @sptr: data writing to SPTR register - * @dptr: data writing to DPTR register - */ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen, u32 dptr, u32 sptr); - -/* - * intel_pmc_ipc_command - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - */ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index 69d43b8..105cfff 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -210,10 +210,15 @@ static int intel_pmc_ipc_check_status(void) return ret; } -/* - * intel_pmc_ipc_simple_command - * @cmd: command - * @sub: sub type +/** + * intel_pmc_ipc_simple_command() - Simple IPC command + * @cmd: IPC command code. + * @sub: IPC command sub type. + * + * Send a simple IPC command to PMC when don't need to specify + * input/output data and source/dest pointers. + * + * Return: an IPC error code or 0 on success. */ int intel_pmc_ipc_simple_command(int cmd, int sub) { @@ -232,16 +237,20 @@ int intel_pmc_ipc_simple_command(int cmd, int sub) } EXPORT_SYMBOL_GPL(intel_pmc_ipc_simple_command); -/* - * intel_pmc_ipc_raw_cmd - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - * @sptr: data writing to SPTR register - * @dptr: data writing to DPTR register +/** + * intel_pmc_ipc_raw_cmd() - IPC command with data and pointers + * @cmd: IPC command code. + * @sub: IPC command sub type. + * @in: input data of this IPC command. + * @inlen: input data length in bytes. + * @out: output data of this IPC command. + * @outlen: output data length in dwords. + * @sptr: data writing to SPTR register. + * @dptr: data writing to DPTR register. + * + * Send an IPC command to PMC with input/output data and source/dest pointers. + * + * Return: an IPC error code or 0 on success. */ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen, u32 dptr, u32 sptr) @@ -278,14 +287,18 @@ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, } EXPORT_SYMBOL_GPL(intel_pmc_ipc_raw_cmd); -/* - * intel_pmc_ipc_command - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords +/** + * intel_pmc_ipc_command() - IPC command with input/output data + * @cmd: IPC command code. + * @sub: IPC command sub type. + * @in: input data of this IPC command. + * @inlen: input data length in bytes. + * @out: output data of this IPC command. + * @outlen: output data length in dwords. + * + * Send an IPC command to PMC with input/output data. + * + * Return: an IPC error code or 0 on success. */ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen) -- cgit v0.10.2 From 838f13bf4b6737d4aec508558e45f81798fc2677 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:47 -0400 Subject: blkcg: allow blkcg_pol_mutex to be grabbed from cgroup [file] methods blkcg_pol_mutex primarily protects the blkcg_policy array. It also protects cgroup file type [un]registration during policy addition / removal. This puts blkcg_pol_mutex outside cgroup internal synchronization and in turn makes it impossible to grab from blkcg's cgroup methods as that leads to cyclic dependency. Another problematic dependency arising from this is through cgroup interface file deactivation. Removing a cftype requires removing all files of the type which in turn involves draining all on-going invocations of the file methods. This means that an interface file implementation can't grab blkcg_pol_mutex as draining can lead to AA deadlock. blkcg_reset_stats() is already in this situation. It currently trylocks blkcg_pol_mutex and then unwinds and retries the whole operation on failure, which is cumbersome at best. It has a lengthy comment explaining how cgroup internal synchronization is involved and expected to be updated but as explained above this doesn't need cgroup internal locking to deadlock. It's a self-contained AA deadlock. The described circular dependencies can be easily broken by moving cftype [un]registration out of blkcg_pol_mutex and protect them with an outer mutex. This patch introduces blkcg_pol_register_mutex which wraps entire policy [un]registration including cftype operations and shrinks blkcg_pol_mutex critical section. This also makes the trylock dancing in blkcg_reset_stats() unnecessary. Removed. This patch is necessary for the following blkcg_policy_data allocation bug fixes. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5e2723f..2ff74ff 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -29,6 +29,14 @@ #define MAX_KEY_LEN 100 +/* + * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. + * blkcg_pol_register_mutex nests outside of it and synchronizes entire + * policy [un]register operations including cgroup file additions / + * removals. Putting cgroup file registration outside blkcg_pol_mutex + * allows grabbing it from cgroup callbacks. + */ +static DEFINE_MUTEX(blkcg_pol_register_mutex); static DEFINE_MUTEX(blkcg_pol_mutex); struct blkcg blkcg_root; @@ -453,20 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, struct blkcg_gq *blkg; int i; - /* - * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex - * which ends up putting cgroup's internal cgroup_tree_mutex under - * it; however, cgroup_tree_mutex is nested above cgroup file - * active protection and grabbing blkcg_pol_mutex from a cgroup - * file operation creates a possible circular dependency. cgroup - * internal locking is planned to go through further simplification - * and this issue should go away soon. For now, let's trylock - * blkcg_pol_mutex and restart the write on failure. - * - * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com - */ - if (!mutex_trylock(&blkcg_pol_mutex)) - return restart_syscall(); + mutex_lock(&blkcg_pol_mutex); spin_lock_irq(&blkcg->lock); /* @@ -1190,6 +1185,7 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) return -EINVAL; + mutex_lock(&blkcg_pol_register_mutex); mutex_lock(&blkcg_pol_mutex); /* find an empty slot */ @@ -1198,19 +1194,23 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (!blkcg_policy[i]) break; if (i >= BLKCG_MAX_POLS) - goto out_unlock; + goto err_unlock; /* register and update blkgs */ pol->plid = i; blkcg_policy[i] = pol; + mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ if (pol->cftypes) WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys, pol->cftypes)); - ret = 0; -out_unlock: + mutex_unlock(&blkcg_pol_register_mutex); + return 0; + +err_unlock: mutex_unlock(&blkcg_pol_mutex); + mutex_unlock(&blkcg_pol_register_mutex); return ret; } EXPORT_SYMBOL_GPL(blkcg_policy_register); @@ -1223,7 +1223,7 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { - mutex_lock(&blkcg_pol_mutex); + mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) goto out_unlock; @@ -1233,8 +1233,10 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) cgroup_rm_cftypes(pol->cftypes); /* unregister and update blkgs */ + mutex_lock(&blkcg_pol_mutex); blkcg_policy[pol->plid] = NULL; -out_unlock: mutex_unlock(&blkcg_pol_mutex); +out_unlock: + mutex_unlock(&blkcg_pol_register_mutex); } EXPORT_SYMBOL_GPL(blkcg_policy_unregister); -- cgit v0.10.2 From 144232b34258c1fc19729e077c6fb161e30da07b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:48 -0400 Subject: blkcg: blkcg_css_alloc() should grab blkcg_pol_mutex while iterating blkcg_policy[] An entry in blkcg_policy[] is stable while there are non-bypassing in-flight IOs on a request_queue which has the policy activated. This is why most derefs of blkcg_policy[] don't need explicit locking; however, blkcg_css_alloc() isn't invoked from IO path and thus doesn't have this protection and may race policies being added and removed. Fix it by adding explicit blkcg_pol_mutex protection around blkcg_policy[] iteration in blkcg_css_alloc(). Signed-off-by: Tejun Heo Fixes: e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2ff74ff..05b893d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -844,6 +844,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) goto free_blkcg; } + mutex_lock(&blkcg_pol_mutex); + for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; @@ -860,6 +862,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) BUG_ON(blkcg->pd[i]); cpd = kzalloc(pol->cpd_size, GFP_KERNEL); if (!cpd) { + mutex_unlock(&blkcg_pol_mutex); ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } @@ -868,6 +871,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) pol->cpd_init_fn(blkcg); } + mutex_unlock(&blkcg_pol_mutex); done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); -- cgit v0.10.2 From 7876f930d0e78addc6bbdbba0d6c196a0788d545 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:49 -0400 Subject: blkcg: implement all_blkcgs list Add all_blkcgs list goes through blkcg->all_blkcgs_node and is protected by blkcg_pol_mutex. This will be used to fix blkcg_policy_data allocation bug. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 05b893d..42ff436 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -46,6 +46,8 @@ struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -817,6 +819,10 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); + mutex_lock(&blkcg_pol_mutex); + list_del(&blkcg->all_blkcgs_node); + mutex_unlock(&blkcg_pol_mutex); + if (blkcg != &blkcg_root) { int i; @@ -833,6 +839,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) struct cgroup_subsys_state *ret; int i; + mutex_lock(&blkcg_pol_mutex); + if (!parent_css) { blkcg = &blkcg_root; goto done; @@ -844,8 +852,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) goto free_blkcg; } - mutex_lock(&blkcg_pol_mutex); - for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; @@ -862,7 +868,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) BUG_ON(blkcg->pd[i]); cpd = kzalloc(pol->cpd_size, GFP_KERNEL); if (!cpd) { - mutex_unlock(&blkcg_pol_mutex); ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } @@ -871,7 +876,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) pol->cpd_init_fn(blkcg); } - mutex_unlock(&blkcg_pol_mutex); done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); @@ -879,14 +883,17 @@ done: #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); #endif + list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); + + mutex_unlock(&blkcg_pol_mutex); return &blkcg->css; free_pd_blkcg: for (i--; i >= 0; i--) kfree(blkcg->pd[i]); - free_blkcg: kfree(blkcg); + mutex_unlock(&blkcg_pol_mutex); return ret; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 58cfab8..cf3e7bc 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -47,6 +47,7 @@ struct blkcg { struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; + struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif -- cgit v0.10.2 From 06b285bd11257bccc5a1b85a835507e33656aff2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:50 -0400 Subject: blkcg: fix blkcg_policy_data allocation bug e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") updated per-blkcg policy data to be dynamically allocated. When a policy is registered, its policy data aren't created. Instead, when the policy is activated on a queue, the policy data are allocated if there are blkg's (blkcg_gq's) which are attached to a given blkcg. This is buggy. Consider the following scenario. 1. A blkcg is created. No blkg's attached yet. 2. The policy is registered. No policy data is allocated. 3. The policy is activated on a queue. As the above blkcg doesn't have any blkg's, it won't allocate the matching blkcg_policy_data. 4. An IO is issued from the blkcg and blkg is created and the blkcg still doesn't have the matching policy data allocated. With cfq-iosched, this leads to an oops. It also doesn't free policy data on policy unregistration assuming that freeing of all policy data on blkcg destruction should take care of it; however, this also is incorrect. 1. A blkcg has policy data. 2. The policy gets unregistered but the policy data remains. 3. Another policy gets registered on the same slot. 4. Later, the new policy tries to allocate policy data on the previous blkcg but the slot is already occupied and gets skipped. The policy ends up operating on the policy data of the previous policy. There's no reason to manage blkcg_policy_data lazily. The reason we do lazy allocation of blkg's is that the number of all possible blkg's is the product of cgroups and block devices which can reach a surprising level. blkcg_policy_data is contrained by the number of cgroups and shouldn't be a problem. This patch makes blkcg_policy_data to be allocated for all existing blkcg's on policy registration and freed on unregistration and removes blkcg_policy_data handling from policy [de]activation paths. This makes that blkcg_policy_data are created and removed with the policy they belong to and fixes the above described problems. Signed-off-by: Tejun Heo Fixes: e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 42ff436..9da02c0 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1048,10 +1048,8 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - LIST_HEAD(cpds); struct blkcg_gq *blkg; struct blkg_policy_data *pd, *nd; - struct blkcg_policy_data *cpd, *cnd; int cnt = 0, ret; if (blkcg_policy_enabled(q, pol)) @@ -1064,10 +1062,7 @@ int blkcg_activate_policy(struct request_queue *q, cnt++; spin_unlock_irq(q->queue_lock); - /* - * Allocate per-blkg and per-blkcg policy data - * for all existing blkgs. - */ + /* allocate per-blkg policy data for all existing blkgs */ while (cnt--) { pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); if (!pd) { @@ -1075,15 +1070,6 @@ int blkcg_activate_policy(struct request_queue *q, goto out_free; } list_add_tail(&pd->alloc_node, &pds); - - if (!pol->cpd_size) - continue; - cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node); - if (!cpd) { - ret = -ENOMEM; - goto out_free; - } - list_add_tail(&cpd->alloc_node, &cpds); } /* @@ -1093,32 +1079,17 @@ int blkcg_activate_policy(struct request_queue *q, spin_lock_irq(q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { - if (WARN_ON(list_empty(&pds)) || - WARN_ON(pol->cpd_size && list_empty(&cpds))) { + if (WARN_ON(list_empty(&pds))) { /* umm... this shouldn't happen, just abort */ ret = -ENOMEM; goto out_unlock; } - cpd = list_first_entry(&cpds, struct blkcg_policy_data, - alloc_node); - list_del_init(&cpd->alloc_node); pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); list_del_init(&pd->alloc_node); /* grab blkcg lock too while installing @pd on @blkg */ spin_lock(&blkg->blkcg->lock); - if (!pol->cpd_size) - goto no_cpd; - if (!blkg->blkcg->pd[pol->plid]) { - /* Per-policy per-blkcg data */ - blkg->blkcg->pd[pol->plid] = cpd; - cpd->plid = pol->plid; - pol->cpd_init_fn(blkg->blkcg); - } else { /* must free it as it has already been extracted */ - kfree(cpd); - } -no_cpd: blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; @@ -1135,8 +1106,6 @@ out_free: blk_queue_bypass_end(q); list_for_each_entry_safe(pd, nd, &pds, alloc_node) kfree(pd); - list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node) - kfree(cpd); return ret; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); @@ -1191,6 +1160,7 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); */ int blkcg_policy_register(struct blkcg_policy *pol) { + struct blkcg *blkcg; int i, ret; if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) @@ -1207,9 +1177,27 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (i >= BLKCG_MAX_POLS) goto err_unlock; - /* register and update blkgs */ + /* register @pol */ pol->plid = i; - blkcg_policy[i] = pol; + blkcg_policy[pol->plid] = pol; + + /* allocate and install cpd's */ + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + struct blkcg_policy_data *cpd; + + cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + if (!cpd) { + mutex_unlock(&blkcg_pol_mutex); + goto err_free_cpds; + } + + blkcg->pd[pol->plid] = cpd; + cpd->plid = pol->plid; + pol->cpd_init_fn(blkcg); + } + } + mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ @@ -1219,6 +1207,14 @@ int blkcg_policy_register(struct blkcg_policy *pol) mutex_unlock(&blkcg_pol_register_mutex); return 0; +err_free_cpds: + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } + blkcg_policy[pol->plid] = NULL; err_unlock: mutex_unlock(&blkcg_pol_mutex); mutex_unlock(&blkcg_pol_register_mutex); @@ -1234,6 +1230,8 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { + struct blkcg *blkcg; + mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) @@ -1243,9 +1241,17 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) if (pol->cftypes) cgroup_rm_cftypes(pol->cftypes); - /* unregister and update blkgs */ + /* remove cpds and unregister */ mutex_lock(&blkcg_pol_mutex); + + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } blkcg_policy[pol->plid] = NULL; + mutex_unlock(&blkcg_pol_mutex); out_unlock: mutex_unlock(&blkcg_pol_register_mutex); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index cf3e7bc..1b62d76 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -89,18 +89,12 @@ struct blkg_policy_data { * Policies that need to keep per-blkcg data which is independent * from any request_queue associated to it must specify its size * with the cpd_size field of the blkcg_policy structure and - * embed a blkcg_policy_data in it. blkcg core allocates - * policy-specific per-blkcg structures lazily the first time - * they are actually needed, so it handles them together with - * blkgs. cpd_init() is invoked to let each policy handle - * per-blkcg data. + * embed a blkcg_policy_data in it. cpd_init() is invoked to let + * each policy handle per-blkcg data. */ struct blkcg_policy_data { /* the policy id this per-policy data belongs to */ int plid; - - /* used during policy activation */ - struct list_head alloc_node; }; /* association between a blk cgroup and a request queue */ -- cgit v0.10.2 From ae0afb4f5d44d17a0fd135ae000e2acf12c53617 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 9 Jul 2015 22:59:49 +0200 Subject: suspend-to-idle: Prevent RCU from complaining about tick_freeze() Put tick_freeze() under RCU_NONIDLE() to prevent RCU from complaining about suspicious RCU usage in idle by trace_suspend_resume() called from there. While at it, fix a comment related to another usage of RCU_NONIDLE() in enter_freeze_proper(). Signed-off-by: Rafael J. Wysocki Reviewed-by: Paul E. McKenney diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index e8e2775..48b7228 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -112,7 +112,12 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv, static void enter_freeze_proper(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { - tick_freeze(); + /* + * trace_suspend_resume() called by tick_freeze() for the last CPU + * executing it contains RCU usage regarded as invalid in the idle + * context, so tell RCU about that. + */ + RCU_NONIDLE(tick_freeze()); /* * The state used here cannot be a "coupled" one, because the "coupled" * cpuidle mechanism enables interrupts and doing that with timekeeping @@ -122,7 +127,7 @@ static void enter_freeze_proper(struct cpuidle_driver *drv, WARN_ON(!irqs_disabled()); /* * timekeeping_resume() that will be called by tick_unfreeze() for the - * last CPU executing it calls functions containing RCU read-side + * first CPU executing it calls functions containing RCU read-side * critical sections, so tell RCU about that. */ RCU_NONIDLE(tick_unfreeze()); -- cgit v0.10.2 From adb3505086def6d3e37013ec6cf8313cc719cbb6 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 8 Jul 2015 10:49:30 +0530 Subject: net: systemport: Use eth_hw_addr_random Use eth_hw_addr_random() instead of calling random_ether_addr(). Here, this change is setting addr_assign_type to NET_ADDR_RANDOM. The Coccinelle semantic patch that performs this transformation is as follows: @@ identifier a,b; @@ -random_ether_addr(a->b); +eth_hw_addr_random(a); Signed-off-by: Vaishali Thakkar Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 909ad7a..4566cdf 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1793,7 +1793,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) macaddr = of_get_mac_address(dn); if (!macaddr || !is_valid_ether_addr(macaddr)) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } else { ether_addr_copy(dev->dev_addr, macaddr); } -- cgit v0.10.2 From 5a0266af10246c84a6c712f365788800c7e23fd4 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 8 Jul 2015 14:35:22 +0530 Subject: drivers: net: cpsw: fix disabling of tx interrupt in rx isr In commit 'c03abd84634d ("net: ethernet: cpsw: don't requests IRQs we don't use")', common isr is split into tx and rx, but in rx isr tx interrupt is also disabledi in cpsw_disable_irq(). So tx interrupts are not handled during rx interrupts and rx napi completion and results in poor tx performance by 40Mbps. Fixing by disabling only rx interrupt in rx isr. Cc: Felipe Balbi Cc: # v4.0+ Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e778703..f335bf1 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -138,19 +138,6 @@ do { \ #define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT) #define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1) -#define cpsw_enable_irq(priv) \ - do { \ - u32 i; \ - for (i = 0; i < priv->num_irqs; i++) \ - enable_irq(priv->irqs_table[i]); \ - } while (0) -#define cpsw_disable_irq(priv) \ - do { \ - u32 i; \ - for (i = 0; i < priv->num_irqs; i++) \ - disable_irq_nosync(priv->irqs_table[i]); \ - } while (0) - #define cpsw_slave_index(priv) \ ((priv->data.dual_emac) ? priv->emac_port : \ priv->data.active_slave) @@ -783,7 +770,7 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) cpsw_intr_disable(priv); if (priv->irq_enabled == true) { - cpsw_disable_irq(priv); + disable_irq_nosync(priv->irqs_table[0]); priv->irq_enabled = false; } @@ -819,7 +806,7 @@ static int cpsw_poll(struct napi_struct *napi, int budget) prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { prim_cpsw->irq_enabled = true; - cpsw_enable_irq(priv); + enable_irq(priv->irqs_table[0]); } } @@ -1335,7 +1322,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (prim_cpsw->irq_enabled == false) { if ((priv == prim_cpsw) || !netif_running(prim_cpsw->ndev)) { prim_cpsw->irq_enabled = true; - cpsw_enable_irq(prim_cpsw); + enable_irq(prim_cpsw->irqs_table[0]); } } -- cgit v0.10.2 From 4a0e3e989d66bb7204b163d9cfaa7fa96d0f2023 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Wed, 8 Jul 2015 13:05:57 +0200 Subject: cdc_ncm: Add support for moving NDP to end of NCM frame NCM specs are not actually mandating a specific position in the frame for the NDP (Network Datagram Pointer). However, some Huawei devices will ignore our aggregates if it is not placed after the datagrams it points to. Add support for doing just this, in a per-device configurable way. While at it, update NCM subdrivers, disabling this functionality in all of them, except in huawei_cdc_ncm where it is enabled instead. We aren't making any distinction between different Huawei NCM devices, based on what the vendor driver does. Standard NCM devices are left unaffected: if they are compliant, they should be always usable, still stay on the safe side. This change has been tested and working with a Huawei E3131 device (which works regardless of NDP position), a Huawei E3531 (also working both ways) and an E3372 (which mandates NDP to be after indexed datagrams). V1->V2: - corrected wrong NDP acronym definition - fixed possible NULL pointer dereference - patch cleanup V2->V3: - Properly account for the NDP size when writing new packets to SKB Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e4b7a47..efc18e0 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -158,7 +158,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; - ret = cdc_ncm_bind_common(dev, intf, data_altsetting); + ret = cdc_ncm_bind_common(dev, intf, data_altsetting, 0); if (ret) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8067b8f..1991e4a 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -684,10 +684,12 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) ctx->tx_curr_skb = NULL; } + kfree(ctx->delayed_ndp16); + kfree(ctx); } -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; @@ -855,6 +857,17 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* Device-specific flags */ + ctx->drvflags = drvflags; + + /* Allocate the delayed NDP if needed. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); + if (!ctx->delayed_ndp16) + goto error2; + dev_info(&intf->dev, "NDP will be placed at end of frame for this device."); + } + /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; @@ -954,8 +967,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; - /* The NCM data altsetting is fixed */ - ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM); + /* The NCM data altsetting is fixed, so we hard-coded it. + * Additionally, generic NCM devices are assumed to accept arbitrarily + * placed NDP. + */ + ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0); /* * We should get an event when network connection is "connected" or @@ -986,6 +1002,14 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ struct usb_cdc_ncm_nth16 *nth16 = (void *)skb->data; size_t ndpoffset = le16_to_cpu(nth16->wNdpIndex); + /* If NDP should be moved to the end of the NCM package, we can't follow the + * NTH16 header as we would normally do. NDP isn't written to the SKB yet, and + * the wNdpIndex field in the header is actually not consistent with reality. It will be later. + */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + if (ctx->delayed_ndp16->dwSignature == sign) + return ctx->delayed_ndp16; + /* follow the chain of NDPs, looking for a match */ while (ndpoffset) { ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); @@ -995,7 +1019,8 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ } /* align new NDP */ - cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); /* verify that there is room for the NDP and the datagram (reserve) */ if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) @@ -1008,7 +1033,11 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + else + ndp16 = ctx->delayed_ndp16; + ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; @@ -1023,6 +1052,15 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) struct sk_buff *skb_out; u16 n = 0, index, ndplen; u8 ready2send = 0; + u32 delayed_ndp_size; + + /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated + * accordingly. Otherwise, we should check here. + */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + delayed_ndp_size = ctx->max_ndp_size; + else + delayed_ndp_size = 0; /* if there is a remaining skb, it gets priority */ if (skb != NULL) { @@ -1077,7 +1115,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max); /* check if we had enough room left for both NDP and frame */ - if (!ndp16 || skb_out->len + skb->len > ctx->tx_max) { + if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) { if (n == 0) { /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); @@ -1150,6 +1188,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } + /* If requested, put NDP at end of frame. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; + cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max); + nth16->wNdpIndex = cpu_to_le16(skb_out->len); + memcpy(skb_put(skb_out, ctx->max_ndp_size), ctx->delayed_ndp16, ctx->max_ndp_size); + + /* Zero out delayed NDP - signature checking will naturally fail. */ + ndp16 = memset(ctx->delayed_ndp16, 0, ctx->max_ndp_size); + } + /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 735f7da..2680a65 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -73,11 +73,14 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev, struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; struct huawei_cdc_ncm_state *drvstate = (void *)&usbnet_dev->data; + int drvflags = 0; /* altsetting should always be 1 for NCM devices - so we hard-coded - * it here + * it here. Some huawei devices will need the NDP part of the NCM package to + * be at the end of the frame. */ - ret = cdc_ncm_bind_common(usbnet_dev, intf, 1); + drvflags |= CDC_NCM_FLAG_NDP_TO_END; + ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags); if (ret) goto err; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484..1f6526c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -80,6 +80,9 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) +/* Driver flags */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ + #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -103,9 +106,11 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + int drvflags; u32 timer_interval; u32 max_ndp_size; + struct usb_cdc_ncm_ndp16 *delayed_ndp16; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -133,7 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); -- cgit v0.10.2 From fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:11 +0200 Subject: net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() pktgen_thread_worker() is obviously racy, kthread_stop() can come between the kthread_should_stop() check and set_current_state(). Signed-off-by: Oleg Nesterov Reported-by: Jan Stancek Reported-by: Marcelo Leitner Signed-off-by: David S. Miller diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 05badbb..41489e3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3572,8 +3572,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); -- cgit v0.10.2 From 1fbe4b46caca5b01b070af93d513031ffbcc480c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:13 +0200 Subject: net: pktgen: kill the "Wait for kthread_stop" code in pktgen_thread_worker() pktgen_thread_worker() doesn't need to wait for kthread_stop(), it can simply exit. Just pktgen_create_thread() and pg_net_exit() should do get_task_struct()/put_task_struct(). kthread_stop(dead_thread) is fine. Signed-off-by: Oleg Nesterov Signed-off-by: David S. Miller diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 41489e3..1ebdf1c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3571,15 +3571,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); - /* Wait for kthread_stop */ - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } - __set_current_state(TASK_RUNNING); - return 0; } @@ -3771,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) } t->net = pn; + get_task_struct(p); wake_up_process(p); wait_for_completion(&t->start_done); @@ -3893,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net) t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); kthread_stop(t->tsk); + put_task_struct(t->tsk); kfree(t); } -- cgit v0.10.2 From 35afd02e30d6368073df604920c4ea7cddadf5d0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Jul 2015 01:36:27 +0200 Subject: cpufreq: Initialize the governor again while restoring policy When all CPUs of a policy are hot-unplugged, we EXIT the governor but don't mark policy->governor as NULL. This was done in order to keep last used governor's information intact in sysfs, while the CPUs are offline. But we also need to clear policy->governor when restoring the policy. Because policy->governor still points to the last governor while policy is restored, following sequence of event happens: - cpufreq_init_policy() called while restoring policy - find_governor() matches last_governor string for present governors and returns last used governor's pointer, say ondemand. policy->governor already has the same address, unless the governor was removed in between. - cpufreq_set_policy() is called with both old/new policies governor set as ondemand. - Because governors matched, we skip governor initialization and return after calling __cpufreq_governor(CPUFREQ_GOV_LIMITS). Because the governor wasn't initialized for this policy, it returned -EBUSY. - cpufreq_init_policy() exits the policy on this error, but doesn't destroy it properly (should be fixed separately). - And so we enter a scenario where the policy isn't completely initialized but used. Fix this by setting policy->governor to NULL while restoring the policy. Reported-and-tested-by: Pi-Cheng Chen Reported-and-tested-by: "Jon Medhurst (Tixy)" Reported-and-tested-by: Steven Rostedt Fixes: 18bf3a124ef8 (cpufreq: Mark policy->governor = NULL for inactive policies) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b612411..2c22e39 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1132,6 +1132,7 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) down_write(&policy->rwsem); policy->cpu = cpu; + policy->governor = NULL; up_write(&policy->rwsem); } -- cgit v0.10.2 From 5a31d594a9732a2fa2eb83b0c4dcba75da2dff5d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Jul 2015 01:43:27 +0200 Subject: cpufreq: Allow freq_table to be obtained for offline CPUs Users of freq table may want to access it for any CPU from policy->related_cpus mask. One such user is cpu-cooling layer. It gets a list of 'clip_cpus' (equivalent to policy->related_cpus) during registration and tries to get freq_table for the first CPU of this mask. If the CPU, for which it tries to fetch freq_table, is offline, cpufreq_frequency_get_table() fails. This happens because it relies on cpufreq_cpu_get_raw() for its functioning which returns policy only for online CPUs. The fix is to access the policy data structure for the given CPU directly (which also returns a valid policy for offline CPUs), but the policy itself has to be active (meaning that at least one CPU using it is online) for the frequency table to be returned. Because we will be using 'cpufreq_cpu_data' now, which is internal to the cpufreq core, move cpufreq_frequency_get_table() to cpufreq.c. Reported-and-tested-by: Pi-Cheng Chen Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2c22e39..26063af 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -169,6 +169,15 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(get_governor_parent_kobj); +struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) +{ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + + return policy && !policy_is_inactive(policy) ? + policy->freq_table : NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); + static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index df14766..dfbbf981 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -297,15 +297,6 @@ int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show); -struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu); - -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - return policy ? policy->freq_table : NULL; -} -EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); - MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq frequency table helpers"); MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 1fb01ca93a1348a1469b8777326cd7632483de77 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 8 Jul 2015 15:26:39 +0800 Subject: ACPI / PCI: Fix regressions caused by resource_size_t overflow with 32-bit kernel Zoltan Boszormenyi reported this regression: "There's a Realtek RTL8111/8168/8411 (PCI ID 10ec:8168, Subsystem ID 1565:230e) network chip on the mainboard. After the r8169 driver loaded the IRQs in the machine went berserk. Keyboard keypressed arrived with considerable latency and duplicated, so no real work was possible. The machine responded to the power button but didn't actually power down. It just stuck at the powering down message. I had to press the power button for 4 seconds to power it down. The computer is a POS machine with a big battery inside. Because of this, either ACPI or the Realtek chip kept the bad state and after rebooting, the network chip didn't even show up in lspci. Not even the PXE ROM announced itself during boot. I had to disconnect the battery to beat some sense back to the computer. The regression happens with 4.0.5, 4.1.0-rc8 and 4.1.0-final. 3.18.16 was good." The regression is caused by commit 593669c2ac0f (x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation). Since commit 593669c2ac0f, x86 PCI ACPI host bridge driver validates ACPI resources by first converting an ACPI resource to a 'struct resource' structure and then applying checks against the converted resource structure. The 'start' and 'end' fields in 'struct resource' are defined to be type of resource_size_t, which may be 32 bits or 64 bits depending on CONFIG_PHYS_ADDR_T_64BIT. This may cause incorrect resource validation results with 32-bit kernels because 64-bit ACPI resource descriptors may get truncated when converting to 32-bit 'start' and 'end' fields in 'struct resource'. It eventually affects PCI resource allocation subsystem and makes some PCI devices and the system behave abnormally due to incorrect resource assignment. So enhance the ACPI resource parsing interfaces to ignore ACPI resource descriptors with address/offset above 4G when running in 32-bit mode. With the fix applied, the behavior of the machine was restored to how 3.18.16 worked, i.e. the memory range that is over 4GB is ignored again, and lspci -vvxxx shows that everything is at the same memory window as they were with 3.18.16. Reported-and-tested-by: Boszormenyi Zoltan Fixes: 593669c2ac0f (x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation) Signed-off-by: Jiang Liu Cc: 4.0+ # 4.0+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce..e8d2817 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -194,6 +194,7 @@ static bool acpi_decode_space(struct resource_win *win, u8 iodec = attr->granularity == 0xfff ? ACPI_DECODE_10 : ACPI_DECODE_16; bool wp = addr->info.mem.write_protect; u64 len = attr->address_length; + u64 start, end, offset = 0; struct resource *res = &win->res; /* @@ -205,9 +206,6 @@ static bool acpi_decode_space(struct resource_win *win, pr_debug("ACPI: Invalid address space min_addr_fix %d, max_addr_fix %d, len %llx\n", addr->min_address_fixed, addr->max_address_fixed, len); - res->start = attr->minimum; - res->end = attr->maximum; - /* * For bridges that translate addresses across the bridge, * translation_offset is the offset that must be added to the @@ -215,12 +213,22 @@ static bool acpi_decode_space(struct resource_win *win, * primary side. Non-bridge devices must list 0 for all Address * Translation offset bits. */ - if (addr->producer_consumer == ACPI_PRODUCER) { - res->start += attr->translation_offset; - res->end += attr->translation_offset; - } else if (attr->translation_offset) { + if (addr->producer_consumer == ACPI_PRODUCER) + offset = attr->translation_offset; + else if (attr->translation_offset) pr_debug("ACPI: translation_offset(%lld) is invalid for non-bridge device.\n", attr->translation_offset); + start = attr->minimum + offset; + end = attr->maximum + offset; + + win->offset = offset; + res->start = start; + res->end = end; + if (sizeof(resource_size_t) < sizeof(u64) && + (offset != win->offset || start != res->start || end != res->end)) { + pr_warn("acpi resource window ([%#llx-%#llx] ignored, not CPU addressable)\n", + attr->minimum, attr->maximum); + return false; } switch (addr->resource_type) { @@ -237,8 +245,6 @@ static bool acpi_decode_space(struct resource_win *win, return false; } - win->offset = attr->translation_offset; - if (addr->producer_consumer == ACPI_PRODUCER) res->flags |= IORESOURCE_WINDOW; -- cgit v0.10.2 From fcc028c106e5750aa66dc43595b3f29f88801ff0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 9 Jul 2015 22:21:20 +0900 Subject: net: axienet: Fix devm_ioremap_resource return value check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Value returned by devm_ioremap_resource() was checked for non-NULL but devm_ioremap_resource() returns IOMEM_ERR_PTR, not NULL. In case of error this could lead to dereference of ERR_PTR. Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 46aa27df8853 ("net: axienet: Use devm_* calls") Reviewed-by: Sören Brinkmann Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 4208dd7..d95f9aa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1530,9 +1530,9 @@ static int axienet_probe(struct platform_device *pdev) /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); - if (!lp->regs) { + if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); - ret = -ENOMEM; + ret = PTR_ERR(lp->regs); goto free_netdev; } @@ -1599,9 +1599,9 @@ static int axienet_probe(struct platform_device *pdev) goto free_netdev; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); - if (!lp->dma_regs) { + if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); - ret = -ENOMEM; + ret = PTR_ERR(lp->dma_regs); goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); -- cgit v0.10.2 From a7d35f9d73e9ffa74a02304b817e579eec632f67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jul 2015 18:56:07 +0200 Subject: bridge: fix potential crash in __netdev_pick_tx() Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") fixed an issue in normal forward path, caused by sender_cpu & napi_id skb fields being an union. Bridge is another point where skb can be forwarded, so we need the same cure. Bug triggers if packet was received on a NIC using skb_mark_napi_id() Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: Eric Dumazet Reported-by: Bob Liu Tested-by: Bob Liu Signed-off-by: David S. Miller diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b..0ff6e1b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) } else { skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } -- cgit v0.10.2 From 5d75a747596be046546eeb9d6ba39a3af851a1af Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 12:17:36 +0200 Subject: x86: hyperv: add CPUID bit for crash handlers Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 8fba544..f36d56b 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -108,6 +108,8 @@ #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) +/* Guest crash data handler available */ +#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10) /* * Implementation recommendations. Indicates which behaviors the hypervisor -- cgit v0.10.2 From d1fe9219551e914f26219afaca1063b280f25963 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 15:03:18 +0200 Subject: KVM: x86: reintroduce kvm_is_mmio_pfn The call to get_mt_mask was really using kvm_is_reserved_pfn to detect an MMIO-backed page. In this case, we want "false" to be returned for the zero page. Reintroduce a separate kvm_is_mmio_pfn predicate for this use only. Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f807496..4417146 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2479,6 +2479,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, return 0; } +static bool kvm_is_mmio_pfn(pfn_t pfn) +{ + if (pfn_valid(pfn)) + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + + return true; +} + static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, gfn_t gfn, pfn_t pfn, bool speculative, @@ -2506,7 +2514,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, - kvm_is_reserved_pfn(pfn)); + kvm_is_mmio_pfn(pfn)); if (host_writable) spte |= SPTE_HOST_WRITEABLE; -- cgit v0.10.2 From 370777daab3f024f1645177039955088e2e9ae73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 3 Jul 2015 15:49:28 +0200 Subject: KVM: VMX: fix vmwrite to invalid VMCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fpu_activate is called outside of vcpu_load(), which means it should not touch VMCS, but fpu_activate needs to. Avoid the call by moving it to a point where we know that the guest needs eager FPU and VMCS is loaded. This will get rid of the following trace vmwrite error: reg 6800 value 0 (err 1) [] dump_stack+0x19/0x1b [] vmwrite_error+0x2c/0x2e [kvm_intel] [] vmcs_writel+0x1f/0x30 [kvm_intel] [] vmx_fpu_activate.part.61+0x45/0xb0 [kvm_intel] [] vmx_fpu_activate+0x15/0x20 [kvm_intel] [] kvm_arch_vcpu_create+0x51/0x70 [kvm] [] kvm_vm_ioctl+0x1c1/0x760 [kvm] [] ? handle_mm_fault+0x49a/0xec0 [] do_vfs_ioctl+0x2e5/0x4c0 [] ? file_has_perm+0xae/0xc0 [] SyS_ioctl+0xa1/0xc0 [] system_call_fastpath+0x16/0x1b (Note: we also unconditionally activate FPU in vmx_vcpu_reset(), so the removed code added nothing.) Fixes: c447e76b4cab ("kvm/fpu: Enable eager restore kvm FPU for MPX") Cc: Reported-by: Vlastimil Holer Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 64dd467..2fbea25 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + if (vcpu->arch.eager_fpu) + kvm_x86_ops->fpu_activate(vcpu); /* * The existing code assumes virtual address is 48-bit in the canonical diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bbaf44e..6bd19c7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7315,11 +7315,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu = kvm_x86_ops->vcpu_create(kvm, id); - /* - * Activate fpu unconditionally in case the guest needs eager FPU. It will be - * deactivated soon if it doesn't. - */ - kvm_x86_ops->fpu_activate(vcpu); return vcpu; } -- cgit v0.10.2 From 5544eb9b81940647b8fad1f251b37cbe2819ce44 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 15:41:58 +0200 Subject: KVM: count number of assigned devices If there are no assigned devices, the guest PAT are not providing any useful information and can be overridden to writeback; VMX always does this because it has the "IPAT" bit in its extended page table entries, but SVM does not have anything similar. Hook into VFIO and legacy device assignment so that they provide this information to KVM. Reviewed-by: Alex Williamson Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2a7f5d7..49ec903 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -604,6 +604,8 @@ struct kvm_arch { bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; +#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE + atomic_t assigned_device_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 7dbced3..5c520eb 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) goto out_unmap; } + kvm_arch_start_assignment(kvm); pci_set_dev_assigned(pdev); dev_info(&pdev->dev, "kvm assign device\n"); @@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) iommu_detach_device(domain, &pdev->dev); pci_clear_dev_assigned(pdev); + kvm_arch_end_assignment(kvm); dev_info(&pdev->dev, "kvm deassign device\n"); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bd19c7..0024968 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8213,6 +8213,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_start_assignment(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); + +void kvm_arch_end_assignment(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { atomic_inc(&kvm->arch.noncoherent_dma_count); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9564fd7..05e99b8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -734,6 +734,24 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif +#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE +void kvm_arch_start_assignment(struct kvm *kvm); +void kvm_arch_end_assignment(struct kvm *kvm); +bool kvm_arch_has_assigned_device(struct kvm *kvm); +#else +static inline void kvm_arch_start_assignment(struct kvm *kvm) +{ +} + +static inline void kvm_arch_end_assignment(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return false; +} +#endif static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 620e37f..1dd087d 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) list_add_tail(&kvg->node, &kv->group_list); kvg->vfio_group = vfio_group; + kvm_arch_start_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_update_coherency(dev); @@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) break; } + kvm_arch_end_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_group_put_external_user(vfio_group); @@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); kfree(kvg); + kvm_arch_end_assignment(dev->kvm); } kvm_vfio_update_coherency(dev); -- cgit v0.10.2 From 3c2e7f7de3240216042b61073803b61b9b3cfb22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 14:32:17 +0200 Subject: KVM: SVM: use NPT page attributes Right now, NPT page attributes are not used, and the final page attribute depends solely on gPAT (which however is not synced correctly), the guest MTRRs and the guest page attributes. However, we can do better by mimicking what is done for VMX. In the absence of PCI passthrough, the guest PAT can be ignored and the page attributes can be just WB. If passthrough is being used, instead, keep respecting the guest PAT, and emulate the guest MTRRs through the PAT field of the nested page tables. The only snag is that WP memory cannot be emulated correctly, because Linux's default PAT setting only includes the other types. Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 602b974..414ec25 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } +#define MTRR_TYPE_UC_MINUS 7 +#define MTRR2PROTVAL_INVALID 0xff + +static u8 mtrr2protval[8]; + +static u8 fallback_mtrr_type(int mtrr) +{ + /* + * WT and WP aren't always available in the host PAT. Treat + * them as UC and UC- respectively. Everything else should be + * there. + */ + switch (mtrr) + { + case MTRR_TYPE_WRTHROUGH: + return MTRR_TYPE_UNCACHABLE; + case MTRR_TYPE_WRPROT: + return MTRR_TYPE_UC_MINUS; + default: + BUG(); + } +} + +static void build_mtrr2protval(void) +{ + int i; + u64 pat; + + for (i = 0; i < 8; i++) + mtrr2protval[i] = MTRR2PROTVAL_INVALID; + + /* Ignore the invalid MTRR types. */ + mtrr2protval[2] = 0; + mtrr2protval[3] = 0; + + /* + * Use host PAT value to figure out the mapping from guest MTRR + * values to nested page table PAT/PCD/PWT values. We do not + * want to change the host PAT value every time we enter the + * guest. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + for (i = 0; i < 8; i++) { + u8 mtrr = pat >> (8 * i); + + if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) + mtrr2protval[mtrr] = __cm_idx2pte(i); + } + + for (i = 0; i < 8; i++) { + if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { + u8 fallback = fallback_mtrr_type(i); + mtrr2protval[i] = mtrr2protval[fallback]; + BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); + } + } +} + static __init int svm_hardware_setup(void) { int cpu; @@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); + build_mtrr2protval(); return 0; err: @@ -1085,6 +1144,42 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + /* Unlike Intel, AMD takes the guest's CR0.CD into account. + * + * AMD does not have IPAT. To emulate it for the case of guests + * with no assigned devices, just set everything to WB. If guests + * have assigned devices, however, we cannot force WB for RAM + * pages only, so use the guest PAT directly. + */ + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + *g_pat = 0x0606060606060606; + else + *g_pat = vcpu->arch.pat; +} + +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + u8 mtrr; + + /* + * 1. MMIO: always map as UC + * 2. No passthrough: always map as WB, and force guest PAT to WB as well + * 3. Passthrough: can't guarantee the result, try to trust guest. + */ + if (is_mmio) + return _PAGE_NOCACHE; + + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + return 0; + + mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); + return mtrr2protval[mtrr]; +} + static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1180,6 +1275,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; + svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -4088,11 +4184,6 @@ static bool svm_has_high_real_mode_segbase(void) return true; } -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - return 0; -} - static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } -- cgit v0.10.2 From e098223b789b4a618dacd79e5e0dad4a9d5018d1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 20 Apr 2015 19:25:31 +0200 Subject: KVM: SVM: Sync g_pat with guest-written PAT value When hardware supports the g_pat VMCB field, we can use it for emulating the PAT configuration that the guest configures by writing to the corresponding MSR. Signed-off-by: Jan Kiszka Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 414ec25..d36cfaf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3350,6 +3350,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_IA32_CR_PAT: + if (npt_enabled) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; + vcpu->arch.pat = data; + svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); + mark_dirty(svm->vmcb, VMCB_NPT); + break; + } + /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } -- cgit v0.10.2 From fd717f11015f673487ffc826e59b2bad69d20fe5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 14:38:13 +0200 Subject: KVM: x86: apply guest MTRR virtualization on host reserved pages Currently guest MTRR is avoided if kvm_is_reserved_pfn returns true. However, the guest could prefer a different page type than UC for such pages. A good example is that pass-throughed VGA frame buffer is not always UC as host expected. This patch enables full use of virtual guest MTRRs. Suggested-by: Xiao Guangrong Tested-by: Joerg Roedel (on AMD) Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d36cfaf..bbc678a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1166,14 +1166,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u8 mtrr; /* - * 1. MMIO: always map as UC + * 1. MMIO: trust guest MTRR, so same as item 3. * 2. No passthrough: always map as WB, and force guest PAT to WB as well * 3. Passthrough: can't guarantee the result, try to trust guest. */ - if (is_mmio) - return _PAGE_NOCACHE; - - if (!kvm_arch_has_assigned_device(vcpu->kvm)) + if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) return 0; mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e856dd5..5b4e938 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8632,22 +8632,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: always map as UC + * 1. MMIO: guest may want to apply WC, trust it. * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. + * result, try to trust guest. So the same as item 1. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; -- cgit v0.10.2 From ee4100da1616d6d151f97862b87e8a4e43d14b4c Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 9 Jul 2015 15:44:52 +0800 Subject: kvm: x86: fix load xsave feature warning [ 68.196974] WARNING: CPU: 1 PID: 2140 at arch/x86/kvm/x86.c:3161 kvm_arch_vcpu_ioctl+0xe88/0x1340 [kvm]() [ 68.196975] Modules linked in: snd_hda_codec_hdmi i915 rfcomm bnep bluetooth i2c_algo_bit rfkill nfsd drm_kms_helper nfs_acl nfs drm lockd grace sunrpc fscache snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_dummy snd_seq_oss x86_pkg_temp_thermal snd_seq_midi kvm_intel snd_seq_midi_event snd_rawmidi kvm snd_seq ghash_clmulni_intel fuse snd_timer aesni_intel parport_pc ablk_helper snd_seq_device cryptd ppdev snd lp parport lrw dcdbas gf128mul i2c_core glue_helper lpc_ich video shpchp mfd_core soundcore serio_raw acpi_cpufreq ext4 mbcache jbd2 sd_mod crc32c_intel ahci libahci libata e1000e ptp pps_core [ 68.197005] CPU: 1 PID: 2140 Comm: qemu-system-x86 Not tainted 4.2.0-rc1+ #2 [ 68.197006] Hardware name: Dell Inc. OptiPlex 7020/0F5C5X, BIOS A03 01/08/2015 [ 68.197007] ffffffffa03b0657 ffff8800d984bca8 ffffffff815915a2 0000000000000000 [ 68.197009] 0000000000000000 ffff8800d984bce8 ffffffff81057c0a 00007ff6d0001000 [ 68.197010] 0000000000000002 ffff880211c1a000 0000000000000004 ffff8800ce0288c0 [ 68.197012] Call Trace: [ 68.197017] [] dump_stack+0x45/0x57 [ 68.197020] [] warn_slowpath_common+0x8a/0xc0 [ 68.197022] [] warn_slowpath_null+0x1a/0x20 [ 68.197029] [] kvm_arch_vcpu_ioctl+0xe88/0x1340 [kvm] [ 68.197035] [] ? kvm_arch_vcpu_load+0x4e/0x1c0 [kvm] [ 68.197040] [] kvm_vcpu_ioctl+0xc6/0x5c0 [kvm] [ 68.197043] [] ? perf_pmu_enable+0x22/0x30 [ 68.197044] [] ? perf_event_context_sched_in+0x7e/0xb0 [ 68.197048] [] do_vfs_ioctl+0x2c2/0x4a0 [ 68.197050] [] ? finish_task_switch+0x173/0x220 [ 68.197053] [] ? selinux_file_ioctl+0x4f/0xd0 [ 68.197055] [] ? security_file_ioctl+0x43/0x60 [ 68.197057] [] SyS_ioctl+0x79/0x90 [ 68.197060] [] entry_SYSCALL_64_fastpath+0x12/0x6a [ 68.197061] ---[ end trace 558a5ebf9445fc80 ]--- After commit (0c4109bec0 'x86/fpu/xstate: Fix up bad get_xsave_addr() assumptions'), there is no assumption an xsave bit is present in the hardware (pcntxt_mask) that it is always present in a given xsave buffer. An enabled state to be present on 'pcntxt_mask', but *not* in 'xstate_bv' could happen when the last 'xsave' did not request that this feature be saved (unlikely) or because the "init optimization" caused it to not be saved. This patch kill the assumption. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0024968..5ef2560 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3157,8 +3157,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); memcpy(dest, src + offset, size); - } else - WARN_ON_ONCE(1); + } valid -= feature; } -- cgit v0.10.2 From e9e4dd3267d0c5234c5c0f47440456b10875dec9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:09 +0300 Subject: net: do not process device backlog during unregistration commit 381c759d9916 ("ipv4: Avoid crashing in ip_error") fixes a problem where processed packet comes from device with destroyed inetdev (dev->ip_ptr). This is not expected because inetdev_destroy is called in NETDEV_UNREGISTER phase and packets should not be processed after dev_close_many() and synchronize_net(). Above fix is still required because inetdev_destroy can be called for other reasons. But it shows the real problem: backlog can keep packets for long time and they do not hold reference to device. Such packets are then delivered to upper levels at the same time when device is unregistered. Calling flush_backlog after NETDEV_UNREGISTER_FINAL still accounts all packets from backlog but before that some packets continue to be delivered to upper levels long after the synchronize_net call which is supposed to wait the last ones. Also, as Eric pointed out, processed packets, mostly from other devices, can continue to add new packets to backlog. Fix the problem by moving flush_backlog early, after the device driver is stopped and before the synchronize_net() call. Then use netif_running check to make sure we do not add more packets to backlog. We have to do it in enqueue_to_backlog context when the local IRQ is disabled. As result, after the flush_backlog and synchronize_net sequence all packets should be accounted. Thanks to Eric W. Biederman for the test script and his valuable feedback! Reported-by: Vittorio Gambaletta Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 1e57efd..6dd126a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3448,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3469,6 +3471,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -6135,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6770,8 +6774,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ -- cgit v0.10.2 From 2c17d27c36dcce2b6bf689f41a46b9e909877c21 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:10 +0300 Subject: net: call rcu_read_lock early in process_backlog Incoming packet should be either in backlog queue or in RCU read-side section. Otherwise, the final sequence of flush_backlog() and synchronize_net() may miss packets that can run without device reference: CPU 1 CPU 2 skb->dev: no reference process_backlog:__skb_dequeue process_backlog:local_irq_enable on_each_cpu for flush_backlog => IPI(hardirq): flush_backlog - packet not found in backlog CPU delayed ... synchronize_net - no ongoing RCU read-side sections netdev_run_todo, rcu_barrier: no ongoing callbacks __netif_receive_skb_core:rcu_read_lock - too late free dev process packet for freed dev Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 6dd126a..a8e4dd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3774,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3785,7 +3783,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3815,10 +3813,10 @@ skip_taps: if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) - goto unlock; + goto out; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -3836,7 +3834,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3848,7 +3846,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3902,8 +3900,7 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3934,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4501,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { -- cgit v0.10.2 From 51ed7f3e7d33824820837ad784801973f147c51a Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Thu, 9 Jul 2015 04:12:45 -0700 Subject: bridge: mdb: allow the user to delete mdb entry if there's a querier Until now when a querier was present static entries couldn't be deleted. Fix this and allow the user to manipulate the mdb with or without a querier. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 60868c2..c11cf26 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -423,19 +423,12 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_other_query.timer)) - return -EBUSY; - + if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - } else { - if (timer_pending(&br->ip6_other_query.timer)) - return -EBUSY; - + else ip.u.ip6 = entry->addr.u.ip6; #endif - } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); -- cgit v0.10.2 From 8220ea23243178c16b87fc3ccadf071647b64e04 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 10 Jul 2015 11:39:57 +0200 Subject: net: inet_diag: always export IPV6_V6ONLY sockopt for listening sockets Reconsidering my commit 20462155 "net: inet_diag: export IPV6_V6ONLY sockopt", I am not happy with the limitations it causes for socket analysing code in userspace. Exporting the value only if it is set makes it hard for userspace to decide whether the option is not set or the kernel does not support exporting the option at all. >From an auditor's perspective, the interesting question for listening AF_INET6 sockets is: "Does it NOT have IPV6_V6ONLY set?" Because it is the unexpected case. This patch allows to answer this question reliably. Signed-off-by: Phil Sutter Cc: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9bc2667..c3b1f3a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -152,8 +152,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet6_sk(sk)->tclass) < 0) goto errout; - if (ipv6_only_sock(sk) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) goto errout; } #endif -- cgit v0.10.2 From 145c37084e3e6584b95f82361922965cc3af41bf Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 10 Jul 2015 21:20:28 +0900 Subject: Doc: z8530book: Fix typo in API-z8530-sync-txdma-open.html This patch fix a spelling typo found in API-z8530-sync-txdma-open.html. It is because this file was generated from comment in source, I have to fix comment in source. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index feacc3b..2f0bd69 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -1044,7 +1044,7 @@ EXPORT_SYMBOL(z8530_sync_dma_close); * @dev: The network device to attach * @c: The Z8530 channel to configure in sync DMA mode. * - * Set up a Z85x30 device for synchronous DMA tranmission. One + * Set up a Z85x30 device for synchronous DMA transmission. One * ISA DMA channel must be available for this to work. The receive * side is run in PIO mode, but then it has the bigger FIFO. */ -- cgit v0.10.2 From 77b5a08427e87514c33730afc18cd02c9475e2c3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Mar 2015 08:37:46 -0700 Subject: bcache: don't embed 'return' statements in closure macros This is horribly confusing, it breaks the flow of the code without it being apparent in the caller. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index a08e3ee..79a6d63 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -320,7 +320,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) do { \ set_closure_fn(_cl, _fn, _wq); \ closure_sub(_cl, CLOSURE_RUNNING + 1); \ - return; \ } while (0) /** @@ -349,7 +348,6 @@ do { \ do { \ set_closure_fn(_cl, _fn, _wq); \ closure_queue(_cl); \ - return; \ } while (0) /** @@ -365,7 +363,6 @@ do { \ do { \ set_closure_fn(_cl, _destructor, NULL); \ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ - return; \ } while (0) /** diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index cb64e64a..bf6a9ca 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -105,6 +105,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) } while (n != bio); continue_at(&s->cl, bch_bio_submit_split_done, NULL); + return; submit: generic_make_request(bio); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ce64fc8..418607a 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -592,12 +592,14 @@ static void journal_write_unlocked(struct closure *cl) if (!w->need_write) { closure_return_with_destructor(cl, journal_write_unlock); + return; } else if (journal_full(&c->journal)) { journal_reclaim(c); spin_unlock(&c->journal.lock); btree_flush_write(c); continue_at(cl, journal_write, system_wq); + return; } c->journal.blocks_free -= set_blocks(w->data, block_bytes(c)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4afb2d2..f292790 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -88,8 +88,10 @@ static void bch_data_insert_keys(struct closure *cl) if (journal_ref) atomic_dec_bug(journal_ref); - if (!op->insert_data_done) + if (!op->insert_data_done) { continue_at(cl, bch_data_insert_start, op->wq); + return; + } bch_keylist_free(&op->insert_keys); closure_return(cl); @@ -216,8 +218,10 @@ static void bch_data_insert_start(struct closure *cl) /* 1 for the device pointer and 1 for the chksum */ if (bch_keylist_realloc(&op->insert_keys, 3 + (op->csum ? 1 : 0), - op->c)) + op->c)) { continue_at(cl, bch_data_insert_keys, op->wq); + return; + } k = op->insert_keys.top; bkey_init(k); @@ -255,6 +259,7 @@ static void bch_data_insert_start(struct closure *cl) op->insert_data_done = true; continue_at(cl, bch_data_insert_keys, op->wq); + return; err: /* bch_alloc_sectors() blocks if s->writeback = true */ BUG_ON(op->writeback); @@ -576,8 +581,10 @@ static void cache_lookup(struct closure *cl) ret = bch_btree_map_keys(&s->op, s->iop.c, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0), cache_lookup_fn, MAP_END_KEY); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { continue_at(cl, cache_lookup, bcache_wq); + return; + } closure_return(cl); } @@ -1085,6 +1092,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); + return; } else if (rw) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), -- cgit v0.10.2 From 22401ff17f993bda9acd14ac39213e6f9bb985d0 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 11 Jul 2015 17:30:01 +0200 Subject: cdc_ncm: update specs URL Update referenced specs link to reflect actual file version and location. Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1991e4a..db40175 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -6,7 +6,7 @@ * Original author: Hans Petter Selasky * * USB Host Driver for Network Control Model (NCM) - * http://www.usb.org/developers/devclass_docs/NCM10.zip + * http://www.usb.org/developers/docs/devclass_docs/NCM10_012011.zip * * The NCM encoding, decoding and initialization logic * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h -- cgit v0.10.2 From c590032f9ab8d03aab235a7601db22a267c2d958 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Fri, 10 Jul 2015 16:20:00 -0700 Subject: net: bcmgenet: fix accounting of packet drops vs errors bcmgenet driver needs to separate packet drops from packet errors. When the driver has to drop a *good* packet, due to lack of buffers or replacement skbs, increment only dev->stats.[rx|tx]_dropped. When the driver encounters a bad Rx packet or Tx error, increment only dev->stats.[rx|tx]_errors + relevant detailed error counter. Signed-off-by: Petri Gynther Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b43b2cb..64c1e9d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1230,7 +1230,6 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, new_skb = skb_realloc_headroom(skb, sizeof(*status)); dev_kfree_skb(skb); if (!new_skb) { - dev->stats.tx_errors++; dev->stats.tx_dropped++; return NULL; } @@ -1465,7 +1464,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!skb)) { dev->stats.rx_dropped++; - dev->stats.rx_errors++; goto next; } @@ -1493,7 +1491,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) { netif_err(priv, rx_status, dev, "dropping fragmented packet!\n"); - dev->stats.rx_dropped++; dev->stats.rx_errors++; dev_kfree_skb_any(skb); goto next; @@ -1515,7 +1512,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dev->stats.rx_frame_errors++; if (dma_flag & DMA_RX_LG) dev->stats.rx_length_errors++; - dev->stats.rx_dropped++; dev->stats.rx_errors++; dev_kfree_skb_any(skb); goto next; -- cgit v0.10.2 From 2ee94014d9bd3868b1c0d17405f96d63bec83f28 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 10 Jul 2015 19:48:58 -0400 Subject: net: switchdev: don't abort unsupported operations There is no need to abort attribute setting or object addition, if the prepare phase returned operation not supported. Thus, abort these two transactions only if the error is not -EOPNOTSUPP. Signed-off-by: Vivien Didelot Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 84f77a0..9f2add3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -171,8 +171,10 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * released. */ - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); + if (err != -EOPNOTSUPP) { + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + } return err; } @@ -249,8 +251,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * released. */ - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); + if (err != -EOPNOTSUPP) { + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + } return err; } -- cgit v0.10.2 From 8f5063e97f393d49611151d3cf7dcbeb41397f12 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:10 -0700 Subject: net: dsa: Test array index before use port_index is used an index into an array, and this information comes from Device Tree, make sure that port_index is not equal to the array size before using it. Move the check against port_index earlier in the loop. Fixes: 5e95329b701c: ("dsa: add device tree bindings to register DSA switches") Reported-by: Dan Carpenter Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 392e29a..52beeb8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev) continue; port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; port_name = of_get_property(port, "label", NULL); if (!port_name) @@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev) goto out_free_chip; } - if (port_index == DSA_MAX_PORTS) - break; } } -- cgit v0.10.2 From c8cf89f73f3d9ecbdea479778f0ac714be79be33 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:11 -0700 Subject: net: dsa: Fix off-by-one in switch address parsing cd->sw_addr is used as a MDIO bus address, which cannot exceed PHY_MAX_ADDR (32), our check was off-by-one. Fixes: 5e95329b701c ("dsa: add device tree bindings to register DSA switches") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 52beeb8..b445d49 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev) continue; cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr > PHY_MAX_ADDR) + if (cd->sw_addr >= PHY_MAX_ADDR) continue; if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) -- cgit v0.10.2 From 5e63e6baa159fa8c787cf783dbf3d77fbea97331 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:32:46 +0300 Subject: can: rcar_can: fix IRQ check rcar_can_probe() regards 0 as a wrong IRQ #, despite platform_get_irq() that it calls returns negative error code in that case. This leads to the following being printed to the console when attempting to open the device: error requesting interrupt fffffffa because rcar_can_open() calls request_irq() with a negative IRQ #, and that function naturally fails with -EINVAL. Check for the negative error codes instead and propagate them upstream instead of just returning -ENODEV. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7deb80d..93017c0 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -758,8 +758,9 @@ static int rcar_can_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); + err = irq; goto fail; } -- cgit v0.10.2 From c1a4c87b06fa564d6e2760a12d4e5a09badc684b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:33:53 +0300 Subject: can: rcar_can: print signed IRQ # Printing IRQ # using "%x" and "%u" unsigned formats isn't quite correct as 'ndev->irq' is of type *int*, so the "%d" format needs to be used instead. While fixing this, beautify the dev_info() message in rcar_can_probe() a bit. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 93017c0..2f9ebad 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -526,7 +526,7 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); @@ -824,7 +824,7 @@ static int rcar_can_probe(struct platform_device *pdev) devm_can_led_init(ndev); - dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n", priv->regs, ndev->irq); return 0; -- cgit v0.10.2 From 3255f68c132a8ed784b9fe1804ef4642a1d16b9a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:34:55 +0300 Subject: can: rcar_can: fix typo in error message Fix typo in the first error message printed by rcar_can_open(). Based on the original patch by Vladimir Barinov. Fixes: 862e2b6af941 ("can: rcar_can: support all input clocks") Reported-by: Vladimir Barinov Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 2f9ebad..310a0cd 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -508,7 +508,8 @@ static int rcar_can_open(struct net_device *ndev) err = clk_prepare_enable(priv->clk); if (err) { - netdev_err(ndev, "failed to enable periperal clock, error %d\n", + netdev_err(ndev, + "failed to enable peripheral clock, error %d\n", err); goto out; } -- cgit v0.10.2 From ae185f19662565f5320e49247faab8752a977642 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:50:35 +0300 Subject: can: rcar_can: print request_irq() error code Also print the error code when the request_irq() call fails in rcar_can_open(), rewording the error message... Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 310a0cd..5e81aff 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -527,7 +527,8 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); + netdev_err(ndev, "request_irq(%d) failed, error %d\n", + ndev->irq, err); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); -- cgit v0.10.2 From 585bc2ac4cc0e2398f5b128018247f51fa3e0e12 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:51:34 +0300 Subject: can: rcar_can: unify error messages All the error messages in the driver but the ones from devm_clk_get() failures use similar format. Make those two messages consitent with others. Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 5e81aff..7bd5419 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -785,7 +785,8 @@ static int rcar_can_probe(struct platform_device *pdev) priv->clk = devm_clk_get(&pdev->dev, "clkp1"); if (IS_ERR(priv->clk)) { err = PTR_ERR(priv->clk); - dev_err(&pdev->dev, "cannot get peripheral clock: %d\n", err); + dev_err(&pdev->dev, "cannot get peripheral clock, error %d\n", + err); goto fail_clk; } @@ -797,7 +798,7 @@ static int rcar_can_probe(struct platform_device *pdev) priv->can_clk = devm_clk_get(&pdev->dev, clock_names[clock_select]); if (IS_ERR(priv->can_clk)) { err = PTR_ERR(priv->can_clk); - dev_err(&pdev->dev, "cannot get CAN clock: %d\n", err); + dev_err(&pdev->dev, "cannot get CAN clock, error %d\n", err); goto fail_clk; } -- cgit v0.10.2 From 033365191136c97f88c81b7bd0011414db28bb4e Mon Sep 17 00:00:00 2001 From: "J.D. Schroeder" Date: Wed, 8 Jul 2015 14:38:12 +0300 Subject: can: c_can: Fix default pinmux glitch at init The previous change 3973c526ae9c (net: can: c_can: Disable pins when CAN interface is down) causes a slight glitch on the pinctrl settings when used. Since commit ab78029 (drivers/pinctrl: grab default handles from device core), the device core will automatically set the default pins. This causes the pins to be momentarily set to the default and then to the sleep state in register_c_can_dev(). By adding an optional "enable" state, boards can set the default pin state to be disabled and avoid the glitch when the switch from default to sleep first occurs. If the "enable" state is not available c_can_pinctrl_select_state() falls back to using the "default" pinctrl state. [Roger Q] - Forward port to v4.2 and use pinctrl_get_select(). Signed-off-by: J.D. Schroeder Signed-off-by: Roger Quadros Reviewed-by: Grygorii Strashko Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 041525d..5d214d1 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -592,6 +592,7 @@ static int c_can_start(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); int err; + struct pinctrl *p; /* basic c_can configuration */ err = c_can_chip_config(dev); @@ -604,8 +605,13 @@ static int c_can_start(struct net_device *dev) priv->can.state = CAN_STATE_ERROR_ACTIVE; - /* activate pins */ - pinctrl_pm_select_default_state(dev->dev.parent); + /* Attempt to use "active" if available else use "default" */ + p = pinctrl_get_select(priv->device, "active"); + if (!IS_ERR(p)) + pinctrl_put(p); + else + pinctrl_pm_select_default_state(priv->device); + return 0; } -- cgit v0.10.2 From 2acb5c301edf39ab6d066687ce70da1166e4de9e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 7 Jul 2015 17:27:57 +0300 Subject: ARM: dts: dra7x-evm: Prevent glitch on DCAN1 pinmux Driver core sets "default" pinmux on on probe and CAN driver sets "sleep" pinmux during register. This causes a small window where the CAN pins are in "default" state with the DCAN module being disabled. Change the "default" state to be like sleep so this glitch is avoided. Add a new "active" state that is used by the driver when CAN is actually active. Signed-off-by: Roger Quadros Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index aa46590..096f68b 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -686,7 +686,8 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index 4e1b605..8037384 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -587,9 +587,10 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; &qspi { -- cgit v0.10.2 From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e9b1810..aede704 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,9 +440,6 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -578,7 +575,6 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -589,6 +585,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); @@ -607,7 +604,6 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -618,6 +614,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame)); memset(*cfd, 0, sizeof(struct canfd_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index f64f529..a23a7af 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,7 +207,6 @@ static void slc_bump(struct slcan *sl) if (!skb) return; - __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -215,6 +214,7 @@ static void slc_bump(struct slcan *sl) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 0ce868d..674f367 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,9 +78,6 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx_ni(skb); } diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4..51bb653 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 7933e62..166d436 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453..a1ba687 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748..2e67b14 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) -- cgit v0.10.2 From d9ee489619744ee5ac246b8fb3dd65bb078d2f0a Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 14:21:21 +1000 Subject: m68knommu: force setting of CONFIG_CLOCK_FREQ for ColdFire It is possible to disable the clock selection at configuration time, but for ColdFire targets we always expect a clock frequency to be selected. This results in the following compile time error: CC arch/m68k/kernel/asm-offsets.s In file included from ./arch/m68k/include/asm/timex.h:14:0, from include/linux/timex.h:65, from include/linux/sched.h:19, from arch/m68k/kernel/asm-offsets.c:14: ./arch/m68k/include/asm/coldfire.h:25:2: error: #error "Don't know what your ColdFire CPU clock frequency is??" Remove CONFIG_CLOCK_SELECT completely and always enable CONFIG_CLOCK_FREQ for ColdFire. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 33013df..7da5949 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -416,22 +416,10 @@ config HAVE_MBAR config HAVE_IPSBAR bool -config CLOCK_SET - bool "Enable setting the CPU clock frequency" - depends on COLDFIRE - default n - help - On some CPU's you do not need to know what the core CPU clock - frequency is. On these you can disable clock setting. On some - traditional 68K parts, and on all ColdFire parts you need to set - the appropriate CPU clock frequency. On these devices many of the - onboard peripherals derive their timing from the master CPU clock - frequency. - config CLOCK_FREQ int "Set the core clock frequency" default "66666666" - depends on CLOCK_SET + depends on COLDFIRE help Define the CPU clock frequency in use. This is the core clock frequency, it may or may not be the same as the external clock diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h index c94557b..50aa4da 100644 --- a/arch/m68k/include/asm/coldfire.h +++ b/arch/m68k/include/asm/coldfire.h @@ -19,7 +19,7 @@ * in any case new boards come along from time to time that have yet * another different clocking frequency. */ -#ifdef CONFIG_CLOCK_SET +#ifdef CONFIG_CLOCK_FREQ #define MCF_CLK CONFIG_CLOCK_FREQ #else #error "Don't know what your ColdFire CPU clock frequency is??" -- cgit v0.10.2 From 15c2ca4e98a3cd265b17db126e2e6b752f99014d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 15:01:53 +1000 Subject: m68knommu: improve the clock configuration defaults Create some intelligent default settings for each ColdFire SoC type in the configuration entry for CONFIG_CLOCK_FREQ. The ColdFire clock frequency is configurable at build time. There is a lot of variation in the frequency of operation on specific ColdFire based boards. But we can choose a default that matches the maximum frequency of clock operation for a particular ColdFire part. That is typically the most common clock setting. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 7da5949..62f590e 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -418,6 +418,14 @@ config HAVE_IPSBAR config CLOCK_FREQ int "Set the core clock frequency" + default "25000000" if M5206 + default "54000000" if M5206e + default "166666666" if M520x + default "140000000" if M5249 + default "150000000" if M527x || M523x + default "90000000" if M5307 + default "50000000" if M5407 + default "266000000" if M54xx default "66666666" depends on COLDFIRE help -- cgit v0.10.2 From fa95a1dd0819c9041a873b10a6d83b5134964154 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 15:44:02 +1000 Subject: m68knommu: make ColdFire SoC selection a choice It would be nice if we could support multiple ColdFire SoC types in a single binary - but currently the code simply does not support it. Change the SoC selection config options to be a choice instead of individual selectable entries. This fixes problems with building allnoconfig, and means that a sane linux kernel is generated for a single ColdFire SoC type. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 62f590e..c496d48 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -125,6 +125,13 @@ endif # M68KCLASSIC if COLDFIRE +choice + prompt "ColdFire SoC type" + default M520x + help + Select the type of ColdFire System-on-Chip (SoC) that you want + to build for. + config M5206 bool "MCF5206" depends on !MMU @@ -174,9 +181,6 @@ config M525x help Freescale (Motorola) Coldfire 5251/5253 processor support. -config M527x - bool - config M5271 bool "MCF5271" depends on !MMU @@ -223,9 +227,6 @@ config M5307 help Motorola ColdFire 5307 processor support. -config M53xx - bool - config M532x bool "MCF532x" depends on !MMU @@ -251,9 +252,6 @@ config M5407 help Motorola ColdFire 5407 processor support. -config M54xx - bool - config M547x bool "MCF547x" select M54xx @@ -280,6 +278,17 @@ config M5441x help Freescale Coldfire 54410/54415/54416/54417/54418 processor support. +endchoice + +config M527x + bool + +config M53xx + bool + +config M54xx + bool + endif # COLDFIRE -- cgit v0.10.2 From bfd302acc57ceed1b4d37926c8bbe3cb9d9f1098 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:27:29 +1000 Subject: m68knommu: update defconfig for m5208evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig index e7292f4..4c7b793 100644 --- a/arch/m68k/configs/m5208evb_defconfig +++ b/arch/m68k/configs/m5208evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,17 +12,12 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_M520x=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=166666666 -CONFIG_CLOCK_DIV=2 -CONFIG_M5208EVB=y +# CONFIG_MMU is not set # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x40000000 CONFIG_RAMSIZE=0x2000000 CONFIG_VECTORBASE=0x40000000 CONFIG_KERNELBASE=0x40020000 -CONFIG_RAM16BIT=y CONFIG_BINFMT_FLAT=y CONFIG_NET=y CONFIG_PACKET=y @@ -40,24 +31,19 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_BAUDRATE=115200 CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -68,8 +54,6 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_FULLDEBUG=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" +CONFIG_FULLDEBUG=y -- cgit v0.10.2 From 0f28b05a4b127dd786d589a44d49ed6ade9a66fe Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:29:17 +1000 Subject: m68knommu: update defconfig for ColdFire m5249evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig index 0cd4b39..a782f36 100644 --- a/arch/m68k/configs/m5249evb_defconfig +++ b/arch/m68k/configs/m5249evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,10 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5249=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=140000000 -CONFIG_CLOCK_DIV=2 CONFIG_M5249C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -38,23 +32,18 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -62,7 +51,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set -- cgit v0.10.2 From 2e27f44383fb64db65eeeba6b25f9cc986284103 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:30:58 +1000 Subject: m68knommu: update defconfig for ColdFire m5272c3 No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig index a60cb35..6f5fb92 100644 --- a/arch/m68k/configs/m5272c3_defconfig +++ b/arch/m68k/configs/m5272c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,8 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5272=y -CONFIG_CLOCK_SET=y CONFIG_M5272C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -36,23 +32,18 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -61,6 +52,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -- cgit v0.10.2 From 6845f6e10290877a820000c3647129837ef93c1b Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:31:30 +1000 Subject: m68knommu: update defconfig for ColdFire m5275evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig index e6502ab..b5d7cd1 100644 --- a/arch/m68k/configs/m5275evb_defconfig +++ b/arch/m68k/configs/m5275evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,11 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5275=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=150000000 -CONFIG_CLOCK_DIV=2 -CONFIG_M5275EVB=y # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00000000 @@ -39,24 +32,19 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -65,8 +53,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set -- cgit v0.10.2 From 59c024b742f4b9ef9be08baebabf15ca7e843b5d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:31:56 +1000 Subject: m68knommu: update defconfig for ColdFire m5307c3 No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig index 023812a..1b4c094 100644 --- a/arch/m68k/configs/m5307c3_defconfig +++ b/arch/m68k/configs/m5307c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,10 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5307=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=90000000 -CONFIG_CLOCK_DIV=2 CONFIG_M5307C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -38,16 +32,11 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y CONFIG_SLIP=y CONFIG_SLIP_COMPRESSED=y @@ -56,21 +45,17 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y # CONFIG_DNOTIFY is not set CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_FULLDEBUG=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set +CONFIG_FULLDEBUG=y -- cgit v0.10.2 From fee539223f72c0f969711673fd1ddf613a522d6e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:33:08 +1000 Subject: m68knommu: update defconfig for ColdFire m5407c3 No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig index 557b39f..275ad543 100644 --- a/arch/m68k/configs/m5407c3_defconfig +++ b/arch/m68k/configs/m5407c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -17,9 +13,8 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5407=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=50000000 CONFIG_M5407C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00000000 @@ -38,22 +33,17 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -63,8 +53,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set -- cgit v0.10.2 From 8700f09495dec922600836400d5e26a22f50bf19 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:34:34 +1000 Subject: m68knommu: update defconfig for ColdFire m5475evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index c5018a6..78665b5 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -1,11 +1,7 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_SYSCTL_SYSCALL=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -20,9 +16,6 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_COLDFIRE=y -CONFIG_M547x=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=266000000 # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x0 CONFIG_RAMSIZE=0x2000000 @@ -32,7 +25,6 @@ CONFIG_KERNELBASE=0x20000 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y -- cgit v0.10.2 From 03aa29f80e371c75370e83eded7b551951385122 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 8 Jul 2015 16:54:13 +1000 Subject: m68k: fix io functions for ColdFire/MMU/PCI case The inb/outb/... family of IO methods end up being multiply defined when building PCI support for the ColdFire. Compiling gives this: CC init/main.o In file included from ./arch/m68k/include/asm/io.h:4:0, from include/linux/bio.h:30, from include/linux/blkdev.h:18, from init/main.c:75: ./arch/m68k/include/asm/io_mm.h:420:0: warning: "inb" redefined ./arch/m68k/include/asm/io_mm.h:108:0: note: this is the location of the previous definition ... The ColdFire/PCI case defines its own IO access methods, so no others should be defined or used in this case. Conditionally disable other definitions that clash with it. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index 618c85d3..f55cad5 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -413,7 +413,8 @@ static inline void isa_delay(void) #define writew(val, addr) out_le16((addr), (val)) #endif /* CONFIG_ATARI_ROM_ISA */ -#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) +#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \ + !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE)) /* * We need to define dummy functions for GENERIC_IOMAP support. */ -- cgit v0.10.2 From 67592f699cb309559575b89d727d510ea076521c Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 8 Jul 2015 17:02:08 +1000 Subject: m68k: enable PCI support for m5475evb defconfig The ColdFire M5475 on the m5475evb board supports a PCI bus, lets enable it for the defconfig to get better build and test coverage. Signed-off-by: Greg Ungerer diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index 78665b5..4f4ccd13 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -22,6 +22,7 @@ CONFIG_RAMSIZE=0x2000000 CONFIG_VECTORBASE=0x0 CONFIG_MBAR=0xff000000 CONFIG_KERNELBASE=0x20000 +CONFIG_PCI=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -- cgit v0.10.2 From f51e2f1911122879eefefa4c592dea8bf794b39c Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 13 Jul 2015 10:25:17 +0300 Subject: ARC: make sure instruction_pointer() returns unsigned value Currently instruction_pointer() returns pt_regs->ret and so return value is of type "long", which implicitly stands for "signed long". While that's perfectly fine when dealing with 32-bit values if return value of instruction_pointer() gets assigned to 64-bit variable sign extension may happen. And at least in one real use-case it happens already. In perf_prepare_sample() return value of perf_instruction_pointer() (which is an alias to instruction_pointer() in case of ARC) is assigned to (struct perf_sample_data)->ip (which type is "u64"). And what we see if instuction pointer points to user-space application that in case of ARC lays below 0x8000_0000 "ip" gets set properly with leading 32 zeros. But if instruction pointer points to kernel address space that starts from 0x8000_0000 then "ip" is set with 32 leadig "f"-s. I.e. id instruction_pointer() returns 0x8100_0000, "ip" will be assigned with 0xffff_ffff__8100_0000. Which is obviously wrong. In particular that issuse broke output of perf, because perf was unable to associate addresses like 0xffff_ffff__8100_0000 with anything from /proc/kallsyms. That's what we used to see: ----------->8---------- 6.27% ls [unknown] [k] 0xffffffff8046c5cc 2.96% ls libuClibc-0.9.34-git.so [.] memcpy 2.25% ls libuClibc-0.9.34-git.so [.] memset 1.66% ls [unknown] [k] 0xffffffff80666536 1.54% ls libuClibc-0.9.34-git.so [.] 0x000224d6 1.18% ls libuClibc-0.9.34-git.so [.] 0x00022472 ----------->8---------- With that change perf output looks much better now: ----------->8---------- 8.21% ls [kernel.kallsyms] [k] memset 3.52% ls libuClibc-0.9.34-git.so [.] memcpy 2.11% ls libuClibc-0.9.34-git.so [.] malloc 1.88% ls libuClibc-0.9.34-git.so [.] memset 1.64% ls [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.41% ls [kernel.kallsyms] [k] __d_lookup_rcu ----------->8---------- Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 9175597..91694ec 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -106,7 +106,7 @@ struct callee_regs { long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) ((regs)->ret) +#define instruction_pointer(regs) (unsigned long)((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ -- cgit v0.10.2 From 624b71ee20acba269e348eb6bdd516d47b9d30fa Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sun, 12 Jul 2015 13:16:50 +0530 Subject: ARCv2: support HS38 releases Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 08d98ee..18cc015 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -142,11 +142,15 @@ static void read_arc_build_cfg_regs(void) } static const struct cpuinfo_data arc_cpu_tbl[] = { +#ifdef CONFIG_ISA_ARCOMPACT { {0x20, "ARC 600" }, 0x2F}, { {0x30, "ARC 700" }, 0x33}, { {0x34, "ARC 700 R4.10"}, 0x34}, { {0x35, "ARC 700 R4.11"}, 0x35}, - { {0x50, "ARC HS38" }, 0x51}, +#else + { {0x50, "ARC HS38 R2.0"}, 0x51}, + { {0x52, "ARC HS38 R2.1"}, 0x52}, +#endif { {0x00, NULL } } }; -- cgit v0.10.2 From f81a49d13b3014c2b7c424628779a8af93f25c04 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 10 Jul 2015 10:47:09 +0200 Subject: s390/dasd: fix kernel panic when alias is set offline The dasd device driver selects which (alias or base) device is used for a given requests when the request is build. If the chosen alias device is set offline before the request gets queued to the device queue the starting function may use device structures that are already freed. This might lead to a hanging offline process or a kernel panic. Add a check to the starting function that returns the request to the upper layer if the device is already in offline processing. In addition to that prevent that an alias device that's already in offline processing gets chosen as start device. Reviewed-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 1aec8ff..f73d2f5 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1863,6 +1863,33 @@ static void __dasd_device_check_expire(struct dasd_device *device) } /* + * return 1 when device is not eligible for IO + */ +static int __dasd_device_is_unusable(struct dasd_device *device, + struct dasd_ccw_req *cqr) +{ + int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM); + + if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + /* dasd is being set offline. */ + return 1; + } + if (device->stopped) { + if (device->stopped & mask) { + /* stopped and CQR will not change that. */ + return 1; + } + if (!test_bit(DASD_CQR_VERIFY_PATH, &cqr->flags)) { + /* CQR is not able to change device to + * operational. */ + return 1; + } + /* CQR required to get device operational. */ + } + return 0; +} + +/* * Take a look at the first request on the ccw queue and check * if it needs to be started. */ @@ -1876,13 +1903,8 @@ static void __dasd_device_start_head(struct dasd_device *device) cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); if (cqr->status != DASD_CQR_QUEUED) return; - /* when device is stopped, return request to previous layer - * exception: only the disconnect or unresumed bits are set and the - * cqr is a path verification request - */ - if (device->stopped && - !(!(device->stopped & ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM)) - && test_bit(DASD_CQR_VERIFY_PATH, &cqr->flags))) { + /* if device is not usable return request to upper layer */ + if (__dasd_device_is_unusable(device, cqr)) { cqr->intrc = -EAGAIN; cqr->status = DASD_CQR_CLEARED; dasd_schedule_device_bh(device); diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index a2597e6..ee3a6fa 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -699,7 +699,8 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) struct dasd_device, alias_list); spin_unlock_irqrestore(&lcu->lock, flags); alias_priv = (struct dasd_eckd_private *) alias_device->private; - if ((alias_priv->count < private->count) && !alias_device->stopped) + if ((alias_priv->count < private->count) && !alias_device->stopped && + !test_bit(DASD_FLAG_OFFLINE, &alias_device->flags)) return alias_device; else return NULL; -- cgit v0.10.2 From e47994dd44bcb4a77b4152bd0eada585934703c0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Jul 2015 15:02:37 +0200 Subject: s390/process: fix sfpc inline assembly The sfpc inline assembly within execve_tail() may incorrectly set bits 28-31 of the sfpc instruction to a value which is not zero. These bits however are currently unused and therefore should be zero so we won't get surprised if these bits will be used in the future. Therefore remove the second operand from the inline assembly. Cc: Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index dc5edc2..8f587d8 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, asmlinkage void execve_tail(void) { current->thread.fp_regs.fpc = 0; - asm volatile("sfpc %0,%0" : : "d" (0)); + asm volatile("sfpc %0" : : "d" (0)); } /* -- cgit v0.10.2 From cad49cfc44a5160e3fa09b18e4e7f7cacd13f27d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jul 2015 08:40:49 +0200 Subject: s390/nmi: fix vector register corruption If a machine check happens, the machine has the vector facility installed and the extended save area exists, the cpu will save vector register contents into the extended save area. This is regardless of control register 0 contents, which enables and disables the vector facility during runtime. On each machine check we should validate the vector registers. The current code however tries to validate the registers only if the running task is using vector registers in user space. However even the current code is broken and causes vector register corruption on machine checks, if user space uses them: the prefix area contains a pointer (absolute address) to the machine check extended save area. In order to save some space the save area was put into an unused area of the second prefix page. When validating vector register contents the code uses the absolute address of the extended save area, which is wrong. Due to prefixing the vector instructions will then access contents using absolute addresses instead of real addresses, where the machine stored the contents. If the above would work there is still the problem that register validition would only happen if user space uses vector registers. If kernel space uses them also, this may also lead to vector register content corruption: if the kernel makes use of vector instructions, but the current running user space context does not, the machine check handler will validate floating point registers instead of vector registers. Given the fact that writing to a floating point register may change the upper halve of the corresponding vector register, we also experience vector register corruption in this case. Fix all of these issues, and always validate vector registers on each machine check, if the machine has the vector facility installed and the extended save area is defined. Cc: # 4.1+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index cfad7fca..d7697ab 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -57,7 +57,10 @@ union ctlreg0 { unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 23; + unsigned long : 4; + unsigned long afp : 1; /* AFP-register control */ + unsigned long vx : 1; /* Vector enablement control */ + unsigned long : 17; }; }; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 505c17c..56b5508 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,6 +21,7 @@ #include #include #include +#include struct mcck_struct { int kill_task; @@ -129,26 +130,30 @@ static int notrace s390_revalidate_registers(struct mci *mci) } else asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area)); - /* Revalidate vector registers */ - if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!MACHINE_HAS_VX) { + /* Revalidate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } else { + /* Revalidate vector registers */ + union ctlreg0 cr0; + if (!mci->vr) { /* * Vector registers can't be restored and therefore @@ -156,8 +161,12 @@ static int notrace s390_revalidate_registers(struct mci *mci) */ kill_task = 1; } + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); restore_vx_regs((__vector128 *) - S390_lowcore.vector_save_area_addr); + &S390_lowcore.vector_save_area); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Revalidate access registers */ asm volatile( -- cgit v0.10.2 From ed056764271e71004b3118619de1ebfefb2acf6b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:01 -0400 Subject: Revert "nfs: take extra reference to fl->fl_file when running a LOCKU operation" This reverts commit db2efec0caba4f81a22d95a34da640b86c313c8e. William reported that he was seeing instability with this patch, which is likely due to the fact that it can cause the kernel to take a new reference to a filp after the last reference has already been put. Revert this patch for now, as we'll need to fix this in another way. Cc: stable@vger.kernel.org Reported-by: William Dauchy Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6f228b5..60be01f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5484,7 +5484,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5496,7 +5495,6 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); - fput(calldata->fl.fl_file); kfree(calldata); } -- cgit v0.10.2 From bcd7f78d078ff6197715c1ed070c92aca57ec12c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: locks: have flock_lock_file take an inode pointer instead of a filp ...and rename it to better describe how it works. In order to fix a use-after-free in NFS, we need to be able to remove locks from an inode after the filp associated with them may have already been freed. flock_lock_file already only dereferences the filp to get to the inode, so just change it so the callers do that. All of the callers already pass in a lock request that has the fl_file set properly, so we don't need to pass it in individually. With that change it now only dereferences the filp to get to the inode, so just push that out to the callers. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" diff --git a/fs/locks.c b/fs/locks.c index 653faab..4366b7c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ -static int flock_lock_file(struct file *filp, struct file_lock *request) +static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; - struct inode *inode = file_inode(filp); int error = 0; bool found = false; LIST_HEAD(dispose); @@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, fl_list) { - if (filp != fl->fl_file) + if (request->fl_file != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; @@ -1862,7 +1861,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) int error; might_sleep(); for (;;) { - error = flock_lock_file(filp, fl); + error = flock_lock_inode(file_inode(filp), fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -2401,7 +2400,8 @@ locks_remove_flock(struct file *filp) .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; - struct file_lock_context *flctx = file_inode(filp)->i_flctx; + struct inode *inode = file_inode(filp); + struct file_lock_context *flctx = inode->i_flctx; if (list_empty(&flctx->flc_flock)) return; @@ -2409,7 +2409,7 @@ locks_remove_flock(struct file *filp) if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else - flock_lock_file(filp, &fl); + flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); -- cgit v0.10.2 From 29d01b22eaa18d8b46091d3c98c6001c49f78e4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: locks: new helpers - flock_lock_inode_wait and posix_lock_inode_wait Allow callers to pass in an inode instead of a filp. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" diff --git a/fs/locks.c b/fs/locks.c index 4366b7c..ba268a5 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1163,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Variant of posix_lock_file_wait that does not take a filp, and so can be + * used after the filp has already been torn down. */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1188,6 +1187,21 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(posix_lock_inode_wait); + +/** + * posix_lock_file_wait - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a POSIX style lock to a file. + * We merge adjacent & overlapping locks whenever possible. + * POSIX locks are sorted by owner task, then by starting address + */ +int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(posix_lock_file_wait); /** @@ -1850,18 +1864,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_inode(file_inode(filp), fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1873,7 +1887,19 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(flock_lock_inode_wait); +/** + * flock_lock_file_wait - Apply a FLOCK-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a FLOCK style lock to a file. + */ +int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(flock_lock_file_wait); /** diff --git a/include/linux/fs.h b/include/linux/fs.h index a0653e5..4c990ed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1046,11 +1046,13 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); @@ -1137,6 +1139,12 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } +static inline int posix_lock_inode_wait(struct inode *inode, + struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) { return -ENOLCK; @@ -1163,6 +1171,12 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline int flock_lock_inode_wait(struct inode *inode, + struct file_lock *request) +{ + return -ENOLCK; +} + static inline int flock_lock_file_wait(struct file *filp, struct file_lock *request) { -- cgit v0.10.2 From 83bfff23e9ed19f37c4ef0bba84e75bd88e5cf21 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: nfs4: have do_vfs_lock take an inode pointer Now that we have file locking helpers that can deal with an inode instead of a filp, we can change the NFSv4 locking code to use that instead. This should fix the case where we have a filp that is closed while flock or OFD locks are set on it, and the task is signaled so that it doesn't wait for the LOCKU reply to come in before the filp is freed. At that point we can end up with a use-after-free with the current code, which relies on dereferencing the fl_file in the lock request. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 60be01f..8bee934 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5439,15 +5439,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) +static int do_vfs_lock(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: - res = posix_lock_file_wait(file, fl); + res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: - res = flock_lock_file_wait(file, fl); + res = flock_lock_inode_wait(inode, fl); break; default: BUG(); @@ -5507,7 +5507,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->fl.fl_file, &calldata->fl); + do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; @@ -5615,7 +5615,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + if (do_vfs_lock(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5756,7 +5756,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { + if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -5998,7 +5998,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); if (status < 0) goto out; down_read(&nfsi->rwsem); @@ -6006,7 +6006,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); up_read(&nfsi->rwsem); goto out; } -- cgit v0.10.2 From ee296d7c5709440f8abd36b5b65c6b3e388538d9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: locks: inline posix_lock_file_wait and flock_lock_file_wait They just call file_inode and then the corresponding *_inode_file_wait function. Just make them static inlines instead. Signed-off-by: Jeff Layton diff --git a/fs/locks.c b/fs/locks.c index ba268a5..d3d558b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1190,21 +1190,6 @@ int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) EXPORT_SYMBOL(posix_lock_inode_wait); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address - */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return posix_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(posix_lock_file_wait); - -/** * locks_mandatory_locked - Check for an active lock * @file: the file to check * @@ -1890,19 +1875,6 @@ int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) EXPORT_SYMBOL(flock_lock_inode_wait); /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a FLOCK style lock to a file. - */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return flock_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(flock_lock_file_wait); - -/** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. * @cmd: the type of lock to apply. diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c990ed..cc008c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1047,13 +1047,11 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_inode_wait(struct inode *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1145,11 +1143,6 @@ static inline int posix_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1177,12 +1170,6 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int flock_lock_file_wait(struct file *filp, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1216,6 +1203,20 @@ static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} + +static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} struct fasync_struct { spinlock_t fa_lock; @@ -2025,11 +2026,6 @@ extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); -static inline struct inode *file_inode(const struct file *f) -{ - return f->f_inode; -} - /* /sys/fs */ extern struct kobject *fs_kobj; -- cgit v0.10.2 From ba0ef85479c46a2ab354c2220bdb6152f7f4baf3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jun 2015 13:15:31 -0600 Subject: tpm: Fix initialization of the cdev When a cdev is contained in a dynamic structure the cdev parent kobj should be set to the kobj that controls the lifetime of the enclosing structure. In TPM's case this is the embedded struct device. Also, cdev_init 0's the whole structure, so all sets must be after, not before. This fixes module ref counting and cdev. Cc: Fixes: 313d21eeab92 ("tpm: device class for tpm") Signed-off-by: Jason Gunthorpe Reviewed-by: Dmitry Torokhov Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Peter Huewe diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 283f00a..1082d4b 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -129,8 +129,9 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, device_initialize(&chip->dev); - chip->cdev.owner = chip->pdev->driver->owner; cdev_init(&chip->cdev, &tpm_fops); + chip->cdev.owner = chip->pdev->driver->owner; + chip->cdev.kobj.parent = &chip->dev.kobj; return chip; } -- cgit v0.10.2 From b371616b8537d6450ebca0819defbf53452bebf3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 24 Jun 2015 17:14:55 +0300 Subject: tpm, tpm_crb: fail when TPM2 ACPI table contents look corrupted At least some versions of AMI BIOS have corrupted contents in the TPM2 ACPI table and namely the physical address of the control area is set to zero. This patch changes the driver to fail gracefully when we observe a zero address instead of continuing to ioremap. Cc: Signed-off-by: Jarkko Sakkinen Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 44f9d20..1267322 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -233,6 +233,14 @@ static int crb_acpi_add(struct acpi_device *device) return -ENODEV; } + /* At least some versions of AMI BIOS have a bug that TPM2 table has + * zero address for the control area and therefore we must fail. + */ + if (!buf->control_area_pa) { + dev_err(dev, "TPM2 ACPI table has a zero address for the control area\n"); + return -EINVAL; + } + if (buf->hdr.length < sizeof(struct acpi_tpm2)) { dev_err(dev, "TPM2 ACPI table has wrong size"); return -EINVAL; -- cgit v0.10.2 From 4139032b4860c06ff3a7687041f06535fed901ed Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Mon, 29 Jun 2015 09:57:00 -0400 Subject: IB: Add rdma_cap_ib_switch helper and use where appropriate Persuant to Liran's comments on node_type on linux-rdma mailing list: In an effort to reform the RDMA core and ULPs to minimize use of node_type in struct ib_device, an additional bit is added to struct ib_device for is_switch (IB switch). This is needed to be initialized by any IB switch device driver. This is a NEW requirement on such device drivers which are all "out of tree". In addition, an ib_switch helper was added to ib_verbs.h based on the is_switch device bit rather than node_type (although those should be consistent). The RDMA core (MAD, SMI, agent, sa_query, multicast, sysfs) as well as (IPoIB and SRP) ULPs are updated where appropriate to use this new helper. In some cases, the helper is now used under the covers of using rdma_[start end]_port rather than the open coding previously used. Reviewed-by: Sean Hefty Reviewed-By: Jason Gunthorpe Reviewed-by: Ira Weiny Tested-by: Ira Weiny Signed-off-by: Hal Rosenstock Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index c7dcfe4..0429040 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -88,7 +88,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * struct ib_ah *ah; struct ib_mad_send_wr_private *mad_send_wr; - if (device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(device)) port_priv = ib_get_agent_port(device, 0); else port_priv = ib_get_agent_port(device, port_num); @@ -122,7 +122,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a4b1466..c82d751 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -769,7 +769,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); - if (device->node_type == RDMA_NODE_IB_SWITCH && + if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) port_num = send_wr->wr.ud.port_num; else @@ -787,7 +787,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && - opa_smi_handle_dr_smp_send(opa_smp, device->node_type, + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid directed route\n"); @@ -810,7 +811,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } else { if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "Invalid directed route\n"); @@ -2030,7 +2031,7 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv struct ib_smp *smp = (struct ib_smp *)recv->mad; if (smi_handle_dr_smp_recv(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) @@ -2042,13 +2043,13 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv if (retsmi == IB_SMI_SEND) { /* don't forward */ if (smi_handle_dr_smp_send(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; @@ -2115,7 +2116,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, struct opa_smp *smp = (struct opa_smp *)recv->mad; if (opa_smi_handle_dr_smp_recv(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) @@ -2127,7 +2128,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, if (retsmi == IB_SMI_SEND) { /* don't forward */ if (opa_smi_handle_dr_smp_send(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; @@ -2135,7 +2136,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, IB_SMI_DISCARD) return IB_SMI_DISCARD; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; @@ -2235,7 +2236,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, goto out; } - if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; @@ -3297,17 +3298,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int start, end, i; + int start, i; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + start = rdma_start_port(device); - for (i = start; i <= end; i++) { + for (i = start; i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; @@ -3342,17 +3337,9 @@ error: static void ib_mad_remove_device(struct ib_device *device) { - int start, end, i; - - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + int i; - for (i = start; i <= end; i++) { + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1244f02..2cb865c 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -812,12 +812,8 @@ static void mcast_add_one(struct ib_device *device) if (!dev) return; - if (device->node_type == RDMA_NODE_IB_SWITCH) - dev->start_port = dev->end_port = 0; - else { - dev->start_port = 1; - dev->end_port = device->phys_port_cnt; - } + dev->start_port = rdma_start_port(device); + dev->end_port = rdma_end_port(device); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (!rdma_cap_ib_mcast(device, dev->start_port + i)) diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h index 62d91bf..3bfab35 100644 --- a/drivers/infiniband/core/opa_smi.h +++ b/drivers/infiniband/core/opa_smi.h @@ -39,12 +39,12 @@ #include "smi.h" -enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int opa_smi_get_fwd_port(struct opa_smp *smp); extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 0fae850..ca919f4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1156,12 +1156,8 @@ static void ib_sa_add_one(struct ib_device *device) int s, e, i; int count = 0; - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 368a561..f19b238 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -41,7 +41,7 @@ #include "smi.h" #include "opa_smi.h" -static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, +static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, const u8 *return_path, @@ -64,7 +64,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, /* C14-9:2 */ if (*hop_ptr && *hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; /* return_path set when received */ @@ -77,7 +77,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, if (*hop_ptr == hop_cnt) { /* return_path set when received */ (*hop_ptr)++; - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -96,7 +96,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; (*hop_ptr)--; @@ -108,7 +108,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, if (*hop_ptr == 1) { (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_slid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -127,9 +127,9 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, * Return IB_SMI_DISCARD if the SMP should be discarded */ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num) + bool is_switch, int port_num) { - return __smi_handle_dr_smp_send(node_type, port_num, + return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, @@ -139,9 +139,9 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, } enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, - u8 node_type, int port_num) + bool is_switch, int port_num) { - return __smi_handle_dr_smp_send(node_type, port_num, + return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, @@ -152,7 +152,7 @@ enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, OPA_LID_PERMISSIVE); } -static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, +static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num, int phys_port_cnt, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, @@ -173,7 +173,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, /* C14-9:2 -- intermediate hop */ if (*hop_ptr && *hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; return_path[*hop_ptr] = port_num; @@ -188,7 +188,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, return_path[*hop_ptr] = port_num; /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -208,7 +208,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; /* hop_ptr updated when sending */ @@ -224,8 +224,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, return IB_SMI_HANDLE; } /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH ? - IB_SMI_HANDLE : IB_SMI_DISCARD); + return (is_switch ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ @@ -238,10 +237,10 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { - return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, @@ -254,10 +253,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ -enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { - return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index aff96ba..33c91c8 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -51,12 +51,12 @@ enum smi_forward_action { IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ed6b6c8..0b84a9c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -870,7 +870,7 @@ int ib_device_register_sysfs(struct ib_device *device, goto err_put; } - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { ret = add_port(device, 0, port_callback); if (ret) goto err_put; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da149c2..0d8336e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1684,7 +1684,7 @@ static void ipoib_add_one(struct ib_device *device) struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; - int s, e, p; + int p; int count = 0; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); @@ -1693,15 +1693,7 @@ static void ipoib_add_one(struct ib_device *device) INIT_LIST_HEAD(dev_list); - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 267dc4f..d40e321 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3379,7 +3379,7 @@ static void srp_add_one(struct ib_device *device) struct srp_device *srp_dev; struct ib_device_attr *dev_attr; struct srp_host *host; - int mr_page_shift, s, e, p; + int mr_page_shift, p; u64 max_pages_per_mr; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); @@ -3443,15 +3443,7 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->mr)) goto err_pd; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); if (host) list_add_tail(&host->list, &srp_dev->dev_list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 986fddb..b0f898e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1745,6 +1745,7 @@ struct ib_device { char node_desc[64]; __be64 node_guid; u32 local_dma_lkey; + u16 is_switch:1; u8 node_type; u8 phys_port_cnt; @@ -1824,6 +1825,20 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); /** + * rdma_cap_ib_switch - Check if the device is IB switch + * @device: Device to check + * + * Device driver is responsible for setting is_switch bit on + * in ib_device structure at init time. + * + * Return: true if the device is IB switch. + */ +static inline bool rdma_cap_ib_switch(const struct ib_device *device) +{ + return device->is_switch; +} + +/** * rdma_start_port - Return the first valid port number for the device * specified * @@ -1833,7 +1848,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, */ static inline u8 rdma_start_port(const struct ib_device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; + return rdma_cap_ib_switch(device) ? 0 : 1; } /** @@ -1846,8 +1861,7 @@ static inline u8 rdma_start_port(const struct ib_device *device) */ static inline u8 rdma_end_port(const struct ib_device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; + return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) -- cgit v0.10.2 From cd4cd565e049c6e734a12754d91d1142c25f6a64 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 25 Jun 2015 12:04:49 -0400 Subject: IB/mad: Fix compare between big endian and cpu endian The define OPA_LID_PERMISSIVE is big endian and was compared to the cpu endian variable opa_drslid. Problem caught by 0-day build infrastructure. Fixes: 8e4349d13f33 (IB/mad: Add final OPA MAD processing) Signed-off-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: John, Jubin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c82d751..786fc51 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -795,7 +795,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); - if (opa_drslid != OPA_LID_PERMISSIVE && + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && opa_drslid & 0xffff0000) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", -- cgit v0.10.2 From 3b8ab700af0a5c5e59c833f8a88f94b97499f5e5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 25 Jun 2015 09:52:50 -0400 Subject: IB/mad: Remove improper use of BUG_ON We recently added BUG_ON's which were inappropriate for a condition which should never happen. Change these to be WARN_ON_ONCE as a debugging aid. Fixes: 4cd7c9479aff ('IB/mad: Add support for additional MAD info to/from drivers') Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index 12b5bc2..376b031 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -226,8 +226,9 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 948188e..ad3a926 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1499,8 +1499,9 @@ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 85a50df..9a810e3 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -861,8 +861,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 01fc97d..b84d13a 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -68,8 +68,9 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 6b2418b..7c3f2fb 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -209,8 +209,9 @@ int mthca_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4bafa15..29b2767 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -215,8 +215,9 @@ int ocrdma_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 05e3242..9625e7c 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2412,8 +2412,9 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: -- cgit v0.10.2 From b356c1c1f90a347b6f67d9272dda7ecf47e0b46c Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 24 Jun 2015 10:12:13 +0530 Subject: IB/srpt: Convert use of __constant_cpu_to_beXX to cpu_to_beXX In little endian cases, the macro cpu_to_be{16,32,64} unfolds to __swab{16,32,64} which provides special case for constants. In big endian cases, __constant_cpu_to_be{16,32,64} and cpu_to_be{16,32,64} expand directly to the same expression. So, replace __constant_cpu_to_be{16,32,64} with cpu_to_be{16,32,64} with the goal of getting rid of the definitions of __constant_cpu_to_be{16,32,64} completely. The Coccinelle semantic patch that performs this transformation is as follows: @@expression x;@@ ( - __constant_cpu_to_be16(x) + cpu_to_be16(x) | - __constant_cpu_to_be32(x) + cpu_to_be32(x) | - __constant_cpu_to_be64(x) + cpu_to_be64(x) ) Signed-off-by: Vaishali Thakkar Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 82897ca..60ff0a2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -302,7 +302,7 @@ static void srpt_get_iou(struct ib_dm_mad *mad) int i; ioui = (struct ib_dm_iou_info *)mad->data; - ioui->change_id = __constant_cpu_to_be16(1); + ioui->change_id = cpu_to_be16(1); ioui->max_controllers = 16; /* set present for slot 1 and empty for the rest */ @@ -330,13 +330,13 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -348,10 +348,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); iocp->subsys_device_id = 0x0; - iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); - iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); - iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); - iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); @@ -379,13 +379,13 @@ static void srpt_get_svc_entries(u64 ioc_guid, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2 || lo > hi || hi > 1) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -436,7 +436,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, break; default: rsp_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; } } @@ -493,11 +493,11 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, break; case IB_MGMT_METHOD_SET: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; default: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); break; } @@ -1535,7 +1535,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = - __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->status = status; @@ -1585,8 +1585,8 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; - srp_rsp->req_lim_delta = __constant_cpu_to_be32(1 - + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->req_lim_delta = + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; @@ -1630,7 +1630,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) switch (len) { case 8: if ((*((__be64 *)lun) & - __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) + cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) goto out_err; break; case 4: @@ -2449,8 +2449,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (it_iu_len > srp_max_req_size || it_iu_len < 64) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because its" " length (%d bytes) is out of range (%d .. %d)\n", @@ -2459,8 +2459,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (!sport->enabled) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because the target port" " has not yet been enabled\n"); @@ -2505,8 +2505,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) || *(__be64 *)(req->target_port_id + 8) != cpu_to_be64(srpt_service_guid)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); ret = -ENOMEM; pr_err("rejected SRP_LOGIN_REQ because it" " has an invalid target port identifier.\n"); @@ -2515,8 +2515,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch = kzalloc(sizeof *ch, GFP_KERNEL); if (!ch) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because no memory.\n"); ret = -ENOMEM; goto reject; @@ -2552,8 +2552,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_create_ch_ib(ch); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); goto free_ring; @@ -2561,8 +2561,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_ch_qp_rtr(ch, ch->qp); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling" " RTR failed (error code = %d)\n", ret); goto destroy_ib; @@ -2580,15 +2579,15 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (!nacl) { pr_info("Rejected login because no ACL has been" " configured yet for initiator %s.\n", ch->sess_name); - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); goto destroy_ib; } ch->sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(ch->sess)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_debug("Failed to create session\n"); goto deregister_session; } @@ -2604,8 +2603,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, rsp->max_it_iu_len = req->req_it_iu_len; rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; - rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); rsp->req_lim_delta = cpu_to_be32(ch->rq_size); atomic_set(&ch->req_lim, ch->rq_size); atomic_set(&ch->req_lim_delta, 0); @@ -2655,8 +2654,8 @@ free_ch: reject: rej->opcode = SRP_LOGIN_REJ; rej->tag = req->tag; - rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, (void *)rej, sizeof *rej); -- cgit v0.10.2 From 3fdf70acec13efc7ecf254f5a364df06348bfd2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 25 Jun 2015 13:34:15 +0300 Subject: IB/srp: Avoid using uninitialized variable We might return res which is not initialized. Also reduce code duplication by exporting srp_parse_tmo so srp_tmo_set can reuse it. Detected by Coverity. Signed-off-by: Sagi Grimberg Signed-off-by: Jenny Falkovich Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d40e321..31a20b4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -161,13 +161,10 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp) { int tmo, res; - if (strncmp(val, "off", 3) != 0) { - res = kstrtoint(val, 0, &tmo); - if (res) - goto out; - } else { - tmo = -1; - } + res = srp_parse_tmo(&tmo, val); + if (res) + goto out; + if (kp->arg == &srp_reconnect_delay) res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, srp_dev_loss_tmo); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index a85292b..e3cd3ec 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -203,7 +203,7 @@ static ssize_t srp_show_tmo(char *buf, int tmo) return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n"); } -static int srp_parse_tmo(int *tmo, const char *buf) +int srp_parse_tmo(int *tmo, const char *buf) { int res = 0; @@ -214,6 +214,7 @@ static int srp_parse_tmo(int *tmo, const char *buf) return res; } +EXPORT_SYMBOL(srp_parse_tmo); static ssize_t show_reconnect_delay(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index cdb05dd1..d40d3ef 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -119,6 +119,7 @@ extern struct srp_rport *srp_rport_add(struct Scsi_Host *, extern void srp_rport_del(struct srp_rport *); extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo); +int srp_parse_tmo(int *tmo, const char *buf); extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); -- cgit v0.10.2 From be4b499323bf7291b491c6df51baae62f45b8404 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 25 Jun 2015 17:13:22 +0300 Subject: IB/cm: Do not queue work to a device that's going away Whenever ib_cm gets remove_one call, like when there is a hot-unplug event, the driver should mark itself as going_down and confirm that no new works are going to be queued for that device. so, the order of the actions are: 1. mark the going_down bit. 2. flush the wq. 3. [make sure no new works for that device.] 4. unregister mad agent. otherwise, works that are already queued can be scheduled after the mad agent was freed. Signed-off-by: Erez Shitrit Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index dbddddd..3a972eb 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -169,6 +169,7 @@ struct cm_device { struct ib_device *ib_device; struct device *device; u8 ack_delay; + int going_down; struct cm_port *port[0]; }; @@ -805,6 +806,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); + if (!cm_dev) + return; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); @@ -818,8 +824,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); - queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, - msecs_to_jiffies(wait_time)); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + spin_unlock_irq(&cm.lock); + cm_id_priv->timewait_info = NULL; } @@ -3305,6 +3317,11 @@ static int cm_establish(struct ib_cm_id *cm_id) struct cm_work *work; unsigned long flags; int ret = 0; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id->device, &cm_client); + if (!cm_dev) + return -ENODEV; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) @@ -3343,7 +3360,17 @@ static int cm_establish(struct ib_cm_id *cm_id) work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) { + queue_delayed_work(cm.wq, &work->work, 0); + } else { + kfree(work); + ret = -ENODEV; + } + spin_unlock_irq(&cm.lock); + out: return ret; } @@ -3394,6 +3421,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, enum ib_cm_event_type event; u16 attr_id; int paths = 0; + int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: @@ -3452,7 +3480,19 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!port->cm_dev->going_down) + queue_delayed_work(cm.wq, &work->work, 0); + else + going_down = 1; + spin_unlock_irq(&cm.lock); + + if (going_down) { + kfree(work); + ib_free_recv_mad(mad_recv_wc); + } } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, @@ -3771,7 +3811,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->ib_device = ib_device; cm_get_ack_delay(cm_dev); - + cm_dev->going_down = 0; cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); @@ -3864,14 +3904,23 @@ static void cm_remove_one(struct ib_device *ib_device) list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); + spin_lock_irq(&cm.lock); + cm_dev->going_down = 1; + spin_unlock_irq(&cm.lock); + for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - ib_unregister_mad_agent(port->mad_agent); + /* + * We flush the queue here after the going_down set, this + * verify that no new works will be queued in the recv handler, + * after that we can call the unregister_mad_agent + */ flush_workqueue(cm.wq); + ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); -- cgit v0.10.2 From 8a7ff14dcb40bade309cd2ad04c746bb1d169c4e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 1 Jul 2015 14:31:02 +0300 Subject: IB/mlx4: Do not attemp to report HCA clock offset on VFs mlx4 VFs can provide CQE raw time-stamping services, but they don't have the hca core clock mapped to their PCI bars. As such, we should not attempt to query and report the clock offset to user space for VFs. Doing so causes query_device over VFs to fail with -ENOSUPP. Fixes: 4b664c4355b2 ('IB/mlx4: Add support for CQ time-stamping') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 067a691..f419a72 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -253,14 +253,15 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; - err = mlx4_get_internal_clock_params(dev->dev, &clock_params); - if (err) - goto out; + if (!mlx4_is_slave(dev->dev)) + err = mlx4_get_internal_clock_params(dev->dev, &clock_params); if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { - resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; resp.response_length += sizeof(resp.hca_core_clock_offset); - resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + if (!err && !mlx4_is_slave(dev->dev)) { + resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; + } } if (uhw->outlen) { -- cgit v0.10.2 From 58e9cc90cda7bc732856a2ad3210328fbc4f6ca2 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 1 Jul 2015 14:31:01 +0300 Subject: IB/IPoIB: Fix bad error flow in ipoib_add_port() Error values of ib_query_port() and ib_query_device() weren't propagated correctly. Because of that, ipoib_add_port() could return NULL value, which escaped the IS_ERR() check in ipoib_add_one() and we crashed. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0d8336e..da3e7e7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1577,7 +1577,8 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); priv->dev->dev_id = port - 1; - if (!ib_query_port(hca, port, &attr)) + result = ib_query_port(hca, port, &attr); + if (!result) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", @@ -1598,7 +1599,8 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } - if (ipoib_set_dev_features(priv, hca)) + result = ipoib_set_dev_features(priv, hca); + if (result) goto device_init_failed; /* -- cgit v0.10.2 From a7f2f24cd716d7f1119d46929f41a8436f9c252f Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:47:44 -0500 Subject: RDMA/core: Fixes for port mapper client registration Fixes to allow clients to make remove mapping requests, after they have provided the user space service with the mapping information, they are using when the service is restarted. 1) Adding IWPM_REG_VALID, IWPM_REG_INCOMPL and IWPM_REG_UNDEF registration types for the port mapper clients and functions to set/check the registration type. 2) If the port mapper user space service is not available to register the client, then its registration stays IWPM_REG_UNDEF and the registration isn't checked until the service becomes available (no mappings are possible, if the user space service isn't running). 3) After the service is restarted, the user space port mapper pid is set to valid and the client registration is set to IWPM_REG_INCOMPL to allow the client to make remove mapping requests. Signed-off-by: Tatyana Nikolova Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index e6ffa2e..22a3abe 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -67,7 +67,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto pid_query_error; } - if (iwpm_registered_client(nl_client)) + if (iwpm_check_registration(nl_client, IWPM_REG_VALID) || + iwpm_user_pid == IWPM_PID_UNAVAILABLE) return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); if (!skb) { @@ -106,7 +107,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ - iwpm_set_registered(nl_client, 1); iwpm_user_pid = IWPM_PID_UNAVAILABLE; err_str = "Unable to send a nlmsg"; goto pid_query_error; @@ -144,12 +144,12 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto add_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto add_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client); if (!skb) { err_str = "Unable to create a nlmsg"; @@ -214,12 +214,12 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto query_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto query_mapping_error; } - if (!iwpm_valid_pid()) - return 0; ret = -ENOMEM; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); if (!skb) { @@ -288,12 +288,12 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) err_str = "Invalid port mapper client"; goto remove_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) { err_str = "Unregistered port mapper client"; goto remove_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); if (!skb) { ret = -ENOMEM; @@ -388,7 +388,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", __func__, iwpm_user_pid); if (iwpm_valid_client(nl_client)) - iwpm_set_registered(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_VALID); register_pid_response_exit: nlmsg_request->request_done = 1; /* always for found nlmsg_request */ @@ -644,7 +644,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; const char *msg_type = "Mapping Info response"; - int iwpm_pid; u8 nl_client; char *iwpm_name; u16 iwpm_version; @@ -669,14 +668,14 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) __func__, nl_client); return ret; } - iwpm_set_registered(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_INCOMPL); atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + iwpm_user_pid = cb->nlh->nlmsg_pid; if (!iwpm_mapinfo_available()) return 0; - iwpm_pid = cb->nlh->nlmsg_pid; pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", - __func__, iwpm_pid); - ret = iwpm_send_mapinfo(nl_client, iwpm_pid); + __func__, iwpm_user_pid); + ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid); return ret; } EXPORT_SYMBOL(iwpm_mapping_info_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index a626795..5fb089e 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -78,6 +78,7 @@ init_exit: mutex_unlock(&iwpm_admin_lock); if (!ret) { iwpm_set_valid(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__); } @@ -106,6 +107,7 @@ int iwpm_exit(u8 nl_client) } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); return 0; } EXPORT_SYMBOL(iwpm_exit); @@ -397,17 +399,23 @@ void iwpm_set_valid(u8 nl_client, int valid) } /* valid client */ -int iwpm_registered_client(u8 nl_client) +u32 iwpm_get_registration(u8 nl_client) { return iwpm_admin.reg_list[nl_client]; } /* valid client */ -void iwpm_set_registered(u8 nl_client, int reg) +void iwpm_set_registration(u8 nl_client, u32 reg) { iwpm_admin.reg_list[nl_client] = reg; } +/* valid client */ +u32 iwpm_check_registration(u8 nl_client, u32 reg) +{ + return (iwpm_get_registration(nl_client) & reg); +} + int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr) { diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index ee2d9ff..b7b9e19 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -58,6 +58,10 @@ #define IWPM_PID_UNDEFINED -1 #define IWPM_PID_UNAVAILABLE -2 +#define IWPM_REG_UNDEF 0x01 +#define IWPM_REG_VALID 0x02 +#define IWPM_REG_INCOMPL 0x04 + struct iwpm_nlmsg_request { struct list_head inprocess_list; __u32 nlmsg_seq; @@ -88,7 +92,7 @@ struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; int client_list[RDMA_NL_NUM_CLIENTS]; - int reg_list[RDMA_NL_NUM_CLIENTS]; + u32 reg_list[RDMA_NL_NUM_CLIENTS]; }; /** @@ -159,19 +163,31 @@ int iwpm_valid_client(u8 nl_client); void iwpm_set_valid(u8 nl_client, int valid); /** - * iwpm_registered_client - Check if the port mapper client is registered + * iwpm_check_registration - Check if the client registration + * matches the given one * @nl_client: The index of the netlink client + * @reg: The given registration type to compare with * * Call iwpm_register_pid() to register a client + * Returns true if the client registration matches reg, + * otherwise returns false + */ +u32 iwpm_check_registration(u8 nl_client, u32 reg); + +/** + * iwpm_set_registration - Set the client registration + * @nl_client: The index of the netlink client + * @reg: Registration type to set */ -int iwpm_registered_client(u8 nl_client); +void iwpm_set_registration(u8 nl_client, u32 reg); /** - * iwpm_set_registered - Set the port mapper client to registered or not + * iwpm_get_registration * @nl_client: The index of the netlink client - * @reg: 1 if registered or 0 if not + * + * Returns the client registration type */ -void iwpm_set_registered(u8 nl_client, int reg); +u32 iwpm_get_registration(u8 nl_client); /** * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of -- cgit v0.10.2 From 165d68228906f2866a52069ef1176ab70fa9e216 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:49:40 -0500 Subject: RDMA/nes: Fix for resolving the neigh Neighbor resolution doesn't work without this fix Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9047af4..8a3ad17 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1520,8 +1520,9 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi int rc = arpindex; struct net_device *netdev; struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + __be32 dst_ipaddr = htonl(dst_ip); - rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); + rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0); if (IS_ERR(rt)) { printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); @@ -1533,7 +1534,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); + neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); if (neigh) { -- cgit v0.10.2 From 0a691272509d59ea0adbaa0c420ca0a71c9d0419 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:52:29 -0500 Subject: RDMA/nes: Fix for incorrect recording of the MAC address Fix for incorrect recording of the MAC address Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 02120d3..4713dd7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -3861,7 +3861,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( - (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); + (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]); } else { cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; -- cgit v0.10.2 From 4fabb59449aa44a585b3603ffdadd4c5f4d0c033 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Mon, 6 Jul 2015 14:35:11 +0800 Subject: rds: rds_ib_device.refcount overflow Fixes: 3e0249f9c05c ("RDS/IB: add refcount tracking to struct rds_ib_device") There lacks a dropping on rds_ib_device.refcount in case rds_ib_alloc_fmr failed(mr pool running out). this lead to the refcount overflow. A complain in line 117(see following) is seen. From vmcore: s_ib_rdma_mr_pool_depleted is 2147485544 and rds_ibdev->refcount is -2147475448. That is the evidence the mr pool is used up. so rds_ib_alloc_fmr is very likely to return ERR_PTR(-EAGAIN). 115 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) 116 { 117 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0); 118 if (atomic_dec_and_test(&rds_ibdev->refcount)) 119 queue_work(rds_wq, &rds_ibdev->free_work); 120 } fix is to drop refcount when rds_ib_alloc_fmr failed. Signed-off-by: Wengang Wang Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 273b8bf..657ba9f 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, } ibmr = rds_ib_alloc_fmr(rds_ibdev); - if (IS_ERR(ibmr)) + if (IS_ERR(ibmr)) { + rds_ib_dev_put(rds_ibdev); return ibmr; + } ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); if (ret == 0) -- cgit v0.10.2 From 31b57b87fddf547bf3e98306b41bc82e111396fa Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 7 Jul 2015 17:45:12 +0300 Subject: IB/ucma: Fix lockdep warning in ucma_lock_files The ucma_lock_files() locks the mut mutex on two files, e.g. for migrating an ID. Use mutex_lock_nested() to prevent the warning below. ============================================= [ INFO: possible recursive locking detected ] 4.1.0-rc6-hmm+ #40 Tainted: G O --------------------------------------------- pingpong_rpc_se/10260 is trying to acquire lock: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xc5/0x248 [rdma_ucm] but task is already holding lock: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xbb/0x248 [rdma_ucm] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&file->mut); lock(&file->mut); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by pingpong_rpc_se/10260: #0: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xbb/0x248 [rdma_ucm] stack backtrace: CPU: 0 PID: 10260 Comm: pingpong_rpc_se Tainted: G O 4.1.0-rc6-hmm+ #40 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 ffff8801f85b63d0 ffff880195677b58 ffffffff81668f49 0000000000000001 ffffffff825cbbe0 ffff880195677c38 ffffffff810bb991 ffff880100000000 ffff880100000000 ffff880100000001 ffff8801f85b7010 ffffffff8121bee9 Call Trace: [] dump_stack+0x4f/0x6e [] __lock_acquire+0x741/0x1820 [] ? dput+0x29/0x320 [] lock_acquire+0xc8/0x240 [] ? ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ? mutex_lock_nested+0x291/0x3e0 [] mutex_lock_nested+0x65/0x3e0 [] ? ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ? trace_hardirqs_on+0xd/0x10 [] ? mutex_unlock+0xe/0x10 [] ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ucma_write+0xa4/0xb0 [rdma_ucm] [] __vfs_write+0x34/0x100 [] ? __audit_syscall_entry+0xac/0x110 [] ? current_kernel_time+0xc5/0xe0 [] ? security_file_permission+0x23/0x90 [] ? rw_verify_area+0x5d/0xe0 [] vfs_write+0xab/0x120 [] SyS_write+0x59/0xd0 [] ? __audit_syscall_entry+0xac/0x110 [] system_call_fastpath+0x12/0x76 Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ad45469..a700cdb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1354,10 +1354,10 @@ static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); - mutex_lock(&file2->mut); + mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); - mutex_lock(&file1->mut); + mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } -- cgit v0.10.2 From 8b7cce0dae956b329d09099f8739821936cf7c0f Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 7 Jul 2015 17:45:13 +0300 Subject: IB/ipoib: Prevent lockdep warning in __ipoib_ib_dev_flush __ipoib_ib_dev_flush calls itself recursively on child devices, and lockdep complains about locking vlan_rwsem twice (see below). Use down_read_nested instead of down_read to prevent the warning. ============================================= [ INFO: possible recursive locking detected ] 4.1.0-rc4+ #36 Tainted: G O --------------------------------------------- kworker/u20:2/261 is trying to acquire lock: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] but task is already holding lock: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&priv->vlan_rwsem); lock(&priv->vlan_rwsem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/u20:2/261: #0: ("%s""ipoib_flush"){.+.+..}, at: [] process_one_work+0x15c/0x760 #1: ((&priv->flush_heavy)){+.+...}, at: [] process_one_work+0x15c/0x760 #2: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] stack backtrace: CPU: 3 PID: 261 Comm: kworker/u20:2 Tainted: G O 4.1.0-rc4+ #36 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 Workqueue: ipoib_flush ipoib_ib_dev_flush_heavy [ib_ipoib] ffff8801c6c54790 ffff8801c9927af8 ffffffff81665238 0000000000000001 ffffffff825b5b30 ffff8801c9927bd8 ffffffff810bba51 ffff880100000000 ffffffff00000001 ffff880100000001 ffff8801c6c55428 ffff8801c6c54790 Call Trace: [] dump_stack+0x4f/0x6f [] __lock_acquire+0x741/0x1820 [] lock_acquire+0xc8/0x240 [] ? __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] down_read+0x4c/0x70 [] ? __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] __ipoib_ib_dev_flush+0x5a/0x2b0 [ib_ipoib] [] ipoib_ib_dev_flush_heavy+0x1a/0x20 [ib_ipoib] [] process_one_work+0x201/0x760 [] ? process_one_work+0x15c/0x760 [] worker_thread+0x120/0x4d0 [] ? process_one_work+0x760/0x760 [] ? process_one_work+0x760/0x760 [] kthread+0xfe/0x120 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x42/0x70 [] ? __init_kthread_worker+0x70/0x70 Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 63b92cb..058150f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -985,20 +985,21 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) } static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, - enum ipoib_flush_level level) + enum ipoib_flush_level level, + int nesting) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; int result; - down_read(&priv->vlan_rwsem); + down_read_nested(&priv->vlan_rwsem, nesting); /* * Flush any child interfaces too -- they might be up even if * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, level); + __ipoib_ib_dev_flush(cpriv, level, nesting + 1); up_read(&priv->vlan_rwsem); @@ -1076,7 +1077,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_light); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0); } void ipoib_ib_dev_flush_normal(struct work_struct *work) @@ -1084,7 +1085,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_normal); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0); } void ipoib_ib_dev_flush_heavy(struct work_struct *work) @@ -1092,7 +1093,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); } void ipoib_ib_dev_cleanup(struct net_device *dev) -- cgit v0.10.2 From 59d40dd92ca00df317ad44fa44c27fe38c58e477 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Thu, 11 Jun 2015 11:06:41 -0500 Subject: IB/ucm: Fix bitmap wrap when devnum > IB_UCM_MAX_DEVICES ib_ucm_release_dev clears the wrong bit if devnum is greater than IB_UCM_MAX_DEVICES. Signed-off-by: Carol L Soto Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 62c24b1..0094810 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1193,6 +1193,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; @@ -1202,7 +1203,7 @@ static void ib_ucm_release_dev(struct device *dev) if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else - clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); kfree(ucm_dev); } @@ -1226,7 +1227,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static int find_overflow_devnum(void) { int ret; -- cgit v0.10.2 From c42687784b9a6b9733fee701ed236a5fe088fac4 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 12 Jul 2015 01:24:09 -0700 Subject: IB/ipoib: Scatter-Gather support in connected mode By default, IPoIB-CM driver uses 64k MTU. Larger MTU gives better performance. This MTU plus overhead puts the memory allocation for IP based packets at 32 4k pages (order 5), which have to be contiguous. When the system memory under pressure, it was observed that allocating 128k contiguous physical memory is difficult and causes serious errors (such as system becomes unusable). This enhancement resolve the issue by removing the physically contiguous memory requirement using Scatter/Gather feature that exists in Linux stack. With this fix Scatter-Gather will be supported also in connected mode. This change reverts some of the change made in commit e112373fd6aa ("IPoIB/cm: Reduce connected mode TX object size"). The ability to use SG in IPoIB CM is possible because the coupling between NETIF_F_SG and NETIF_F_CSUM was removed in commit ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: Yuval Shaia Acked-by: Christian Marie Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bd94b0a..79859c4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -239,7 +239,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_cm_tx_buf *tx_ring; + struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; @@ -504,6 +504,33 @@ int ipoib_mcast_stop_thread(struct net_device *dev); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req); + +static inline void ipoib_build_sge(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) +{ + int i, off; + struct sk_buff *skb = tx_req->skb; + skb_frag_t *frags = skb_shinfo(skb)->frags; + int nr_frags = skb_shinfo(skb)->nr_frags; + u64 *mapping = tx_req->mapping; + + if (skb_headlen(skb)) { + priv->tx_sge[0].addr = mapping[0]; + priv->tx_sge[0].length = skb_headlen(skb); + off = 1; + } else + off = 0; + + for (i = 0; i < nr_frags; ++i) { + priv->tx_sge[i + off].addr = mapping[i + off]; + priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); + } + priv->tx_wr.num_sge = nr_frags + off; +} + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index cf32a778..ee39be6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -694,14 +694,12 @@ repost: static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_cm_tx *tx, unsigned int wr_id, - u64 addr, int len) + struct ipoib_tx_buf *tx_req) { struct ib_send_wr *bad_wr; - priv->tx_sge[0].addr = addr; - priv->tx_sge[0].length = len; + ipoib_build_sge(priv, tx_req); - priv->tx_wr.num_sge = 1; priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); @@ -710,8 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_cm_tx_buf *tx_req; - u64 addr; + struct ipoib_tx_buf *tx_req; int rc; if (unlikely(skb->len > tx->mtu)) { @@ -735,24 +732,21 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ */ tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; tx_req->skb = skb; - addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + + if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { ++dev->stats.tx_errors; dev_kfree_skb_any(skb); return; } - tx_req->mapping = addr; - skb_orphan(skb); skb_dst_drop(skb); - rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len); + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; - ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); } else { dev->trans_start = jiffies; @@ -777,7 +771,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -791,7 +785,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1036,6 +1030,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; + if (dev->features & NETIF_F_SG) + attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", @@ -1170,7 +1167,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1197,8 +1194,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, - DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; netif_tx_lock_bh(p->dev); @@ -1455,7 +1451,6 @@ static void ipoib_cm_stale_task(struct work_struct *work) spin_unlock_irq(&priv->lock); } - static ssize_t show_mode(struct device *d, struct device_attribute *attr, char *buf) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 058150f..d266667 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -263,8 +263,7 @@ repost: "for buf %d\n", wr_id); } -static int ipoib_dma_map_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -305,8 +304,8 @@ partial_error: return -EIO; } -static void ipoib_dma_unmap_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -314,7 +313,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, int off; if (skb_headlen(skb)) { - ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb), + DMA_TO_DEVICE); off = 1; } else off = 0; @@ -322,8 +322,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag), - DMA_TO_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + off], + skb_frag_size(frag), DMA_TO_DEVICE); } } @@ -389,7 +389,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &priv->tx_ring[wr_id]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); ++dev->stats.tx_packets; dev->stats.tx_bytes += tx_req->skb->len; @@ -514,24 +514,10 @@ static inline int post_send(struct ipoib_dev_priv *priv, void *head, int hlen) { struct ib_send_wr *bad_wr; - int i, off; struct sk_buff *skb = tx_req->skb; - skb_frag_t *frags = skb_shinfo(skb)->frags; - int nr_frags = skb_shinfo(skb)->nr_frags; - u64 *mapping = tx_req->mapping; - if (skb_headlen(skb)) { - priv->tx_sge[0].addr = mapping[0]; - priv->tx_sge[0].length = skb_headlen(skb); - off = 1; - } else - off = 0; + ipoib_build_sge(priv, tx_req); - for (i = 0; i < nr_frags; ++i) { - priv->tx_sge[i + off].addr = mapping[i + off]; - priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); - } - priv->tx_wr.num_sge = nr_frags + off; priv->tx_wr.wr_id = wr_id; priv->tx_wr.wr.ud.remote_qpn = qpn; priv->tx_wr.wr.ud.ah = address; @@ -617,7 +603,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; --priv->tx_outstanding; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) netif_wake_queue(dev); @@ -868,7 +854,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) while ((int) priv->tx_tail - (int) priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; --priv->tx_outstanding; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da3e7e7..59604a7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -190,7 +190,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu struct ipoib_dev_priv *priv = netdev_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO); + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); return features; } -- cgit v0.10.2 From edcd2a7474ba3b47e54c3c9a300287342de74766 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 7 Jun 2015 13:36:11 +0300 Subject: IB/ipoib: Set MTU to max allowed by mode when mode changes When switching between modes (datagram / connected) change the MTU accordingly. datagram mode up to 4K, connected mode up to (64K - 0x10). Signed-off-by: ELi Cohen Signed-off-by: Erez Shitrit Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 59604a7..b2943c8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -232,6 +232,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); netdev_update_features(dev); + dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; -- cgit v0.10.2 From cb1ff431c3dd904cda37d6d07d4a3ea29840d621 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Tue, 16 Jun 2015 17:13:05 +0530 Subject: IB/ipath: Convert use of __constant_ to In little endian cases, the macros be16_to_cpu and cpu_to_be64 unfolds to __swab{16,64} which provides special case for constants. In big endian cases, __constant_be16_to_cpu and be16_to_cpu expand directly to the same expression. The same applies for __constant_cpu_to_be64 and cpu_to_be64. So, replace __constant_be16_to_cpu with be16_to_cpu and __constant_cpu_to_be64 with cpu_to_be64, with the goal of getting rid of the definition of __constant_be16_to_cpu and __constant_cpu_to_be64 completely. Signed-off-by: Vaishali Thakkar Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 48253b8..30ba49c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -2044,9 +2044,9 @@ int ipath_register_ib_device(struct ipath_devdata *dd) spin_lock_init(&idev->qp_table.lock); spin_lock_init(&idev->lk_table.lock); - idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); + idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); /* Set the prefix to the default value (see ch. 4.1.1) */ - idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); + idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL); ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); if (ret) -- cgit v0.10.2 From 43bfb9729ea88d46e3f4d3ad7b17106c7b071fcb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 25 Jun 2015 17:45:38 +0300 Subject: IB/mlx4: Fix use of flow-counters for process_mad For IB links, reading HCA flow counters through iboe_process_mad() should be used when mlx4_ib_process_mad() is invoked only for VFs PMA queries and exactly nothing else. Fixes: 7193a141eb74 ('IB/mlx4: Set VF to read from QP counters') Reported-by: Linus Torvalds Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9a810e3..68b3dfa 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -860,22 +860,31 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx4_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; + enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || *out_mad_size != sizeof(*out_mad))) return IB_MAD_RESULT_FAILURE; - switch (rdma_port_get_link_layer(ibdev, port_num)) { - case IB_LINK_LAYER_INFINIBAND: - if (!mlx4_is_slave(dev->dev)) - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - case IB_LINK_LAYER_ETHERNET: - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - default: - return -EINVAL; + /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA + * queries, should be called only by VFs and for that specific purpose + */ + if (link == IB_LINK_LAYER_INFINIBAND) { + if (mlx4_is_slave(dev->dev) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); } + + if (link == IB_LINK_LAYER_ETHERNET) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return -EINVAL; } static void send_handler(struct ib_mad_agent *agent, -- cgit v0.10.2 From a39a98ff4cc8b514fe6fa551f6ed59cd60e07da2 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Wed, 8 Jul 2015 09:43:35 +0530 Subject: IB/mlx4: Optimize freeing of items on error unwind On failure, we loop through all possible pointers and test them before calling kfree. But really, why even attempt to free items we didn't allocate when we can easily loop through exactly and only the devices for which the original memory allocation succeeded and free just those. Signed-off-by: Maninder Singh Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f419a72..064454a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2670,17 +2670,15 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); - goto out; + return; } for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); - for (i = 0; i < dev->caps.num_ports; i++) { - if (dm[i]) - kfree(dm[i]); - } + while (--i >= 0) + kfree(dm[i]); goto out; } } -- cgit v0.10.2 From 9bbf282da87294e1bda0ccb4e351bfdf5fc076cd Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 9 Jul 2015 10:16:12 -0400 Subject: IB/mlx4: Fix memory leak in do_slave_init We create a number of work structs to be queued up to a workqueue, and on completion of the workqueue handler, the workqueue handler frees the allocated memory. If, however, we don't queue the work struct because the device is going down, then we need to free the memory ourselves. Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 064454a..1c59e47 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2692,6 +2692,8 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); if (!ibdev->sriov.is_going_down) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); + else + kfree(dm[i]); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: -- cgit v0.10.2 From d9a047aeffcef5755952d18f2901d8777d84019d Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 9 Jul 2015 10:21:08 -0400 Subject: IB/mlx4: Optimize do_slave_init There is little chance our memory allocation will fail, so we can combine initializing the work structs with allocating them instead of looping through all of them once to allocate and again to initialize. Then when we need to actually find out if our device is up or in the process of going down, have all of our work structs batched up, take the spin_lock once and only once, and do all of the batch under the one spin_lock invocation instead of incurring all of the locked memory cycles we would otherwise incur to take/release the spin_lock over and over again. Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c59e47..8be6db8 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2681,20 +2681,22 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) kfree(dm[i]); goto out; } - } - /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; - spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); - if (!ibdev->sriov.is_going_down) + } + /* initialize or tear down tunnel QPs for the slave */ + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) { + for (i = 0; i < ports; i++) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); - else - kfree(dm[i]); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } else { + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + for (i = 0; i < ports; i++) + kfree(dm[i]); } out: kfree(dm); -- cgit v0.10.2 From 45d254206f141f560e76319191f912d0a3d27bd3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:21:15 +0200 Subject: IB/core: Destroy multcast_idr on module exit Destroy multcast_idr on module exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index a700cdb..29b2121 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1616,6 +1616,7 @@ static void __exit ucma_cleanup(void) device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); + idr_destroy(&multicast_idr); } module_init(ucma_init); -- cgit v0.10.2 From d8b2ba7c5928173fe1c12bd2545f5ed85d1c3c7a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:23:00 +0200 Subject: IB/core: Destroy ocrdma_dev_id IDR on module exit Destroy ocrdma_dev_id IDR on module exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 8a1398b..d98a707 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -696,6 +696,7 @@ static void __exit ocrdma_exit_module(void) ocrdma_unregister_inet6addr_notifier(); ocrdma_unregister_inetaddr_notifier(); ocrdma_rem_debugfs(); + idr_destroy(&ocrdma_dev_id); } module_init(ocrdma_init_module); -- cgit v0.10.2 From 8642d7f8364d9fa84a83629129e78402c169c54a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 13 Jul 2015 16:44:54 +0200 Subject: intel_scu_ipc: move local memory initialization out of a mutex '{ }' and memset will both reset the cbuf buffer. Only once is enough and this can be done outside fo the mutex. Signed-off-by: Christophe JAILLET Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 001b199..187d108 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -216,13 +216,13 @@ static int pwr_reg_rdwr(u16 *addr, u8 *data, u32 count, u32 op, u32 id) int nc; u32 offset = 0; int err; - u8 cbuf[IPC_WWBUF_SIZE] = { }; + u8 cbuf[IPC_WWBUF_SIZE]; u32 *wbuf = (u32 *)&cbuf; - mutex_lock(&ipclock); - memset(cbuf, 0, sizeof(cbuf)); + mutex_lock(&ipclock); + if (ipcdev.pdev == NULL) { mutex_unlock(&ipclock); return -ENODEV; -- cgit v0.10.2 From 26456955719b79cc4a011b18221aa68f599f6b6c Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 15 Jul 2015 12:52:47 -0500 Subject: jfs: clean up jfs_rename and fix out of order unlock The end of jfs_rename(), which is also used by the error paths, included a call to IWRITE_UNLOCK(new_ip) after labels out1, out2 and out3. If we come in through these labels, IWRITE_LOCK() has not been called yet. In moving that call to the correct spot, I also moved some exceptional truncate code earlier as well, since the early error paths don't need to deal with it, and I renamed out4: to out_tx: so a future patch by Jan Kara doesn't need to deal with renumbering or confusing out-of-order labels. Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 66db7bc..f7b4037 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1161,7 +1161,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = dtModify(tid, new_dir, &new_dname, &ino, old_ip->i_ino, JFS_RENAME); if (rc) - goto out4; + goto out_tx; drop_nlink(new_ip); if (S_ISDIR(new_ip->i_mode)) { drop_nlink(new_ip); @@ -1186,7 +1186,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if ((new_size = commitZeroLink(tid, new_ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ rc = new_size; - goto out4; + goto out_tx; } tblk = tid_to_tblock(tid); tblk->xflag |= COMMIT_DELETE; @@ -1204,7 +1204,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (rc) { jfs_err("jfs_rename didn't expect dtSearch to fail " "w/rc = %d", rc); - goto out4; + goto out_tx; } ino = old_ip->i_ino; @@ -1212,7 +1212,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (rc) { if (rc == -EIO) jfs_err("jfs_rename: dtInsert returned -EIO"); - goto out4; + goto out_tx; } if (S_ISDIR(old_ip->i_mode)) inc_nlink(new_dir); @@ -1227,7 +1227,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_err("jfs_rename did not expect dtDelete to return rc = %d", rc); txAbort(tid, 1); /* Marks Filesystem dirty */ - goto out4; + goto out_tx; } if (S_ISDIR(old_ip->i_mode)) { drop_nlink(old_dir); @@ -1286,7 +1286,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = txCommit(tid, ipcount, iplist, commit_flag); - out4: + out_tx: txEnd(tid); if (new_ip) mutex_unlock(&JFS_IP(new_ip)->commit_mutex); @@ -1309,13 +1309,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (new_ip && (new_ip->i_nlink == 0)) set_cflag(COMMIT_Nolink, new_ip); - out3: - free_UCSname(&new_dname); - out2: - free_UCSname(&old_dname); - out1: - if (new_ip && !S_ISDIR(new_ip->i_mode)) - IWRITE_UNLOCK(new_ip); /* * Truncating the directory index table is not guaranteed. It * may need to be done iteratively @@ -1326,7 +1319,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, clear_cflag(COMMIT_Stale, old_dir); } - + if (new_ip && !S_ISDIR(new_ip->i_mode)) + IWRITE_UNLOCK(new_ip); + out3: + free_UCSname(&new_dname); + out2: + free_UCSname(&old_dname); + out1: jfs_info("jfs_rename: returning %d", rc); return rc; } -- cgit v0.10.2 From 7bee607472aa2e5a36dfe143e5a625be06125f53 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 14 Jul 2015 11:57:48 -0600 Subject: NVMe: Reread partitions on metadata formats This patch has the driver automatically reread partitions if a namespace has a separate metadata format. Previously revalidating a disk was sufficient to get the correct capacity set on such formatted drives, but partitions that may exist would not have been surfaced. Reported-by: Paul Grabinar Signed-off-by: Keith Busch Cc: Matthew Wilcox Tested-by: Paul Grabinar Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index d1d6141..7920c27 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2108,8 +2108,17 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) goto out_free_disk; add_disk(ns->disk); - if (ns->ms) - revalidate_disk(ns->disk); + if (ns->ms) { + struct block_device *bd = bdget_disk(ns->disk, 0); + if (!bd) + return; + if (blkdev_get(bd, FMODE_READ, NULL)) { + bdput(bd); + return; + } + blkdev_reread_part(bd); + blkdev_put(bd, FMODE_READ); + } return; out_free_disk: kfree(disk); -- cgit v0.10.2 From e56f698bd0720e17f10f39e8b0b5b446ad0ab22c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2015 19:53:22 +0800 Subject: blk-mq: set default timeout as 30 seconds It is reasonable to set default timeout of request as 30 seconds instead of 30000 ticks, which may be 300 seconds if HZ is 100, for example, some arm64 based systems may choose 100 HZ. Signed-off-by: Ming Lei Fixes: c76cbbcf4044 ("blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()" Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index f537796..7d842db 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1998,7 +1998,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_hctxs; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); - blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000); + blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); q->nr_queues = nr_cpu_ids; q->nr_hw_queues = set->nr_hw_queues; -- cgit v0.10.2 From c2efefb33abfb245395199137ece3c1e3df47f51 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:53:43 +0200 Subject: ACPI / scan: Move sysfs-related device code to a separate file To reduce the size of scan.c and improve the readability of it, move all code related to device sysfs, modalias creation etc. to a new file called device_sysfs.c. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 8321430..08ac110 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -24,7 +24,7 @@ acpi-y += nvs.o # Power management related files acpi-y += wakeup.o acpi-$(CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT) += sleep.o -acpi-y += device_pm.o +acpi-y += device_sysfs.o device_pm.o acpi-$(CONFIG_ACPI_SLEEP) += proc.o diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c new file mode 100644 index 0000000..4ab4582 --- /dev/null +++ b/drivers/acpi/device_sysfs.c @@ -0,0 +1,521 @@ +/* + * drivers/acpi/device_sysfs.c - ACPI device sysfs attributes and modalias. + * + * Copyright (C) 2015, Intel Corp. + * Author: Mika Westerberg + * Author: Rafael J. Wysocki + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include + +#include "internal.h" + +/** + * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent + * @acpi_dev: ACPI device object. + * @modalias: Buffer to print into. + * @size: Size of the buffer. + * + * Creates hid/cid(s) string needed for modalias and uevent + * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: + * char *modalias: "acpi:IBM0001:ACPI0001" + * Return: 0: no _HID and no _CID + * -EINVAL: output error + * -ENOMEM: output is truncated +*/ +static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, + int size) +{ + int len; + int count; + struct acpi_hardware_id *id; + + /* + * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should + * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the + * device's list. + */ + count = 0; + list_for_each_entry(id, &acpi_dev->pnp.ids, list) + if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) + count++; + + if (!count) + return 0; + + len = snprintf(modalias, size, "acpi:"); + if (len <= 0) + return len; + + size -= len; + + list_for_each_entry(id, &acpi_dev->pnp.ids, list) { + if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) + continue; + + count = snprintf(&modalias[len], size, "%s:", id->id); + if (count < 0) + return -EINVAL; + + if (count >= size) + return -ENOMEM; + + len += count; + size -= count; + } + modalias[len] = '\0'; + return len; +} + +/** + * create_of_modalias - Creates DT compatible string for modalias and uevent + * @acpi_dev: ACPI device object. + * @modalias: Buffer to print into. + * @size: Size of the buffer. + * + * Expose DT compatible modalias as of:NnameTCcompatible. This function should + * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of + * ACPI/PNP IDs. + */ +static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, + int size) +{ + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; + const union acpi_object *of_compatible, *obj; + int len, count; + int i, nval; + char *c; + + acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); + /* DT strings are all in lower case */ + for (c = buf.pointer; *c != '\0'; c++) + *c = tolower(*c); + + len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); + ACPI_FREE(buf.pointer); + + if (len <= 0) + return len; + + of_compatible = acpi_dev->data.of_compatible; + if (of_compatible->type == ACPI_TYPE_PACKAGE) { + nval = of_compatible->package.count; + obj = of_compatible->package.elements; + } else { /* Must be ACPI_TYPE_STRING. */ + nval = 1; + obj = of_compatible; + } + for (i = 0; i < nval; i++, obj++) { + count = snprintf(&modalias[len], size, "C%s", + obj->string.pointer); + if (count < 0) + return -EINVAL; + + if (count >= size) + return -ENOMEM; + + len += count; + size -= count; + } + modalias[len] = '\0'; + return len; +} + +int __acpi_device_uevent_modalias(struct acpi_device *adev, + struct kobj_uevent_env *env) +{ + int len; + + if (!adev) + return -ENODEV; + + if (list_empty(&adev->pnp.ids)) + return 0; + + if (add_uevent_var(env, "MODALIAS=")) + return -ENOMEM; + + len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + if (len < 0) + return len; + + env->buflen += len; + if (!adev->data.of_compatible) + return 0; + + if (len > 0 && add_uevent_var(env, "MODALIAS=")) + return -ENOMEM; + + len = create_of_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + if (len < 0) + return len; + + env->buflen += len; + + return 0; +} + +/** + * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices. + * + * Create the uevent modalias field for ACPI-enumerated devices. + * + * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with + * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". + */ +int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) +{ + return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); +} +EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); + +static int __acpi_device_modalias(struct acpi_device *adev, char *buf, int size) +{ + int len, count; + + if (!adev) + return -ENODEV; + + if (list_empty(&adev->pnp.ids)) + return 0; + + len = create_pnp_modalias(adev, buf, size - 1); + if (len < 0) { + return len; + } else if (len > 0) { + buf[len++] = '\n'; + size -= len; + } + if (!adev->data.of_compatible) + return len; + + count = create_of_modalias(adev, buf + len, size - 1); + if (count < 0) { + return count; + } else if (count > 0) { + len += count; + buf[len++] = '\n'; + } + + return len; +} + +/** + * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices. + * + * Create the modalias sysfs attribute for ACPI-enumerated devices. + * + * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with + * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". + */ +int acpi_device_modalias(struct device *dev, char *buf, int size) +{ + return __acpi_device_modalias(acpi_companion_match(dev), buf, size); +} +EXPORT_SYMBOL_GPL(acpi_device_modalias); + +static ssize_t +acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { + return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); +} +static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); + +static ssize_t real_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *adev = to_acpi_device(dev); + int state; + int ret; + + ret = acpi_device_get_power(adev, &state); + if (ret) + return ret; + + return sprintf(buf, "%s\n", acpi_power_state_string(state)); +} + +static DEVICE_ATTR(real_power_state, 0444, real_power_state_show, NULL); + +static ssize_t power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *adev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); +} + +static DEVICE_ATTR(power_state, 0444, power_state_show, NULL); + +static ssize_t +acpi_eject_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_device *acpi_device = to_acpi_device(d); + acpi_object_type not_used; + acpi_status status; + + if (!count || buf[0] != '1') + return -EINVAL; + + if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) + && !acpi_device->driver) + return -ENODEV; + + status = acpi_get_type(acpi_device->handle, ¬_used); + if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) + return -ENODEV; + + get_device(&acpi_device->dev); + status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); + if (ACPI_SUCCESS(status)) + return count; + + put_device(&acpi_device->dev); + acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, + ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); + return status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN; +} + +static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store); + +static ssize_t +acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); +} +static DEVICE_ATTR(hid, 0444, acpi_device_hid_show, NULL); + +static ssize_t acpi_device_uid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_dev->pnp.unique_id); +} +static DEVICE_ATTR(uid, 0444, acpi_device_uid_show, NULL); + +static ssize_t acpi_device_adr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "0x%08x\n", + (unsigned int)(acpi_dev->pnp.bus_address)); +} +static DEVICE_ATTR(adr, 0444, acpi_device_adr_show, NULL); + +static ssize_t +acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; + int result; + + result = acpi_get_name(acpi_dev->handle, ACPI_FULL_PATHNAME, &path); + if (result) + goto end; + + result = sprintf(buf, "%s\n", (char*)path.pointer); + kfree(path.pointer); +end: + return result; +} +static DEVICE_ATTR(path, 0444, acpi_device_path_show, NULL); + +/* sysfs file that shows description text from the ACPI _STR method */ +static ssize_t description_show(struct device *dev, + struct device_attribute *attr, + char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + int result; + + if (acpi_dev->pnp.str_obj == NULL) + return 0; + + /* + * The _STR object contains a Unicode identifier for a device. + * We need to convert to utf-8 so it can be displayed. + */ + result = utf16s_to_utf8s( + (wchar_t *)acpi_dev->pnp.str_obj->buffer.pointer, + acpi_dev->pnp.str_obj->buffer.length, + UTF16_LITTLE_ENDIAN, buf, + PAGE_SIZE); + + buf[result++] = '\n'; + + return result; +} +static DEVICE_ATTR(description, 0444, description_show, NULL); + +static ssize_t +acpi_device_sun_show(struct device *dev, struct device_attribute *attr, + char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + acpi_status status; + unsigned long long sun; + + status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return sprintf(buf, "%llu\n", sun); +} +static DEVICE_ATTR(sun, 0444, acpi_device_sun_show, NULL); + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + acpi_status status; + unsigned long long sta; + + status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return sprintf(buf, "%llu\n", sta); +} +static DEVICE_ATTR_RO(status); + +/** + * acpi_device_setup_files - Create sysfs attributes of an ACPI device. + * @dev: ACPI device object. + */ +int acpi_device_setup_files(struct acpi_device *dev) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_status status; + int result = 0; + + /* + * Devices gotten from FADT don't have a "path" attribute + */ + if (dev->handle) { + result = device_create_file(&dev->dev, &dev_attr_path); + if (result) + goto end; + } + + if (!list_empty(&dev->pnp.ids)) { + result = device_create_file(&dev->dev, &dev_attr_hid); + if (result) + goto end; + + result = device_create_file(&dev->dev, &dev_attr_modalias); + if (result) + goto end; + } + + /* + * If device has _STR, 'description' file is created + */ + if (acpi_has_method(dev->handle, "_STR")) { + status = acpi_evaluate_object(dev->handle, "_STR", + NULL, &buffer); + if (ACPI_FAILURE(status)) + buffer.pointer = NULL; + dev->pnp.str_obj = buffer.pointer; + result = device_create_file(&dev->dev, &dev_attr_description); + if (result) + goto end; + } + + if (dev->pnp.type.bus_address) + result = device_create_file(&dev->dev, &dev_attr_adr); + if (dev->pnp.unique_id) + result = device_create_file(&dev->dev, &dev_attr_uid); + + if (acpi_has_method(dev->handle, "_SUN")) { + result = device_create_file(&dev->dev, &dev_attr_sun); + if (result) + goto end; + } + + if (acpi_has_method(dev->handle, "_STA")) { + result = device_create_file(&dev->dev, &dev_attr_status); + if (result) + goto end; + } + + /* + * If device has _EJ0, 'eject' file is created that is used to trigger + * hot-removal function from userland. + */ + if (acpi_has_method(dev->handle, "_EJ0")) { + result = device_create_file(&dev->dev, &dev_attr_eject); + if (result) + return result; + } + + if (dev->flags.power_manageable) { + result = device_create_file(&dev->dev, &dev_attr_power_state); + if (result) + return result; + + if (dev->power.flags.power_resources) + result = device_create_file(&dev->dev, + &dev_attr_real_power_state); + } + +end: + return result; +} + +/** + * acpi_device_remove_files - Remove sysfs attributes of an ACPI device. + * @dev: ACPI device object. + */ +void acpi_device_remove_files(struct acpi_device *dev) +{ + if (dev->flags.power_manageable) { + device_remove_file(&dev->dev, &dev_attr_power_state); + if (dev->power.flags.power_resources) + device_remove_file(&dev->dev, + &dev_attr_real_power_state); + } + + /* + * If device has _STR, remove 'description' file + */ + if (acpi_has_method(dev->handle, "_STR")) { + kfree(dev->pnp.str_obj); + device_remove_file(&dev->dev, &dev_attr_description); + } + /* + * If device has _EJ0, remove 'eject' file. + */ + if (acpi_has_method(dev->handle, "_EJ0")) + device_remove_file(&dev->dev, &dev_attr_eject); + + if (acpi_has_method(dev->handle, "_SUN")) + device_remove_file(&dev->dev, &dev_attr_sun); + + if (dev->pnp.unique_id) + device_remove_file(&dev->dev, &dev_attr_uid); + if (dev->pnp.type.bus_address) + device_remove_file(&dev->dev, &dev_attr_adr); + device_remove_file(&dev->dev, &dev_attr_modalias); + device_remove_file(&dev->dev, &dev_attr_hid); + if (acpi_has_method(dev->handle, "_STA")) + device_remove_file(&dev->dev, &dev_attr_status); + if (dev->handle) + device_remove_file(&dev->dev, &dev_attr_path); +} diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4683a96..c529454 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -93,12 +93,21 @@ int acpi_device_add(struct acpi_device *device, void (*release)(struct device *)); void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, int type, unsigned long long sta); +int acpi_device_setup_files(struct acpi_device *dev); +void acpi_device_remove_files(struct acpi_device *dev); void acpi_device_add_finalize(struct acpi_device *device); void acpi_free_pnp_ids(struct acpi_device_pnp *pnp); bool acpi_device_is_present(struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); /* -------------------------------------------------------------------------- + Device Matching and Notification + -------------------------------------------------------------------------- */ +struct acpi_device *acpi_companion_match(const struct device *dev); +int __acpi_device_uevent_modalias(struct acpi_device *adev, + struct kobj_uevent_env *env); + +/* -------------------------------------------------------------------------- Power Resource -------------------------------------------------------------------------- */ int acpi_power_init(void); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ec25635..099831f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -115,117 +115,6 @@ int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler, return 0; } -/** - * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent - * @acpi_dev: ACPI device object. - * @modalias: Buffer to print into. - * @size: Size of the buffer. - * - * Creates hid/cid(s) string needed for modalias and uevent - * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: - * char *modalias: "acpi:IBM0001:ACPI0001" - * Return: 0: no _HID and no _CID - * -EINVAL: output error - * -ENOMEM: output is truncated -*/ -static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, - int size) -{ - int len; - int count; - struct acpi_hardware_id *id; - - /* - * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should - * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the - * device's list. - */ - count = 0; - list_for_each_entry(id, &acpi_dev->pnp.ids, list) - if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) - count++; - - if (!count) - return 0; - - len = snprintf(modalias, size, "acpi:"); - if (len <= 0) - return len; - - size -= len; - - list_for_each_entry(id, &acpi_dev->pnp.ids, list) { - if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) - continue; - - count = snprintf(&modalias[len], size, "%s:", id->id); - if (count < 0) - return -EINVAL; - - if (count >= size) - return -ENOMEM; - - len += count; - size -= count; - } - modalias[len] = '\0'; - return len; -} - -/** - * create_of_modalias - Creates DT compatible string for modalias and uevent - * @acpi_dev: ACPI device object. - * @modalias: Buffer to print into. - * @size: Size of the buffer. - * - * Expose DT compatible modalias as of:NnameTCcompatible. This function should - * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of - * ACPI/PNP IDs. - */ -static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, - int size) -{ - struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; - const union acpi_object *of_compatible, *obj; - int len, count; - int i, nval; - char *c; - - acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); - /* DT strings are all in lower case */ - for (c = buf.pointer; *c != '\0'; c++) - *c = tolower(*c); - - len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); - ACPI_FREE(buf.pointer); - - if (len <= 0) - return len; - - of_compatible = acpi_dev->data.of_compatible; - if (of_compatible->type == ACPI_TYPE_PACKAGE) { - nval = of_compatible->package.count; - obj = of_compatible->package.elements; - } else { /* Must be ACPI_TYPE_STRING. */ - nval = 1; - obj = of_compatible; - } - for (i = 0; i < nval; i++, obj++) { - count = snprintf(&modalias[len], size, "C%s", - obj->string.pointer); - if (count < 0) - return -EINVAL; - - if (count >= size) - return -ENOMEM; - - len += count; - size -= count; - } - modalias[len] = '\0'; - return len; -} - /* * acpi_companion_match() - Can we match via ACPI companion device * @dev: Device in question @@ -247,7 +136,7 @@ static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, * resources available from it but they will be matched normally using functions * provided by their bus types (and analogously for their modalias). */ -static struct acpi_device *acpi_companion_match(const struct device *dev) +struct acpi_device *acpi_companion_match(const struct device *dev) { struct acpi_device *adev; struct mutex *physical_node_lock; @@ -276,103 +165,6 @@ static struct acpi_device *acpi_companion_match(const struct device *dev) return adev; } -static int __acpi_device_uevent_modalias(struct acpi_device *adev, - struct kobj_uevent_env *env) -{ - int len; - - if (!adev) - return -ENODEV; - - if (list_empty(&adev->pnp.ids)) - return 0; - - if (add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - if (!adev->data.of_compatible) - return 0; - - if (len > 0 && add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_of_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - - return 0; -} - -/* - * Creates uevent modalias field for ACPI enumerated devices. - * Because the other buses does not support ACPI HIDs & CIDs. - * e.g. for a device with hid:IBM0001 and cid:ACPI0001 you get: - * "acpi:IBM0001:ACPI0001" - */ -int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) -{ - return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); -} -EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); - -static int __acpi_device_modalias(struct acpi_device *adev, char *buf, int size) -{ - int len, count; - - if (!adev) - return -ENODEV; - - if (list_empty(&adev->pnp.ids)) - return 0; - - len = create_pnp_modalias(adev, buf, size - 1); - if (len < 0) { - return len; - } else if (len > 0) { - buf[len++] = '\n'; - size -= len; - } - if (!adev->data.of_compatible) - return len; - - count = create_of_modalias(adev, buf + len, size - 1); - if (count < 0) { - return count; - } else if (count > 0) { - len += count; - buf[len++] = '\n'; - } - - return len; -} - -/* - * Creates modalias sysfs attribute for ACPI enumerated devices. - * Because the other buses does not support ACPI HIDs & CIDs. - * e.g. for a device with hid:IBM0001 and cid:ACPI0001 you get: - * "acpi:IBM0001:ACPI0001" - */ -int acpi_device_modalias(struct device *dev, char *buf, int size) -{ - return __acpi_device_modalias(acpi_companion_match(dev), buf, size); -} -EXPORT_SYMBOL_GPL(acpi_device_modalias); - -static ssize_t -acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); -} -static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); - bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent) { struct acpi_device_physical_node *pn; @@ -701,279 +493,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) unlock_device_hotplug(); } -static ssize_t real_power_state_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *adev = to_acpi_device(dev); - int state; - int ret; - - ret = acpi_device_get_power(adev, &state); - if (ret) - return ret; - - return sprintf(buf, "%s\n", acpi_power_state_string(state)); -} - -static DEVICE_ATTR(real_power_state, 0444, real_power_state_show, NULL); - -static ssize_t power_state_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *adev = to_acpi_device(dev); - - return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); -} - -static DEVICE_ATTR(power_state, 0444, power_state_show, NULL); - -static ssize_t -acpi_eject_store(struct device *d, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct acpi_device *acpi_device = to_acpi_device(d); - acpi_object_type not_used; - acpi_status status; - - if (!count || buf[0] != '1') - return -EINVAL; - - if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) - && !acpi_device->driver) - return -ENODEV; - - status = acpi_get_type(acpi_device->handle, ¬_used); - if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) - return -ENODEV; - - get_device(&acpi_device->dev); - status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); - if (ACPI_SUCCESS(status)) - return count; - - put_device(&acpi_device->dev); - acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, - ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); - return status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN; -} - -static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store); - -static ssize_t -acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - - return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); -} -static DEVICE_ATTR(hid, 0444, acpi_device_hid_show, NULL); - -static ssize_t acpi_device_uid_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - - return sprintf(buf, "%s\n", acpi_dev->pnp.unique_id); -} -static DEVICE_ATTR(uid, 0444, acpi_device_uid_show, NULL); - -static ssize_t acpi_device_adr_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - - return sprintf(buf, "0x%08x\n", - (unsigned int)(acpi_dev->pnp.bus_address)); -} -static DEVICE_ATTR(adr, 0444, acpi_device_adr_show, NULL); - -static ssize_t -acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; - int result; - - result = acpi_get_name(acpi_dev->handle, ACPI_FULL_PATHNAME, &path); - if (result) - goto end; - - result = sprintf(buf, "%s\n", (char*)path.pointer); - kfree(path.pointer); -end: - return result; -} -static DEVICE_ATTR(path, 0444, acpi_device_path_show, NULL); - -/* sysfs file that shows description text from the ACPI _STR method */ -static ssize_t description_show(struct device *dev, - struct device_attribute *attr, - char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - int result; - - if (acpi_dev->pnp.str_obj == NULL) - return 0; - - /* - * The _STR object contains a Unicode identifier for a device. - * We need to convert to utf-8 so it can be displayed. - */ - result = utf16s_to_utf8s( - (wchar_t *)acpi_dev->pnp.str_obj->buffer.pointer, - acpi_dev->pnp.str_obj->buffer.length, - UTF16_LITTLE_ENDIAN, buf, - PAGE_SIZE); - - buf[result++] = '\n'; - - return result; -} -static DEVICE_ATTR(description, 0444, description_show, NULL); - -static ssize_t -acpi_device_sun_show(struct device *dev, struct device_attribute *attr, - char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - acpi_status status; - unsigned long long sun; - - status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); - if (ACPI_FAILURE(status)) - return -ENODEV; - - return sprintf(buf, "%llu\n", sun); -} -static DEVICE_ATTR(sun, 0444, acpi_device_sun_show, NULL); - -static ssize_t status_show(struct device *dev, struct device_attribute *attr, - char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - acpi_status status; - unsigned long long sta; - - status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); - if (ACPI_FAILURE(status)) - return -ENODEV; - - return sprintf(buf, "%llu\n", sta); -} -static DEVICE_ATTR_RO(status); - -static int acpi_device_setup_files(struct acpi_device *dev) -{ - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; - int result = 0; - - /* - * Devices gotten from FADT don't have a "path" attribute - */ - if (dev->handle) { - result = device_create_file(&dev->dev, &dev_attr_path); - if (result) - goto end; - } - - if (!list_empty(&dev->pnp.ids)) { - result = device_create_file(&dev->dev, &dev_attr_hid); - if (result) - goto end; - - result = device_create_file(&dev->dev, &dev_attr_modalias); - if (result) - goto end; - } - - /* - * If device has _STR, 'description' file is created - */ - if (acpi_has_method(dev->handle, "_STR")) { - status = acpi_evaluate_object(dev->handle, "_STR", - NULL, &buffer); - if (ACPI_FAILURE(status)) - buffer.pointer = NULL; - dev->pnp.str_obj = buffer.pointer; - result = device_create_file(&dev->dev, &dev_attr_description); - if (result) - goto end; - } - - if (dev->pnp.type.bus_address) - result = device_create_file(&dev->dev, &dev_attr_adr); - if (dev->pnp.unique_id) - result = device_create_file(&dev->dev, &dev_attr_uid); - - if (acpi_has_method(dev->handle, "_SUN")) { - result = device_create_file(&dev->dev, &dev_attr_sun); - if (result) - goto end; - } - - if (acpi_has_method(dev->handle, "_STA")) { - result = device_create_file(&dev->dev, &dev_attr_status); - if (result) - goto end; - } - - /* - * If device has _EJ0, 'eject' file is created that is used to trigger - * hot-removal function from userland. - */ - if (acpi_has_method(dev->handle, "_EJ0")) { - result = device_create_file(&dev->dev, &dev_attr_eject); - if (result) - return result; - } - - if (dev->flags.power_manageable) { - result = device_create_file(&dev->dev, &dev_attr_power_state); - if (result) - return result; - - if (dev->power.flags.power_resources) - result = device_create_file(&dev->dev, - &dev_attr_real_power_state); - } - -end: - return result; -} - -static void acpi_device_remove_files(struct acpi_device *dev) -{ - if (dev->flags.power_manageable) { - device_remove_file(&dev->dev, &dev_attr_power_state); - if (dev->power.flags.power_resources) - device_remove_file(&dev->dev, - &dev_attr_real_power_state); - } - - /* - * If device has _STR, remove 'description' file - */ - if (acpi_has_method(dev->handle, "_STR")) { - kfree(dev->pnp.str_obj); - device_remove_file(&dev->dev, &dev_attr_description); - } - /* - * If device has _EJ0, remove 'eject' file. - */ - if (acpi_has_method(dev->handle, "_EJ0")) - device_remove_file(&dev->dev, &dev_attr_eject); - - if (acpi_has_method(dev->handle, "_SUN")) - device_remove_file(&dev->dev, &dev_attr_sun); - - if (dev->pnp.unique_id) - device_remove_file(&dev->dev, &dev_attr_uid); - if (dev->pnp.type.bus_address) - device_remove_file(&dev->dev, &dev_attr_adr); - device_remove_file(&dev->dev, &dev_attr_modalias); - device_remove_file(&dev->dev, &dev_attr_hid); - if (acpi_has_method(dev->handle, "_STA")) - device_remove_file(&dev->dev, &dev_attr_status); - if (dev->handle) - device_remove_file(&dev->dev, &dev_attr_path); -} /* -------------------------------------------------------------------------- ACPI Bus operations -------------------------------------------------------------------------- */ -- cgit v0.10.2 From 68c6b148daa6e45a85b31ef60ed9c9bfd556fff0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:53:52 +0200 Subject: ACPI / scan: Move device matching code to bus.c To reduce the size of scan.c and improve the readability of it, move code related device matching into drivers/acpi/bus.c. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 513e7230e..ce80580 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -424,6 +424,200 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) } /* -------------------------------------------------------------------------- + Device Matching + -------------------------------------------------------------------------- */ + +/* + * acpi_companion_match() - Can we match via ACPI companion device + * @dev: Device in question + * + * Check if the given device has an ACPI companion and if that companion has + * a valid list of PNP IDs, and if the device is the first (primary) physical + * device associated with it. Return the companion pointer if that's the case + * or NULL otherwise. + * + * If multiple physical devices are attached to a single ACPI companion, we need + * to be careful. The usage scenario for this kind of relationship is that all + * of the physical devices in question use resources provided by the ACPI + * companion. A typical case is an MFD device where all the sub-devices share + * the parent's ACPI companion. In such cases we can only allow the primary + * (first) physical device to be matched with the help of the companion's PNP + * IDs. + * + * Additional physical devices sharing the ACPI companion can still use + * resources available from it but they will be matched normally using functions + * provided by their bus types (and analogously for their modalias). + */ +struct acpi_device *acpi_companion_match(const struct device *dev) +{ + struct acpi_device *adev; + struct mutex *physical_node_lock; + + adev = ACPI_COMPANION(dev); + if (!adev) + return NULL; + + if (list_empty(&adev->pnp.ids)) + return NULL; + + physical_node_lock = &adev->physical_node_lock; + mutex_lock(physical_node_lock); + if (list_empty(&adev->physical_node_list)) { + adev = NULL; + } else { + const struct acpi_device_physical_node *node; + + node = list_first_entry(&adev->physical_node_list, + struct acpi_device_physical_node, node); + if (node->dev != dev) + adev = NULL; + } + mutex_unlock(physical_node_lock); + + return adev; +} + +/** + * acpi_of_match_device - Match device object using the "compatible" property. + * @adev: ACPI device object to match. + * @of_match_table: List of device IDs to match against. + * + * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of + * identifiers and a _DSD object with the "compatible" property, use that + * property to match against the given list of identifiers. + */ +static bool acpi_of_match_device(struct acpi_device *adev, + const struct of_device_id *of_match_table) +{ + const union acpi_object *of_compatible, *obj; + int i, nval; + + if (!adev) + return false; + + of_compatible = adev->data.of_compatible; + if (!of_match_table || !of_compatible) + return false; + + if (of_compatible->type == ACPI_TYPE_PACKAGE) { + nval = of_compatible->package.count; + obj = of_compatible->package.elements; + } else { /* Must be ACPI_TYPE_STRING. */ + nval = 1; + obj = of_compatible; + } + /* Now we can look for the driver DT compatible strings */ + for (i = 0; i < nval; i++, obj++) { + const struct of_device_id *id; + + for (id = of_match_table; id->compatible[0]; id++) + if (!strcasecmp(obj->string.pointer, id->compatible)) + return true; + } + + return false; +} + +static bool __acpi_match_device_cls(const struct acpi_device_id *id, + struct acpi_hardware_id *hwid) +{ + int i, msk, byte_shift; + char buf[3]; + + if (!id->cls) + return false; + + /* Apply class-code bitmask, before checking each class-code byte */ + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3 - i); + msk = (id->cls_msk >> byte_shift) & 0xFF; + if (!msk) + continue; + + sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); + if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) + return false; + } + return true; +} + +static const struct acpi_device_id *__acpi_match_device( + struct acpi_device *device, + const struct acpi_device_id *ids, + const struct of_device_id *of_ids) +{ + const struct acpi_device_id *id; + struct acpi_hardware_id *hwid; + + /* + * If the device is not present, it is unnecessary to load device + * driver for it. + */ + if (!device || !device->status.present) + return NULL; + + list_for_each_entry(hwid, &device->pnp.ids, list) { + /* First, check the ACPI/PNP IDs provided by the caller. */ + for (id = ids; id->id[0] || id->cls; id++) { + if (id->id[0] && !strcmp((char *) id->id, hwid->id)) + return id; + else if (id->cls && __acpi_match_device_cls(id, hwid)) + return id; + } + + /* + * Next, check ACPI_DT_NAMESPACE_HID and try to match the + * "compatible" property if found. + * + * The id returned by the below is not valid, but the only + * caller passing non-NULL of_ids here is only interested in + * whether or not the return value is NULL. + */ + if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id) + && acpi_of_match_device(device, of_ids)) + return id; + } + return NULL; +} + +/** + * acpi_match_device - Match a struct device against a given list of ACPI IDs + * @ids: Array of struct acpi_device_id object to match against. + * @dev: The device structure to match. + * + * Check if @dev has a valid ACPI handle and if there is a struct acpi_device + * object for that handle and use that object to match against a given list of + * device IDs. + * + * Return a pointer to the first matching ID on success or %NULL on failure. + */ +const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, + const struct device *dev) +{ + return __acpi_match_device(acpi_companion_match(dev), ids, NULL); +} +EXPORT_SYMBOL_GPL(acpi_match_device); + +int acpi_match_device_ids(struct acpi_device *device, + const struct acpi_device_id *ids) +{ + return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT; +} +EXPORT_SYMBOL(acpi_match_device_ids); + +bool acpi_driver_match_device(struct device *dev, + const struct device_driver *drv) +{ + if (!drv->acpi_match_table) + return acpi_of_match_device(ACPI_COMPANION(dev), + drv->of_match_table); + + return !!__acpi_match_device(acpi_companion_match(dev), + drv->acpi_match_table, drv->of_match_table); +} +EXPORT_SYMBOL_GPL(acpi_driver_match_device); + +/* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 099831f..68877bc 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -115,55 +115,6 @@ int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler, return 0; } -/* - * acpi_companion_match() - Can we match via ACPI companion device - * @dev: Device in question - * - * Check if the given device has an ACPI companion and if that companion has - * a valid list of PNP IDs, and if the device is the first (primary) physical - * device associated with it. Return the companion pointer if that's the case - * or NULL otherwise. - * - * If multiple physical devices are attached to a single ACPI companion, we need - * to be careful. The usage scenario for this kind of relationship is that all - * of the physical devices in question use resources provided by the ACPI - * companion. A typical case is an MFD device where all the sub-devices share - * the parent's ACPI companion. In such cases we can only allow the primary - * (first) physical device to be matched with the help of the companion's PNP - * IDs. - * - * Additional physical devices sharing the ACPI companion can still use - * resources available from it but they will be matched normally using functions - * provided by their bus types (and analogously for their modalias). - */ -struct acpi_device *acpi_companion_match(const struct device *dev) -{ - struct acpi_device *adev; - struct mutex *physical_node_lock; - - adev = ACPI_COMPANION(dev); - if (!adev) - return NULL; - - if (list_empty(&adev->pnp.ids)) - return NULL; - - physical_node_lock = &adev->physical_node_lock; - mutex_lock(physical_node_lock); - if (list_empty(&adev->physical_node_list)) { - adev = NULL; - } else { - const struct acpi_device_physical_node *node; - - node = list_first_entry(&adev->physical_node_list, - struct acpi_device_physical_node, node); - if (node->dev != dev) - adev = NULL; - } - mutex_unlock(physical_node_lock); - - return adev; -} bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent) { @@ -497,146 +448,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) ACPI Bus operations -------------------------------------------------------------------------- */ -/** - * acpi_of_match_device - Match device object using the "compatible" property. - * @adev: ACPI device object to match. - * @of_match_table: List of device IDs to match against. - * - * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of - * identifiers and a _DSD object with the "compatible" property, use that - * property to match against the given list of identifiers. - */ -static bool acpi_of_match_device(struct acpi_device *adev, - const struct of_device_id *of_match_table) -{ - const union acpi_object *of_compatible, *obj; - int i, nval; - - if (!adev) - return false; - - of_compatible = adev->data.of_compatible; - if (!of_match_table || !of_compatible) - return false; - - if (of_compatible->type == ACPI_TYPE_PACKAGE) { - nval = of_compatible->package.count; - obj = of_compatible->package.elements; - } else { /* Must be ACPI_TYPE_STRING. */ - nval = 1; - obj = of_compatible; - } - /* Now we can look for the driver DT compatible strings */ - for (i = 0; i < nval; i++, obj++) { - const struct of_device_id *id; - - for (id = of_match_table; id->compatible[0]; id++) - if (!strcasecmp(obj->string.pointer, id->compatible)) - return true; - } - - return false; -} - -static bool __acpi_match_device_cls(const struct acpi_device_id *id, - struct acpi_hardware_id *hwid) -{ - int i, msk, byte_shift; - char buf[3]; - - if (!id->cls) - return false; - - /* Apply class-code bitmask, before checking each class-code byte */ - for (i = 1; i <= 3; i++) { - byte_shift = 8 * (3 - i); - msk = (id->cls_msk >> byte_shift) & 0xFF; - if (!msk) - continue; - - sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); - if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) - return false; - } - return true; -} - -static const struct acpi_device_id *__acpi_match_device( - struct acpi_device *device, - const struct acpi_device_id *ids, - const struct of_device_id *of_ids) -{ - const struct acpi_device_id *id; - struct acpi_hardware_id *hwid; - - /* - * If the device is not present, it is unnecessary to load device - * driver for it. - */ - if (!device || !device->status.present) - return NULL; - - list_for_each_entry(hwid, &device->pnp.ids, list) { - /* First, check the ACPI/PNP IDs provided by the caller. */ - for (id = ids; id->id[0] || id->cls; id++) { - if (id->id[0] && !strcmp((char *) id->id, hwid->id)) - return id; - else if (id->cls && __acpi_match_device_cls(id, hwid)) - return id; - } - - /* - * Next, check ACPI_DT_NAMESPACE_HID and try to match the - * "compatible" property if found. - * - * The id returned by the below is not valid, but the only - * caller passing non-NULL of_ids here is only interested in - * whether or not the return value is NULL. - */ - if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id) - && acpi_of_match_device(device, of_ids)) - return id; - } - return NULL; -} - -/** - * acpi_match_device - Match a struct device against a given list of ACPI IDs - * @ids: Array of struct acpi_device_id object to match against. - * @dev: The device structure to match. - * - * Check if @dev has a valid ACPI handle and if there is a struct acpi_device - * object for that handle and use that object to match against a given list of - * device IDs. - * - * Return a pointer to the first matching ID on success or %NULL on failure. - */ -const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, - const struct device *dev) -{ - return __acpi_match_device(acpi_companion_match(dev), ids, NULL); -} -EXPORT_SYMBOL_GPL(acpi_match_device); - -int acpi_match_device_ids(struct acpi_device *device, - const struct acpi_device_id *ids) -{ - return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT; -} -EXPORT_SYMBOL(acpi_match_device_ids); - -bool acpi_driver_match_device(struct device *dev, - const struct device_driver *drv) -{ - if (!drv->acpi_match_table) - return acpi_of_match_device(ACPI_COMPANION(dev), - drv->of_match_table); - - return !!__acpi_match_device(acpi_companion_match(dev), - drv->acpi_match_table, drv->of_match_table); -} -EXPORT_SYMBOL_GPL(acpi_driver_match_device); - static void acpi_free_power_resources_lists(struct acpi_device *device) { int i; -- cgit v0.10.2 From 5894b0c46e49b5ecc25f22b2d1b8232aab00ce97 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:54:01 +0200 Subject: ACPI / scan: Move bus operations and notification routines to bus.c To reduce the size of scan.c and improve the readability of it, move code related to device notification, the definitions of the ACPI bus operations and the driver management code to drivers/acpi/bus.c. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index ce80580..2f50fc4 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -423,6 +423,65 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) acpi_evaluate_ost(handle, type, ost_code, NULL); } +static void acpi_device_notify(acpi_handle handle, u32 event, void *data) +{ + struct acpi_device *device = data; + + device->driver->ops.notify(device, event); +} + +static void acpi_device_notify_fixed(void *data) +{ + struct acpi_device *device = data; + + /* Fixed hardware devices have no handles */ + acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device); +} + +static u32 acpi_device_fixed_event(void *data) +{ + acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data); + return ACPI_INTERRUPT_HANDLED; +} + +static int acpi_device_install_notify_handler(struct acpi_device *device) +{ + acpi_status status; + + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + status = + acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, + acpi_device_fixed_event, + device); + else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + status = + acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, + acpi_device_fixed_event, + device); + else + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, + acpi_device_notify, + device); + + if (ACPI_FAILURE(status)) + return -EINVAL; + return 0; +} + +static void acpi_device_remove_notify_handler(struct acpi_device *device) +{ + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, + acpi_device_fixed_event); + else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, + acpi_device_fixed_event); + else + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_device_notify); +} + /* -------------------------------------------------------------------------- Device Matching -------------------------------------------------------------------------- */ @@ -618,6 +677,131 @@ bool acpi_driver_match_device(struct device *dev, EXPORT_SYMBOL_GPL(acpi_driver_match_device); /* -------------------------------------------------------------------------- + ACPI Driver Management + -------------------------------------------------------------------------- */ + +/** + * acpi_bus_register_driver - register a driver with the ACPI bus + * @driver: driver being registered + * + * Registers a driver with the ACPI bus. Searches the namespace for all + * devices that match the driver's criteria and binds. Returns zero for + * success or a negative error status for failure. + */ +int acpi_bus_register_driver(struct acpi_driver *driver) +{ + int ret; + + if (acpi_disabled) + return -ENODEV; + driver->drv.name = driver->name; + driver->drv.bus = &acpi_bus_type; + driver->drv.owner = driver->owner; + + ret = driver_register(&driver->drv); + return ret; +} + +EXPORT_SYMBOL(acpi_bus_register_driver); + +/** + * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus + * @driver: driver to unregister + * + * Unregisters a driver with the ACPI bus. Searches the namespace for all + * devices that match the driver's criteria and unbinds. + */ +void acpi_bus_unregister_driver(struct acpi_driver *driver) +{ + driver_unregister(&driver->drv); +} + +EXPORT_SYMBOL(acpi_bus_unregister_driver); + +/* -------------------------------------------------------------------------- + ACPI Bus operations + -------------------------------------------------------------------------- */ + +static int acpi_bus_match(struct device *dev, struct device_driver *drv) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_driver *acpi_drv = to_acpi_driver(drv); + + return acpi_dev->flags.match_driver + && !acpi_match_device_ids(acpi_dev, acpi_drv->ids); +} + +static int acpi_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return __acpi_device_uevent_modalias(to_acpi_device(dev), env); +} + +static int acpi_device_probe(struct device *dev) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); + int ret; + + if (acpi_dev->handler && !acpi_is_pnp_device(acpi_dev)) + return -EINVAL; + + if (!acpi_drv->ops.add) + return -ENOSYS; + + ret = acpi_drv->ops.add(acpi_dev); + if (ret) + return ret; + + acpi_dev->driver = acpi_drv; + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Driver [%s] successfully bound to device [%s]\n", + acpi_drv->name, acpi_dev->pnp.bus_id)); + + if (acpi_drv->ops.notify) { + ret = acpi_device_install_notify_handler(acpi_dev); + if (ret) { + if (acpi_drv->ops.remove) + acpi_drv->ops.remove(acpi_dev); + + acpi_dev->driver = NULL; + acpi_dev->driver_data = NULL; + return ret; + } + } + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found driver [%s] for device [%s]\n", + acpi_drv->name, acpi_dev->pnp.bus_id)); + get_device(dev); + return 0; +} + +static int acpi_device_remove(struct device * dev) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_driver *acpi_drv = acpi_dev->driver; + + if (acpi_drv) { + if (acpi_drv->ops.notify) + acpi_device_remove_notify_handler(acpi_dev); + if (acpi_drv->ops.remove) + acpi_drv->ops.remove(acpi_dev); + } + acpi_dev->driver = NULL; + acpi_dev->driver_data = NULL; + + put_device(dev); + return 0; +} + +struct bus_type acpi_bus_type = { + .name = "acpi", + .match = acpi_bus_match, + .probe = acpi_device_probe, + .remove = acpi_device_remove, + .uevent = acpi_device_uevent, +}; + +/* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 68877bc..f541f68 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -444,10 +444,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) unlock_device_hotplug(); } -/* -------------------------------------------------------------------------- - ACPI Bus operations - -------------------------------------------------------------------------- */ - static void acpi_free_power_resources_lists(struct acpi_device *device) { int i; @@ -474,144 +470,6 @@ static void acpi_device_release(struct device *dev) kfree(acpi_dev); } -static int acpi_bus_match(struct device *dev, struct device_driver *drv) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_driver *acpi_drv = to_acpi_driver(drv); - - return acpi_dev->flags.match_driver - && !acpi_match_device_ids(acpi_dev, acpi_drv->ids); -} - -static int acpi_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - return __acpi_device_uevent_modalias(to_acpi_device(dev), env); -} - -static void acpi_device_notify(acpi_handle handle, u32 event, void *data) -{ - struct acpi_device *device = data; - - device->driver->ops.notify(device, event); -} - -static void acpi_device_notify_fixed(void *data) -{ - struct acpi_device *device = data; - - /* Fixed hardware devices have no handles */ - acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device); -} - -static u32 acpi_device_fixed_event(void *data) -{ - acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data); - return ACPI_INTERRUPT_HANDLED; -} - -static int acpi_device_install_notify_handler(struct acpi_device *device) -{ - acpi_status status; - - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) - status = - acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, - acpi_device_fixed_event, - device); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) - status = - acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, - acpi_device_fixed_event, - device); - else - status = acpi_install_notify_handler(device->handle, - ACPI_DEVICE_NOTIFY, - acpi_device_notify, - device); - - if (ACPI_FAILURE(status)) - return -EINVAL; - return 0; -} - -static void acpi_device_remove_notify_handler(struct acpi_device *device) -{ - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) - acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, - acpi_device_fixed_event); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) - acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, - acpi_device_fixed_event); - else - acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, - acpi_device_notify); -} - -static int acpi_device_probe(struct device *dev) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); - int ret; - - if (acpi_dev->handler && !acpi_is_pnp_device(acpi_dev)) - return -EINVAL; - - if (!acpi_drv->ops.add) - return -ENOSYS; - - ret = acpi_drv->ops.add(acpi_dev); - if (ret) - return ret; - - acpi_dev->driver = acpi_drv; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Driver [%s] successfully bound to device [%s]\n", - acpi_drv->name, acpi_dev->pnp.bus_id)); - - if (acpi_drv->ops.notify) { - ret = acpi_device_install_notify_handler(acpi_dev); - if (ret) { - if (acpi_drv->ops.remove) - acpi_drv->ops.remove(acpi_dev); - - acpi_dev->driver = NULL; - acpi_dev->driver_data = NULL; - return ret; - } - } - - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found driver [%s] for device [%s]\n", - acpi_drv->name, acpi_dev->pnp.bus_id)); - get_device(dev); - return 0; -} - -static int acpi_device_remove(struct device * dev) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_driver *acpi_drv = acpi_dev->driver; - - if (acpi_drv) { - if (acpi_drv->ops.notify) - acpi_device_remove_notify_handler(acpi_dev); - if (acpi_drv->ops.remove) - acpi_drv->ops.remove(acpi_dev); - } - acpi_dev->driver = NULL; - acpi_dev->driver_data = NULL; - - put_device(dev); - return 0; -} - -struct bus_type acpi_bus_type = { - .name = "acpi", - .match = acpi_bus_match, - .probe = acpi_device_probe, - .remove = acpi_device_remove, - .uevent = acpi_device_uevent, -}; - static void acpi_device_del(struct acpi_device *device) { mutex_lock(&acpi_device_lock); @@ -859,47 +717,6 @@ struct acpi_device *acpi_get_next_child(struct device *dev, } /* -------------------------------------------------------------------------- - Driver Management - -------------------------------------------------------------------------- */ -/** - * acpi_bus_register_driver - register a driver with the ACPI bus - * @driver: driver being registered - * - * Registers a driver with the ACPI bus. Searches the namespace for all - * devices that match the driver's criteria and binds. Returns zero for - * success or a negative error status for failure. - */ -int acpi_bus_register_driver(struct acpi_driver *driver) -{ - int ret; - - if (acpi_disabled) - return -ENODEV; - driver->drv.name = driver->name; - driver->drv.bus = &acpi_bus_type; - driver->drv.owner = driver->owner; - - ret = driver_register(&driver->drv); - return ret; -} - -EXPORT_SYMBOL(acpi_bus_register_driver); - -/** - * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus - * @driver: driver to unregister - * - * Unregisters a driver with the ACPI bus. Searches the namespace for all - * devices that match the driver's criteria and unbinds. - */ -void acpi_bus_unregister_driver(struct acpi_driver *driver) -{ - driver_unregister(&driver->drv); -} - -EXPORT_SYMBOL(acpi_bus_unregister_driver); - -/* -------------------------------------------------------------------------- Device Enumeration -------------------------------------------------------------------------- */ static struct acpi_device *acpi_bus_get_parent(acpi_handle handle) -- cgit v0.10.2 From 1dcc3d3362b0c97e48290f7786be85b4cec2a147 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:54:09 +0200 Subject: ACPI / bus: Move ACPI bus type registration Move the registration of the ACPI bus type to acpi_bus_init() and avoid using ACPI going forward if it fails (too many things depend on the presence of the ACPI bus type). Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2f50fc4..7a3ad92 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1039,7 +1039,9 @@ static int __init acpi_bus_init(void) */ acpi_root_dir = proc_mkdir(ACPI_BUS_FILE_ROOT, NULL); - return 0; + result = bus_register(&acpi_bus_type); + if (!result) + return 0; /* Mimic structured exception handling */ error1: diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index f541f68..2fe5a37 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1891,12 +1891,6 @@ int __init acpi_scan_init(void) { int result; - result = bus_register(&acpi_bus_type); - if (result) { - /* We don't want to quit even if we failed to add suspend/resume */ - printk(KERN_ERR PREFIX "Could not register bus type\n"); - } - acpi_pci_root_init(); acpi_pci_link_init(); acpi_processor_init(); -- cgit v0.10.2