From e0aac52e17a3db68fe2ceae281780a70fc69957f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 27 Jan 2012 10:45:27 +0900 Subject: ipvs: fix matching of fwmark templates during scheduling Commit f11017ec2d1859c661f4e2b12c4a8d250e1f47cf (2.6.37) moved the fwmark variable in subcontext that is invalidated before reaching the ip_vs_ct_in_get call. As vaddr is provided as pointer in the param structure make sure the fwmark variable is in same context. As the fwmark templates can not be matched, more and more template connections are created and the controlled connections can not go to single real server. Signed-off-by: Julian Anastasov Cc: stable@vger.kernel.org Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 611c335..2555816 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -232,6 +232,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, __be16 dport = 0; /* destination port to forward */ unsigned int flags; struct ip_vs_conn_param param; + const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -267,7 +268,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc, { int protocol = iph.protocol; const union nf_inet_addr *vaddr = &iph.daddr; - const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; __be16 vport = 0; if (dst_port == svc->port) { -- cgit v0.10.2 From a8db7b2d197a0d624baab83f0c810b0edbc4ffd0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 6 Feb 2012 13:23:10 +0100 Subject: netfilter: nf_queue: fix queueing of bridged gro skbs When trying to nf_queue GRO/GSO skbs, nf_queue uses skb_gso_segment to split the skb. However, if nf_queue is called via bridge netfilter, the mac header won't be preserved -- packets will thus contain a bogus mac header. Fix this by setting skb->data to the mac header when skb->nf_bridge is set and restoring skb->data afterwards for all segments. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b3a7db6..ce60cf0 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -203,6 +203,27 @@ err: return status; } +#ifdef CONFIG_BRIDGE_NETFILTER +/* When called from bridge netfilter, skb->data must point to MAC header + * before calling skb_gso_segment(). Else, original MAC header is lost + * and segmented skbs will be sent to wrong destination. + */ +static void nf_bridge_adjust_skb_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_push(skb, skb->network_header - skb->mac_header); +} + +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_pull(skb, skb->network_header - skb->mac_header); +} +#else +#define nf_bridge_adjust_skb_data(s) do {} while (0) +#define nf_bridge_adjust_segmented_data(s) do {} while (0) +#endif + int nf_queue(struct sk_buff *skb, struct list_head *elem, u_int8_t pf, unsigned int hook, @@ -212,7 +233,7 @@ int nf_queue(struct sk_buff *skb, unsigned int queuenum) { struct sk_buff *segs; - int err; + int err = -EINVAL; unsigned int queued; if (!skb_is_gso(skb)) @@ -228,23 +249,25 @@ int nf_queue(struct sk_buff *skb, break; } + nf_bridge_adjust_skb_data(skb); segs = skb_gso_segment(skb, 0); /* Does not use PTR_ERR to limit the number of error codes that can be * returned by nf_queue. For instance, callers rely on -ECANCELED to mean * 'ignore this hook'. */ if (IS_ERR(segs)) - return -EINVAL; - + goto out_err; queued = 0; err = 0; do { struct sk_buff *nskb = segs->next; segs->next = NULL; - if (err == 0) + if (err == 0) { + nf_bridge_adjust_segmented_data(segs); err = __nf_queue(segs, elem, pf, hook, indev, outdev, okfn, queuenum); + } if (err == 0) queued++; else @@ -252,11 +275,12 @@ int nf_queue(struct sk_buff *skb, segs = nskb; } while (segs); - /* also free orig skb if only some segments were queued */ - if (unlikely(err && queued)) - err = 0; - if (err == 0) + if (queued) { kfree_skb(skb); + return 0; + } + out_err: + nf_bridge_adjust_segmented_data(skb); return err; } -- cgit v0.10.2 From 18daf1644e634bae951a6e3d4d19d89170209762 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 13 Jan 2012 15:11:30 +0100 Subject: Bluetooth: Fix l2cap conn failures for ssp devices Commit 330605423c fixed l2cap conn establishment for non-ssp remote devices by not setting HCI_CONN_ENCRYPT_PEND every time conn security is tested (which was always returning failure on any subsequent security checks). However, this broke l2cap conn establishment for ssp remote devices when an ACL link was already established at SDP-level security. This fix ensures that encryption must be pending whenever authentication is also pending. Signed-off-by: Peter Hurley Tested-by: Daniel Wagner Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3db4324..07bc69e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -635,6 +635,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_auth_requested cp; + + /* encrypt must be pending if auth is also pending */ + set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); + cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); -- cgit v0.10.2 From 19ad9e94f6e2b4b3e1feccfb2466eb6e3e5b8c2a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 3 Jan 2012 11:53:53 +0100 Subject: Bluetooth: Don't mark non xfer isoc endpoint URBs with URB_ISO_ASAP [ 2096.384084] btusb_send_frame:684: hci0 [ 2096.384087] usb 3-1: BOGUS urb flags, 2 --> 0 [ 2096.384091] Bluetooth: hci0 urb ffff8801b61d3a80 submission failed (22) According the documentation in usb_submit_urb() URB_ISO_ASAP flag is only allowed for endpoints of type USB_ENDPOINT_XFER_ISOC. This reverts commit b8aabfc92249b239c425da7e4ca85b7e4855e984. Signed-off-by: Daniel Wagner Acked-by: Luiz Augusto von Dentz Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index f00f596..b7c4f4e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -726,9 +726,6 @@ static int btusb_send_frame(struct sk_buff *skb) usb_fill_bulk_urb(urb, data->udev, pipe, skb->data, skb->len, btusb_tx_complete, skb); - if (skb->priority >= HCI_PRIO_MAX - 1) - urb->transfer_flags = URB_ISO_ASAP; - hdev->stat.acl_tx++; break; -- cgit v0.10.2 From 4aa832c27edf902130f8bace1d42cf22468823fa Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 8 Jan 2012 22:51:16 +0200 Subject: Bluetooth: Remove bogus inline declaration from l2cap_chan_connect As reported by Dan Carpenter this function causes a Sparse warning and shouldn't be declared inline: include/net/bluetooth/l2cap.h:837:30 error: marked inline, but without a definition" Reported-by: Dan Carpenter Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 68f5891..124f7cf 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -834,7 +834,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid); struct l2cap_chan *l2cap_chan_create(struct sock *sk); void l2cap_chan_close(struct l2cap_chan *chan, int reason); void l2cap_chan_destroy(struct l2cap_chan *chan); -inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, +int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u32 priority); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index faf0b11..980abdb 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1120,7 +1120,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr return c1; } -inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst) +int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst) { struct sock *sk = chan->sk; bdaddr_t *src = &bt_sk(sk)->src; -- cgit v0.10.2 From a63752552b95624a9f1dfa3d763870f72f964ad0 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 4 Jan 2012 12:10:41 +0100 Subject: Bluetooth: Fix sk_sndtimeo initialization for L2CAP socket sk_sndtime value should be specified in jiffies thus initial value needs to be converted from miliseconds. Otherwise this timeout is unreliable when CONFIG_HZ is not set to 1000. Signed-off-by: Andrzej Kaczmarek Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c61d967..c57027f 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1002,7 +1002,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; + sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); sock_reset_flag(sk, SOCK_ZAPPED); -- cgit v0.10.2 From 6e1da683f79a22fafaada62d547138daaa9e3456 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 4 Jan 2012 12:10:42 +0100 Subject: Bluetooth: l2cap_set_timer needs jiffies as timeout value After moving L2CAP timers to workqueues l2cap_set_timer expects timeout value to be specified in jiffies but constants defined in miliseconds are used. This makes timeouts unreliable when CONFIG_HZ is not set to 1000. __set_chan_timer macro still uses jiffies as input to avoid multiple conversions from/to jiffies for sk_sndtimeo value which is already specified in jiffies. Signed-off-by: Andrzej Kaczmarek Ackec-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 124f7cf..26486e1 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -626,13 +626,13 @@ static inline void l2cap_clear_timer(struct l2cap_chan *chan, #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t)) #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer) #define __set_retrans_timer(c) l2cap_set_timer(c, &c->retrans_timer, \ - L2CAP_DEFAULT_RETRANS_TO); + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer) #define __set_monitor_timer(c) l2cap_set_timer(c, &c->monitor_timer, \ - L2CAP_DEFAULT_MONITOR_TO); + msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO)); #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer) #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \ - L2CAP_DEFAULT_ACK_TO); + msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO)); #define __clear_ack_timer(c) l2cap_clear_timer(c, &c->ack_timer) static inline int __seq_offset(struct l2cap_chan *chan, __u16 seq1, __u16 seq2) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 980abdb..dcccae0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2970,7 +2970,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr default: sk->sk_err = ECONNRESET; - __set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_REJ_TIMEOUT)); l2cap_send_disconn_req(conn, chan, ECONNRESET); goto done; } @@ -4478,7 +4479,8 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) if (encrypt == 0x00) { if (chan->sec_level == BT_SECURITY_MEDIUM) { __clear_chan_timer(chan); - __set_chan_timer(chan, L2CAP_ENC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_ENC_TIMEOUT)); } else if (chan->sec_level == BT_SECURITY_HIGH) l2cap_chan_close(chan, ECONNREFUSED); } else { @@ -4546,7 +4548,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) L2CAP_CONN_REQ, sizeof(req), &req); } else { __clear_chan_timer(chan); - __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_TIMEOUT)); } } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; @@ -4566,7 +4569,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } } else { l2cap_state_change(chan, BT_DISCONN); - __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_TIMEOUT)); res = L2CAP_CR_SEC_BLOCK; stat = L2CAP_CS_NO_INFO; } -- cgit v0.10.2 From 331660637b4e5136602a98200a84f6b65ed8d5be Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 4 Jan 2012 11:57:17 -0300 Subject: Bluetooth: Fix using an absolute timeout on hci_conn_put() queue_delayed_work() expects a relative time for when that work should be scheduled. Signed-off-by: Vinicius Costa Gomes Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ea9231f..92b8fea 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -561,7 +561,7 @@ static inline void hci_conn_put(struct hci_conn *conn) } cancel_delayed_work_sync(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, - &conn->disc_work, jiffies + timeo); + &conn->disc_work, timeo); } } -- cgit v0.10.2 From b5a30dda6598af216c070165ece6068f9f00f33a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 22 Jan 2012 00:28:34 +0200 Subject: Bluetooth: silence lockdep warning Since bluetooth uses multiple protocols types, to avoid lockdep warnings, we need to use different lockdep classes (one for each protocol type). This is already done in bt_sock_create but it misses a couple of cases when new connections are created. This patch corrects that to fix the following warning: <4>[ 1864.732366] ======================================================= <4>[ 1864.733030] [ INFO: possible circular locking dependency detected ] <4>[ 1864.733544] 3.0.16-mid3-00007-gc9a0f62 #3 <4>[ 1864.733883] ------------------------------------------------------- <4>[ 1864.734408] t.android.btclc/4204 is trying to acquire lock: <4>[ 1864.734869] (rfcomm_mutex){+.+.+.}, at: [] rfcomm_dlc_close+0x15/0x30 <4>[ 1864.735541] <4>[ 1864.735549] but task is already holding lock: <4>[ 1864.736045] (sk_lock-AF_BLUETOOTH){+.+.+.}, at: [] lock_sock+0xa/0xc <4>[ 1864.736732] <4>[ 1864.736740] which lock already depends on the new lock. <4>[ 1864.736750] <4>[ 1864.737428] <4>[ 1864.737437] the existing dependency chain (in reverse order) is: <4>[ 1864.738016] <4>[ 1864.738023] -> #1 (sk_lock-AF_BLUETOOTH){+.+.+.}: <4>[ 1864.738549] [] lock_acquire+0x104/0x140 <4>[ 1864.738977] [] lock_sock_nested+0x58/0x68 <4>[ 1864.739411] [] l2cap_sock_sendmsg+0x3e/0x76 <4>[ 1864.739858] [] __sock_sendmsg+0x50/0x59 <4>[ 1864.740279] [] sock_sendmsg+0x94/0xa8 <4>[ 1864.740687] [] kernel_sendmsg+0x28/0x37 <4>[ 1864.741106] [] rfcomm_send_frame+0x30/0x38 <4>[ 1864.741542] [] rfcomm_send_ua+0x58/0x5a <4>[ 1864.741959] [] rfcomm_run+0x441/0xb52 <4>[ 1864.742365] [] kthread+0x63/0x68 <4>[ 1864.742742] [] kernel_thread_helper+0x6/0xd <4>[ 1864.743187] <4>[ 1864.743193] -> #0 (rfcomm_mutex){+.+.+.}: <4>[ 1864.743667] [] __lock_acquire+0x988/0xc00 <4>[ 1864.744100] [] lock_acquire+0x104/0x140 <4>[ 1864.744519] [] __mutex_lock_common+0x3b/0x33f <4>[ 1864.744975] [] mutex_lock_nested+0x2d/0x36 <4>[ 1864.745412] [] rfcomm_dlc_close+0x15/0x30 <4>[ 1864.745842] [] __rfcomm_sock_close+0x5f/0x6b <4>[ 1864.746288] [] rfcomm_sock_shutdown+0x2f/0x62 <4>[ 1864.746737] [] sys_socketcall+0x1db/0x422 <4>[ 1864.747165] [] syscall_call+0x7/0xb Signed-off-by: Octavian Purdila Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index abaad6e..4a82ca0 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -256,4 +256,6 @@ void l2cap_exit(void); int sco_init(void); void sco_exit(void); +void bt_sock_reclassify_lock(struct sock *sk, int proto); + #endif /* __BLUETOOTH_H */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ef92864..72eb187 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -71,19 +71,16 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_AVDTP", }; -static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) +void bt_sock_reclassify_lock(struct sock *sk, int proto) { - struct sock *sk = sock->sk; - - if (!sk) - return; - + BUG_ON(!sk); BUG_ON(sock_owned_by_user(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], bt_key_strings[proto], &bt_lock_key[proto]); } +EXPORT_SYMBOL(bt_sock_reclassify_lock); int bt_sock_register(int proto, const struct net_proto_family *ops) { @@ -145,7 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto, if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { err = bt_proto[proto]->create(net, sock, proto, kern); - bt_sock_reclassify_lock(sock, proto); + if (!err) + bt_sock_reclassify_lock(sock->sk, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c57027f..401d942 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -849,6 +849,8 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) if (!sk) return NULL; + bt_sock_reclassify_lock(sk, BTPROTO_L2CAP); + l2cap_sock_init(sk, parent); return l2cap_pi(sk)->chan; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f066678..22169c3 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -956,6 +956,8 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * if (!sk) goto done; + bt_sock_reclassify_lock(sk, BTPROTO_RFCOMM); + rfcomm_sock_init(sk, parent); bacpy(&bt_sk(sk)->src, &src); bacpy(&bt_sk(sk)->dst, &dst); -- cgit v0.10.2 From a51cd2be864a3cc0272359b1995e213341dfc7e7 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jan 2012 19:42:02 -0300 Subject: Bluetooth: Fix potential deadlock We don't need to use the _sync variant in hci_conn_hold and hci_conn_put to cancel conn->disc_work delayed work. This way we avoid potential deadlocks like this one reported by lockdep. ====================================================== [ INFO: possible circular locking dependency detected ] 3.2.0+ #1 Not tainted ------------------------------------------------------- kworker/u:1/17 is trying to acquire lock: (&hdev->lock){+.+.+.}, at: [] hci_conn_timeout+0x62/0x158 [bluetooth] but task is already holding lock: ((&(&conn->disc_work)->work)){+.+...}, at: [] process_one_work+0x11a/0x2bf which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 ((&(&conn->disc_work)->work)){+.+...}: [] lock_acquire+0x8a/0xa7 [] wait_on_work+0x3d/0xaa [] __cancel_work_timer+0xac/0xef [] cancel_delayed_work_sync+0xd/0xf [] smp_chan_create+0xde/0xe6 [bluetooth] [] smp_conn_security+0xa3/0x12d [bluetooth] [] l2cap_connect_cfm+0x237/0x2e8 [bluetooth] [] hci_proto_connect_cfm+0x2d/0x6f [bluetooth] [] hci_event_packet+0x29d1/0x2d60 [bluetooth] [] hci_rx_work+0xd0/0x2e1 [bluetooth] [] process_one_work+0x178/0x2bf [] worker_thread+0xce/0x152 [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 -> #1 (slock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+...}: [] lock_acquire+0x8a/0xa7 [] _raw_spin_lock_bh+0x36/0x6a [] lock_sock_nested+0x24/0x7f [] lock_sock+0xb/0xd [bluetooth] [] l2cap_chan_connect+0xa9/0x26f [bluetooth] [] l2cap_sock_connect+0xb3/0xff [bluetooth] [] sys_connect+0x69/0x8a [] system_call_fastpath+0x16/0x1b -> #0 (&hdev->lock){+.+.+.}: [] __lock_acquire+0xa80/0xd74 [] lock_acquire+0x8a/0xa7 [] __mutex_lock_common+0x48/0x38e [] mutex_lock_nested+0x2a/0x31 [] hci_conn_timeout+0x62/0x158 [bluetooth] [] process_one_work+0x178/0x2bf [] worker_thread+0xce/0x152 [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 other info that might help us debug this: Chain exists of: &hdev->lock --> slock-AF_BLUETOOTH-BTPROTO_L2CAP --> (&(&conn->disc_work)->work) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((&(&conn->disc_work)->work)); lock(slock-AF_BLUETOOTH-BTPROTO_L2CAP); lock((&(&conn->disc_work)->work)); lock(&hdev->lock); *** DEADLOCK *** 2 locks held by kworker/u:1/17: #0: (hdev->name){.+.+.+}, at: [] process_one_work+0x11a/0x2bf #1: ((&(&conn->disc_work)->work)){+.+...}, at: [] process_one_work+0x11a/0x2bf stack backtrace: Pid: 17, comm: kworker/u:1 Not tainted 3.2.0+ #1 Call Trace: [] print_circular_bug+0x1f8/0x209 [] __lock_acquire+0xa80/0xd74 [] ? arch_local_irq_restore+0x6/0xd [] ? vprintk+0x3f9/0x41e [] lock_acquire+0x8a/0xa7 [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] __mutex_lock_common+0x48/0x38e [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] ? __dynamic_pr_debug+0x6d/0x6f [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] ? trace_hardirqs_off+0xd/0xf [] mutex_lock_nested+0x2a/0x31 [] hci_conn_timeout+0x62/0x158 [bluetooth] [] process_one_work+0x178/0x2bf [] ? process_one_work+0x11a/0x2bf [] ? lock_acquired+0x1d0/0x1df [] ? hci_acl_disconn+0x65/0x65 [bluetooth] [] worker_thread+0xce/0x152 [] ? finish_task_switch+0x45/0xc5 [] ? manage_workers.isra.25+0x16a/0x16a [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? __init_kthread_worker+0x55/0x55 [] ? gs_change+0x13/0x13 Signed-off-by: Andre Guedes Signed-off-by: Vinicius Costa Gomes Reviewed-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 92b8fea..453893b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -540,7 +540,7 @@ void hci_conn_put_device(struct hci_conn *conn); static inline void hci_conn_hold(struct hci_conn *conn) { atomic_inc(&conn->refcnt); - cancel_delayed_work_sync(&conn->disc_work); + cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_put(struct hci_conn *conn) @@ -559,7 +559,7 @@ static inline void hci_conn_put(struct hci_conn *conn) } else { timeo = msecs_to_jiffies(10); } - cancel_delayed_work_sync(&conn->disc_work); + cancel_delayed_work(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, &conn->disc_work, timeo); } -- cgit v0.10.2 From cf33e77b76d7439f21a23a94eab4ab3b405a6a7d Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Fri, 27 Jan 2012 19:32:39 +0200 Subject: Bluetooth: Fix RFCOMM session reference counting issue There is an imbalance in the rfcomm_session_hold / rfcomm_session_put operations which causes the following crash: [ 685.010159] BUG: unable to handle kernel paging request at 6b6b6b6b [ 685.010169] IP: [] rfcomm_process_dlcs+0x1b/0x15e [ 685.010181] *pdpt = 000000002d665001 *pde = 0000000000000000 [ 685.010191] Oops: 0000 [#1] PREEMPT SMP [ 685.010247] [ 685.010255] Pid: 947, comm: krfcommd Tainted: G C 3.0.16-mid8-dirty #44 [ 685.010266] EIP: 0060:[] EFLAGS: 00010246 CPU: 1 [ 685.010274] EIP is at rfcomm_process_dlcs+0x1b/0x15e [ 685.010281] EAX: e79f551c EBX: 6b6b6b6b ECX: 00000007 EDX: e79f40b4 [ 685.010288] ESI: e79f4060 EDI: ed4e1f70 EBP: ed4e1f68 ESP: ed4e1f50 [ 685.010295] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [ 685.010303] Process krfcommd (pid: 947, ti=ed4e0000 task=ed43e5e0 task.ti=ed4e0000) [ 685.010308] Stack: [ 685.010312] ed4e1f68 c149eb53 e5925150 e79f4060 ed500000 ed4e1f70 ed4e1f80 c149ec10 [ 685.010331] 00000000 ed43e5e0 00000000 ed4e1f90 ed4e1f9c c149ec87 0000bf54 00000000 [ 685.010348] 00000000 ee03bf54 c149ec37 ed4e1fe4 c104fe01 00000000 00000000 00000000 [ 685.010367] Call Trace: [ 685.010376] [] ? rfcomm_process_rx+0x6e/0x74 [ 685.010387] [] rfcomm_process_sessions+0xb7/0xde [ 685.010398] [] rfcomm_run+0x50/0x6d [ 685.010409] [] ? rfcomm_process_sessions+0xde/0xde [ 685.010419] [] kthread+0x63/0x68 [ 685.010431] [] ? __init_kthread_worker+0x42/0x42 [ 685.010442] [] kernel_thread_helper+0x6/0xd This issue has been brought up earlier here: https://lkml.org/lkml/2011/5/21/127 The issue appears to be the rfcomm_session_put in rfcomm_recv_ua. This operation doesn't seem be to required as for the non-initiator case we have the rfcomm_process_rx doing an explicit put and in the initiator case the last dlc_unlink will drive the reference counter to 0. There have been several attempts to fix these issue: 6c2718d Bluetooth: Do not call rfcomm_session_put() for RFCOMM UA on closed socket 683d949 Bluetooth: Never deallocate a session when some DLC points to it but AFAICS they do not fix the issue just make it harder to reproduce. Signed-off-by: Octavian Purdila Signed-off-by: Gopala Krishna Murala Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 501649b..8a60238 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1164,12 +1164,18 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - /* When socket is closed and we are not RFCOMM - * initiator rfcomm_process_rx already calls - * rfcomm_session_put() */ - if (s->sock->sk->sk_state != BT_CLOSED) - if (list_empty(&s->dlcs)) - rfcomm_session_put(s); + /* rfcomm_session_put is called later so don't do + * anything here otherwise we will mess up the session + * reference counter: + * + * (a) when we are the initiator dlc_unlink will drive + * the reference counter to 0 (there is no initial put + * after session_add) + * + * (b) when we are not the initiator rfcomm_rx_process + * will explicitly call put to balance the initial hold + * done after session add. + */ break; } } -- cgit v0.10.2 From ca0d6c7ece0e78268cd7c5c378d6b1b610625085 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 3 Feb 2012 21:29:40 +0200 Subject: Bluetooth: Add missing QUIRK_NO_RESET test to hci_dev_do_close We should only perform a reset in hci_dev_do_close if the HCI_QUIRK_NO_RESET flag is set (since in such a case a reset will not be performed when initializing the device). Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9de9371..5aeb624 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -640,7 +640,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (!test_bit(HCI_RAW, &hdev->flags)) { + if (!test_bit(HCI_RAW, &hdev->flags) && + test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); __hci_request(hdev, hci_reset_req, 0, msecs_to_jiffies(250)); -- cgit v0.10.2 From 403f048a57050add364827fb3e2650af86463168 Mon Sep 17 00:00:00 2001 From: Manoj Iyer Date: Thu, 2 Feb 2012 09:32:36 -0600 Subject: Bluetooth: btusb: Add vendor specific ID (0a5c 21f3) for BCM20702A0 T: Bus=01 Lev=02 Prnt=02 Port=03 Cnt=03 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=ff(vend.) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0a5c ProdID=21f3 Rev=01.12 S: Manufacturer=Broadcom Corp S: Product=BCM20702A0 S: SerialNumber=74DE2B344A7B C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=0mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 3 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none) Signed-off-by: Manoj Iyer Tested-by: Dennis Chua Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b7c4f4e..789c9b5 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -102,6 +102,7 @@ static struct usb_device_id btusb_table[] = { /* Broadcom BCM20702A0 */ { USB_DEVICE(0x0a5c, 0x21e3) }, + { USB_DEVICE(0x0a5c, 0x21f3) }, { USB_DEVICE(0x413c, 0x8197) }, { } /* Terminating entry */ -- cgit v0.10.2 From 6de32750822d00bfa92c341166132b0714c5b559 Mon Sep 17 00:00:00 2001 From: Ulisses Furquim Date: Mon, 30 Jan 2012 18:26:28 -0200 Subject: Bluetooth: Remove usage of __cancel_delayed_work() __cancel_delayed_work() is being used in some paths where we cannot sleep waiting for the delayed work to finish. However, that function might return while the timer is running and the work will be queued again. Replace the calls with safer cancel_delayed_work() version which spins until the timer handler finishes on other CPUs and cancels the delayed work. Signed-off-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 26486e1..b1664ed 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -611,7 +611,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan, { BT_DBG("chan %p state %d timeout %ld", chan, chan->state, timeout); - if (!__cancel_delayed_work(work)) + if (!cancel_delayed_work(work)) l2cap_chan_hold(chan); schedule_delayed_work(work, timeout); } @@ -619,7 +619,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan, static inline void l2cap_clear_timer(struct l2cap_chan *chan, struct delayed_work *work) { - if (__cancel_delayed_work(work)) + if (cancel_delayed_work(work)) l2cap_chan_put(chan); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dcccae0..ec10c69 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2574,7 +2574,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -3121,7 +3121,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) return 0; - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work(&conn->info_timer); if (result != L2CAP_IR_SUCCESS) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; @@ -4501,7 +4501,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) if (hcon->type == LE_LINK) { smp_distribute_keys(conn, 0); - __cancel_delayed_work(&conn->security_timer); + cancel_delayed_work(&conn->security_timer); } rcu_read_lock(); -- cgit v0.10.2 From 24d2b8c0ac5c8ec41c26ed432238b0e027184882 Mon Sep 17 00:00:00 2001 From: Ulisses Furquim Date: Mon, 30 Jan 2012 18:26:29 -0200 Subject: Bluetooth: Fix possible use after free in delete path We need to use the _sync() version for cancelling the info and security timer in the L2CAP connection delete path. Otherwise the delayed work handler might run after the connection object is freed. Signed-off-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ec10c69..32d338c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1018,10 +1018,10 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) hci_chan_del(conn->hchan); if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work_sync(&conn->info_timer); if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->pend)) { - __cancel_delayed_work(&conn->security_timer); + cancel_delayed_work_sync(&conn->security_timer); smp_chan_destroy(conn); } -- cgit v0.10.2 From 4b5a433ae5348c23caa0b5f0a2fca7c342acb200 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Jan 2012 13:55:23 +0100 Subject: mac80211: call rate control only after init There are situations where we don't have the necessary rate control information yet for station entries, e.g. when associating. This currently doesn't really happen due to the dummy station handling; explicitly disabling rate control when it's not initialised will allow us to remove dummy stations. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 2406b3e..d86217d 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -63,14 +63,14 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", TEST(AUTH), TEST(ASSOC), TEST(PS_STA), TEST(PS_DRIVER), TEST(AUTHORIZED), TEST(SHORT_PREAMBLE), TEST(WME), TEST(WDS), TEST(CLEAR_PS_FILT), TEST(MFP), TEST(BLOCK_BA), TEST(PSPOLL), TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), - TEST(TDLS_PEER_AUTH)); + TEST(TDLS_PEER_AUTH), TEST(RATE_CONTROL)); #undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 5a5a776..ad64f4d 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -336,7 +336,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, int i; u32 mask; - if (sta) { + if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { ista = &sta->sta; priv_sta = sta->rate_ctrl_priv; } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 168427b..2b83f32 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -62,6 +62,7 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; ref->ops->rate_init(ref->priv, sband, ista, priv_sta); + set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } static inline void rate_control_rate_update(struct ieee80211_local *local, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 6f77f12..bfed851 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -52,6 +52,7 @@ * @WLAN_STA_SP: Station is in a service period, so don't try to * reply to other uAPSD trigger frames or PS-Poll. * @WLAN_STA_4ADDR_EVENT: 4-addr event was already sent for this frame. + * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -71,6 +72,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_UAPSD, WLAN_STA_SP, WLAN_STA_4ADDR_EVENT, + WLAN_STA_RATE_CONTROL, }; enum ieee80211_sta_state { -- cgit v0.10.2 From 216c57b214bd621335ff698b475f6db2802502dc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 8 Feb 2012 19:17:11 +0100 Subject: mac80211: do not call rate control .tx_status before .rate_init Most rate control implementations assume .get_rate and .tx_status are only called once the per-station data has been fully initialized. minstrel_ht crashes if this assumption is violated. Signed-off-by: Felix Fietkau Tested-by: Arend van Spriel Signed-off-by: John W. Linville diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 2b83f32..80cfc00 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -41,7 +41,7 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; - if (!ref) + if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) return; ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); -- cgit v0.10.2 From 6670f15b1f6858a43b292d8ab64464e9f085a6aa Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Thu, 9 Feb 2012 18:32:22 -0800 Subject: mwifiex: clear previous security setting during association Driver maintains different flags for WEP, WPA, WPA2 security modes. Appropriate flag is set using security information provided in connect request. mwifiex_is_network_compatible() routine uses them to check if driver's setting is compatible with AP. Association is aborted if the routine fails. For some corner cases, it is observed that association is failed even for valid security information based on association history. This patch fixes the problem by clearing previous security setting during each association. We should set WEP key provided in connect request as default tx key. This missing change is also added here. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index c3b6c46..5b2972b 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -841,7 +841,12 @@ mwifiex_cfg80211_assoc(struct mwifiex_private *priv, size_t ssid_len, u8 *ssid, ret = mwifiex_set_rf_channel(priv, channel, priv->adapter->channel_type); - ret = mwifiex_set_encode(priv, NULL, 0, 0, 1); /* Disable keys */ + /* As this is new association, clear locally stored + * keys and security related flags */ + priv->sec_info.wpa_enabled = false; + priv->sec_info.wpa2_enabled = false; + priv->wep_key_curr_index = 0; + ret = mwifiex_set_encode(priv, NULL, 0, 0, 1); if (mode == NL80211_IFTYPE_ADHOC) { /* "privacy" is set only for ad-hoc mode */ @@ -886,6 +891,7 @@ mwifiex_cfg80211_assoc(struct mwifiex_private *priv, size_t ssid_len, u8 *ssid, dev_dbg(priv->adapter->dev, "info: setting wep encryption" " with key len %d\n", sme->key_len); + priv->wep_key_curr_index = sme->key_idx; ret = mwifiex_set_encode(priv, sme->key, sme->key_len, sme->key_idx, 0); } -- cgit v0.10.2 From 2504a6423b9ab4c36df78227055995644de19edb Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Sat, 11 Feb 2012 10:01:53 -0500 Subject: ath9k: stop on rates with idx -1 in ath9k rate control's .tx_status Rate control algorithms are supposed to stop processing when they encounter a rate with the index -1. Checking for rate->count not being zero is not enough. Allowing a rate with negative index leads to memory corruption in ath_debug_stat_rc(). One consequence of the bug is discussed at https://bugzilla.redhat.com/show_bug.cgi?id=768639 Signed-off-by: Pavel Roskin Cc: stable@vger.kernel.org Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 635b592..a427a16 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1346,7 +1346,7 @@ static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband, fc = hdr->frame_control; for (i = 0; i < sc->hw->max_rates; i++) { struct ieee80211_tx_rate *rate = &tx_info->status.rates[i]; - if (!rate->count) + if (rate->idx < 0 || !rate->count) break; final_ts_idx = i; -- cgit v0.10.2 From cd961c2ca98efbe7d738ca8720673fc03538b2b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Feb 2012 20:28:25 +0000 Subject: netem: fix dequeue commit 50612537e9 (netem: fix classful handling) added two errors in netem_dequeue() 1) After checking skb at the head of tfifo queue for time constraints, it dequeues tail skb, thus adding unwanted reordering. 2) qdisc stats are updated twice per packet (one when packet dequeued from tfifo, once when delivered) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e83d61c..5da548f 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -501,9 +501,8 @@ tfifo_dequeue: /* if more time remaining? */ if (cb->time_to_send <= psched_get_time()) { - skb = qdisc_dequeue_tail(sch); - if (unlikely(!skb)) - goto qdisc_dequeue; + __skb_unlink(skb, &sch->q); + sch->qstats.backlog -= qdisc_pkt_len(skb); #ifdef CONFIG_NET_CLS_ACT /* @@ -539,7 +538,6 @@ deliver: qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } -qdisc_dequeue: if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); if (skb) -- cgit v0.10.2 From 11aad99af6ef629ff3b05d1c9f0936589b204316 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Feb 2012 20:43:11 +0000 Subject: atl1c: dont use highprio tx queue This driver attempts to use two TX rings but lacks proper support : 1) IRQ handler only takes care of TX completion on first TX ring 2) the stop/start logic uses the legacy functions (for non multiqueue drivers) This means all packets witk skb mark set to 1 are sent through high queue but are never cleaned and queue eventualy fills and block the device, triggering the infamous "NETDEV WATCHDOG" message. Lets use a single TX ring to fix the problem, this driver is not a real multiqueue one yet. Minimal fix for stable kernels. Reported-by: Thomas Meyer Tested-by: Thomas Meyer Signed-off-by: Eric Dumazet Cc: Jay Cliburn Cc: Chris Snook Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index b859124..1ff3c6d 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2244,10 +2244,6 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, dev_info(&adapter->pdev->dev, "tx locked\n"); return NETDEV_TX_LOCKED; } - if (skb->mark == 0x01) - type = atl1c_trans_high; - else - type = atl1c_trans_normal; if (atl1c_tpd_avail(adapter, type) < tpd_req) { /* no enough descriptor, just stop queue */ -- cgit v0.10.2 From b203262de63c56393d09e254242b57c002d8619d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 16 Feb 2012 01:48:56 +0000 Subject: vmxnet3: cap copy length at size of skb to prevent dropped frames on tx I was recently shown that vmxnet3 devices on transmit, will drop very small udp frames consistently. This is due to a regression introduced by commit 39d4a96fd7d2926e46151adbd18b810aeeea8ec0. This commit attempts to introduce an optimization to the tx path, indicating that the underlying hardware behaves optimally when at least 54 bytes of header data are available for direct access. This causes problems however, if the entire frame is less than 54 bytes long. The subsequent pskb_may_pull in vmxnet3_parse_and_copy_hdr fails, causing an error return code, which leads to vmxnet3_tq_xmit dropping the frame. Fix it by placing a cap on the copy length. For frames longer than 54 bytes, we do the pull as we normally would. If the frame is shorter than that, copy the whole frame, but no more. This ensures that we still get the optimization for qualifying frames, but don't do any damange for frames that are too short. Also, since I'm unable to do this, it wuold be great if vmware could follow up this patch with some additional code commentary as to why 54 bytes is an optimal pull length for a virtual NIC driver. The comment that introduced this was vague on that. Thanks! Signed-off-by: Neil Horman Reported-by: Max Matveev CC: Max Matveev CC: "David S. Miller" CC: Shreyas Bhatewara CC: "VMware, Inc." Signed-off-by: Shreyas N Bhatewara Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index de7fc34..3dcd385 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -843,8 +843,8 @@ vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, /* for simplicity, don't copy L4 headers */ ctx->l4_hdr_size = 0; } - ctx->copy_size = ctx->eth_ip_hdr_size + - ctx->l4_hdr_size; + ctx->copy_size = min(ctx->eth_ip_hdr_size + + ctx->l4_hdr_size, skb->len); } else { ctx->eth_ip_hdr_size = 0; ctx->l4_hdr_size = 0; -- cgit v0.10.2 From 32aa64f77e032511079fec5ec0465aeb6d6c3891 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Feb 2012 20:44:33 +0000 Subject: net/ethernet: ks8851_mll: signedness bug in ks8851_probe() netdev->irq is unsigned, so it's never less than zero. Signed-off-by: Dan Carpenter Tested-by: Jan Weitzel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 231176f..2784bc7 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -1545,7 +1545,7 @@ static int __devinit ks8851_probe(struct platform_device *pdev) netdev->irq = platform_get_irq(pdev, 0); - if (netdev->irq < 0) { + if ((int)netdev->irq < 0) { err = netdev->irq; goto err_get_irq; } -- cgit v0.10.2 From 8ae0cfee2a727f698bda12e530f4879a9793c6c9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 19 Feb 2012 09:43:32 +0000 Subject: drivers/atm/solos-pci.c: exchange pci_iounmaps The calls to pci_iounmap are in the wrong order, as compared to the associated calls to pci_iomap. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,x; statement S,S1; int ret; @@ e = pci_iomap(x,...) ... when != pci_iounmap(x,e) if (<+...e...+>) S ... when any when != pci_iounmap(x,e) *if (...) { ... when != pci_iounmap(x,e) return ...; } ... when any pci_iounmap(x,e); // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 5d1d076..e8cd652 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -1206,9 +1206,9 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) out_unmap_both: pci_set_drvdata(dev, NULL); - pci_iounmap(dev, card->config_regs); - out_unmap_config: pci_iounmap(dev, card->buffers); + out_unmap_config: + pci_iounmap(dev, card->config_regs); out_release_regions: pci_release_regions(dev); out: -- cgit v0.10.2 From 64f0a836f600e9c31ffd511713ab5d328aa96ac8 Mon Sep 17 00:00:00 2001 From: Nikola Pajkovsky Date: Sun, 19 Feb 2012 10:47:43 +0000 Subject: b44: remove __exit from b44_pci_exit() WARNING: drivers/net/ethernet/broadcom/built-in.o(.init.text+0x5d): Section mismatch in reference from the function b44_init() to the function .exit.text:b44_pci_exit() module exits with b44_cleanup() Signed-off-by: Nikola Pajkovsky Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 3fb66d0..cab8745 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2339,7 +2339,7 @@ static inline int __init b44_pci_init(void) return err; } -static inline void __exit b44_pci_exit(void) +static inline void b44_pci_exit(void) { #ifdef CONFIG_B44_PCI ssb_pcihost_unregister(&b44_pci_driver); -- cgit v0.10.2 From a7762b10c12a70c5dbf2253142764b728ac88c3a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 15 Feb 2012 17:51:56 +0100 Subject: can: sja1000: fix isr hang when hw is unplugged under load In the case of hotplug enabled devices (PCMCIA/PCIeC) the removal of the hardware can cause an infinite loop in the common sja1000 isr. Use the already retrieved status register to indicate a possible hardware removal and double check by reading the mode register in sja1000_is_absent. Cc: stable@kernel.org [3.2+] Signed-off-by: Oliver Hartkopp Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 04a3f1b..192b0d1 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -95,11 +95,16 @@ static void sja1000_write_cmdreg(struct sja1000_priv *priv, u8 val) spin_unlock_irqrestore(&priv->cmdreg_lock, flags); } +static int sja1000_is_absent(struct sja1000_priv *priv) +{ + return (priv->read_reg(priv, REG_MOD) == 0xFF); +} + static int sja1000_probe_chip(struct net_device *dev) { struct sja1000_priv *priv = netdev_priv(dev); - if (priv->reg_base && (priv->read_reg(priv, 0) == 0xFF)) { + if (priv->reg_base && sja1000_is_absent(priv)) { printk(KERN_INFO "%s: probing @0x%lX failed\n", DRV_NAME, dev->base_addr); return 0; @@ -493,6 +498,9 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) while ((isrc = priv->read_reg(priv, REG_IR)) && (n < SJA1000_MAX_IRQ)) { n++; status = priv->read_reg(priv, REG_SR); + /* check for absent controller due to hw unplug */ + if (status == 0xFF && sja1000_is_absent(priv)) + return IRQ_NONE; if (isrc & IRQ_WUI) dev_warn(dev->dev.parent, "wakeup interrupt\n"); @@ -509,6 +517,9 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) while (status & SR_RBS) { sja1000_rx(dev); status = priv->read_reg(priv, REG_SR); + /* check for absent controller */ + if (status == 0xFF && sja1000_is_absent(priv)) + return IRQ_NONE; } } if (isrc & (IRQ_DOI | IRQ_EI | IRQ_BEI | IRQ_EPI | IRQ_ALI)) { -- cgit v0.10.2 From 3d7474734b220ccbf9997ea484d0bcd4f7ab8549 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 19 Feb 2012 21:38:52 +0000 Subject: mlx4_core: Do not map BF area if capability is 0 BF can be disabled in some cases, the capability field, bf_reg_size is set to zero in this case. Don't map the BF area in this case, it would cause failures. In addition, leaving the BF area unmapped also alerts the ETH driver to not use BF. Signed-off-by: Jack Morgenstein Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 678558b..9c5fbad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -986,6 +986,9 @@ static int map_bf_area(struct mlx4_dev *dev) resource_size_t bf_len; int err = 0; + if (!dev->caps.bf_reg_size) + return -ENXIO; + bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); bf_len = pci_resource_len(dev->pdev, 2) - -- cgit v0.10.2 From 88ba136d6635b262f77cc418d536115fb8e4d4ab Mon Sep 17 00:00:00 2001 From: Joerg Willmann Date: Tue, 21 Feb 2012 13:26:14 +0100 Subject: netfilter: ebtables: fix alignment problem in ppc ebt_among extension of ebtables uses __alignof__(_xt_align) while the corresponding kernel module uses __alignof__(ebt_replace) to determine the alignment in EBT_ALIGN(). These are the results of these values on different platforms: x86 x86_64 ppc __alignof__(_xt_align) 4 8 8 __alignof__(ebt_replace) 4 8 4 ebtables fails to add rules which use the among extension. I'm using kernel 2.6.33 and ebtables 2.0.10-4 According to Bart De Schuymer, userspace alignment was changed to _xt_align to fix an alignment issue on a userspace32-kernel64 system (he thinks it was for an ARM device). So userspace must be right. The kernel alignment macro needs to change so it also uses _xt_align instead of ebt_replace. The userspace changes date back from June 29, 2009. Signed-off-by: Joerg Willmann Signed-off by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8797ed1..4dd5bd6 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -285,8 +285,8 @@ struct ebt_table { struct module *me; }; -#define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ - ~(__alignof__(struct ebt_replace)-1)) +#define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ + ~(__alignof__(struct _xt_align)-1)) extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -- cgit v0.10.2 From af14cca162ddcdea017b648c21b9b091e4bf1fa4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 21 Feb 2012 14:53:57 +0100 Subject: netfilter: ctnetlink: fix soft lockup when netlink adds new entries Marcell Zambo and Janos Farago noticed and reported that when new conntrack entries are added via netlink and the conntrack table gets full, soft lockup happens. This is because the nf_conntrack_lock is held while nf_conntrack_alloc is called, which is in turn wants to lock nf_conntrack_lock while evicting entries from the full table. The patch fixes the soft lockup with limiting the holding of the nf_conntrack_lock to the minimum, where it's absolutely required. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b03..cc70517 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1367,15 +1367,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); - spin_unlock_bh(&nf_conntrack_lock); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_lock); err = -EOPNOTSUPP; goto err1; } - spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), @@ -1469,7 +1466,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, tstamp->start = ktime_to_ns(ktime_get_real()); add_timer(&ct->timeout); + spin_lock_bh(&nf_conntrack_lock); nf_conntrack_hash_insert(ct); + nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); rcu_read_unlock(); return ct; @@ -1490,6 +1490,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; u16 zone; int err; @@ -1512,25 +1513,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, zone, &otuple); + h = nf_conntrack_find_get(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, zone, &rtuple); + h = nf_conntrack_find_get(net, zone, &rtuple); + spin_unlock_bh(&nf_conntrack_lock); if (h == NULL) { err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - struct nf_conn *ct; enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); - if (IS_ERR(ct)) { - err = PTR_ERR(ct); - goto out_unlock; - } + if (IS_ERR(ct)) + return PTR_ERR(ct); + err = 0; - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); if (test_bit(IPS_EXPECTED_BIT, &ct->status)) events = IPCT_RELATED; else @@ -1545,23 +1543,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); + } return err; } /* implicit 'else' */ - /* We manipulate the conntrack inside the global conntrack table lock, - * so there's no need to increase the refcount */ err = -EEXIST; + ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - + spin_lock_bh(&nf_conntrack_lock); err = ctnetlink_change_conntrack(ct, cda); + spin_unlock_bh(&nf_conntrack_lock); if (err == 0) { - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1570,15 +1564,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_MARK), ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); - nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); - - return err; + } } -out_unlock: - spin_unlock_bh(&nf_conntrack_lock); + nf_ct_put(ct); return err; } -- cgit v0.10.2 From 15103aa7a033d7d671c75cc3a71d772dbcbae61e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 20 Feb 2012 17:28:13 +0000 Subject: zaurus: Add ID for C-750/C-760/C-860/SL-C3000 PDA in MDLM mode In 16adf5d07987d93675945f3cecf0e33706566005 I removed an over-broad alias that caused zaurus.ko to bind to unrelated devices. I had a report that at least one valid case no longer auto-loads because of this. This patch adds an ID for that case. Reported-by: Raphael Wimmer Signed-off-by: Dave Jones Signed-off-by: David S. Miller diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index f701d41..ed2caed 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -316,6 +316,11 @@ static const struct usb_device_id products [] = { ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { + /* C-750/C-760/C-860/SL-C3000 PDA in MDLM mode */ + USB_DEVICE_AND_INTERFACE_INFO(0x04DD, 0x9031, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &bogus_mdlm_info, +}, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, -- cgit v0.10.2 From 730c41d5ba583a9608300fc4e6cf236957cfe02a Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 21 Feb 2012 03:39:32 +0000 Subject: mlx4: Replacing pool_lock with mutex Under the spinlock we call request_irq(), which allocates memory with GFP_KERNEL, This causes the following trace when DEBUG_SPINLOCK is enabled, it can cause the following trace: BUG: spinlock wrong CPU on CPU#2, ethtool/2595 lock: ffff8801f9cbc2b0, .magic: dead4ead, .owner: ethtool/2595, .owner_cpu: 0 Pid: 2595, comm: ethtool Not tainted 3.0.18 #2 Call Trace: spin_bug+0xa2/0xf0 do_raw_spin_unlock+0x71/0xa0 _raw_spin_unlock+0xe/0x10 mlx4_assign_eq+0x12b/0x190 [mlx4_core] mlx4_en_activate_cq+0x252/0x2d0 [mlx4_en] ? mlx4_en_activate_rx_rings+0x227/0x370 [mlx4_en] mlx4_en_start_port+0x189/0xb90 [mlx4_en] mlx4_en_set_ringparam+0x29a/0x340 [mlx4_en] dev_ethtool+0x816/0xb10 ? dev_get_by_name_rcu+0xa4/0xe0 dev_ioctl+0x2b5/0x470 handle_mm_fault+0x1cd/0x2d0 sock_do_ioctl+0x5d/0x70 sock_ioctl+0x79/0x2f0 do_vfs_ioctl+0x8c/0x340 sys_ioctl+0xa1/0xb0 system_call_fastpath+0x16/0x1b Replacing with mutex, which is enough in this case. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8fa41f3..9129ace0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1036,7 +1036,7 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector) struct mlx4_priv *priv = mlx4_priv(dev); int vec = 0, err = 0, i; - spin_lock(&priv->msix_ctl.pool_lock); + mutex_lock(&priv->msix_ctl.pool_lock); for (i = 0; !vec && i < dev->caps.comp_pool; i++) { if (~priv->msix_ctl.pool_bm & 1ULL << i) { priv->msix_ctl.pool_bm |= 1ULL << i; @@ -1058,7 +1058,7 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector) eq_set_ci(&priv->eq_table.eq[vec], 1); } } - spin_unlock(&priv->msix_ctl.pool_lock); + mutex_unlock(&priv->msix_ctl.pool_lock); if (vec) { *vector = vec; @@ -1079,13 +1079,13 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) if (likely(i >= 0)) { /*sanity check , making sure were not trying to free irq's Belonging to a legacy EQ*/ - spin_lock(&priv->msix_ctl.pool_lock); + mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); } - spin_unlock(&priv->msix_ctl.pool_lock); + mutex_unlock(&priv->msix_ctl.pool_lock); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 9c5fbad..5c655a2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1828,7 +1828,7 @@ slave_start: goto err_master_mfunc; priv->msix_ctl.pool_bm = 0; - spin_lock_init(&priv->msix_ctl.pool_lock); + mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); if ((mlx4_is_mfunc(dev)) && diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c92269f..28f8251 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -697,7 +697,7 @@ struct mlx4_sense { struct mlx4_msix_ctl { u64 pool_bm; - spinlock_t pool_lock; + struct mutex pool_lock; }; struct mlx4_steer { -- cgit v0.10.2 From 3d8f93083be54696d3b7f8e8c6c70d411d01f9e8 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 21 Feb 2012 03:41:07 +0000 Subject: mlx4: Setting new port types after all interfaces unregistered In port type change flow, need to set the new port types only after all interfaces have finished the unregister process. Otherwise, during unregister, one of the interfaces might issue a SET_PORT command with wrong port types, it can cause bad FW behavior. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5c655a2..32f8799 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -531,15 +531,14 @@ int mlx4_change_port_types(struct mlx4_dev *dev, for (port = 0; port < dev->caps.num_ports; port++) { /* Change the port type only if the new type is different * from the current, and not set to Auto */ - if (port_types[port] != dev->caps.port_type[port + 1]) { + if (port_types[port] != dev->caps.port_type[port + 1]) change = 1; - dev->caps.port_type[port + 1] = port_types[port]; - } } if (change) { mlx4_unregister_device(dev); for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); + dev->caps.port_type[port + 1] = port_types[port]; err = mlx4_SET_PORT(dev, port); if (err) { mlx4_err(dev, "Failed to set port %d, " -- cgit v0.10.2 From 84338a6c9dbb6ff3de4749864020f8f25d86fc81 Mon Sep 17 00:00:00 2001 From: Michel Machado Date: Tue, 21 Feb 2012 16:04:13 -0500 Subject: neighbour: Fixed race condition at tbl->nht When the fixed race condition happens: 1. While function neigh_periodic_work scans the neighbor hash table pointed by field tbl->nht, it unlocks and locks tbl->lock between buckets in order to call cond_resched. 2. Assume that function neigh_periodic_work calls cond_resched, that is, the lock tbl->lock is available, and function neigh_hash_grow runs. 3. Once function neigh_hash_grow finishes, and RCU calls neigh_hash_free_rcu, the original struct neigh_hash_table that function neigh_periodic_work was using doesn't exist anymore. 4. Once back at neigh_periodic_work, whenever the old struct neigh_hash_table is accessed, things can go badly. Signed-off-by: Michel Machado Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e287346..2a83914 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -826,6 +826,8 @@ next_elt: write_unlock_bh(&tbl->lock); cond_resched(); write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); } /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 -- cgit v0.10.2 From 115c9b81928360d769a76c632bae62d15206a94a Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 21 Feb 2012 16:54:48 -0500 Subject: rtnetlink: Fix problem with buffer allocation Implement a new netlink attribute type IFLA_EXT_MASK. The mask is a 32 bit value that can be used to indicate to the kernel that certain extended ifinfo values are requested by the user application. At this time the only mask value defined is RTEXT_FILTER_VF to indicate that the user wants the ifinfo dump to send information about the VFs belonging to the interface. This patch fixes a bug in which certain applications do not have large enough buffers to accommodate the extra information returned by the kernel with large numbers of SR-IOV virtual functions. Those applications will not send the new netlink attribute with the interface info dump request netlink messages so they will not get unexpectedly large request buffers returned by the kernel. Modifies the rtnl_calcit function to traverse the list of net devices and compute the minimum buffer size that can hold the info dumps of all matching devices based upon the filter passed in via the new netlink attribute filter mask. If no filter mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE. With this change it is possible to add yet to be defined netlink attributes to the dump request which should make it fairly extensible in the future. Signed-off-by: Greg Rose Signed-off-by: David S. Miller diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c52d4b5..4b24ff4 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -137,6 +137,7 @@ enum { IFLA_AF_SPEC, IFLA_GROUP, /* Group the device belongs to */ IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ __IFLA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ea..577592e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -602,6 +602,9 @@ struct tcamsg { #define TCA_ACT_TAB 1 /* attr type must be >=1 */ #define TCAA_MAX 1 +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) + /* End of information exported to user level */ #ifdef __KERNEL__ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 678f1ff..3702939 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -6,7 +6,7 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff *); +typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); extern int __rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65aebd4..606a6e8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -60,7 +60,6 @@ struct rtnl_link { }; static DEFINE_MUTEX(rtnl_mutex); -static u16 min_ifinfo_dump_size; void rtnl_lock(void) { @@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) } /* All VF info */ -static inline int rtnl_vfinfo_size(const struct net_device *dev) +static inline int rtnl_vfinfo_size(const struct net_device *dev, + u32 ext_filter_mask) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { - + if (dev->dev.parent && dev_is_pci(dev->dev.parent) && + (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); size_t size = nla_total_size(sizeof(struct nlattr)); size += nla_total_size(num_vfs * sizeof(struct nlattr)); @@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev) return port_self_size; } -static noinline size_t if_nlmsg_size(const struct net_device *dev) +static noinline size_t if_nlmsg_size(const struct net_device *dev, + u32 ext_filter_mask) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ - + nla_total_size(4) /* IFLA_NUM_VF */ - + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + + nla_total_size(ext_filter_mask + & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ @@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags) + unsigned int flags, u32 ext_filter_mask) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; copy_rtnl_link_stats64(nla_data(attr), stats); - if (dev->dev.parent) + if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent + && (ext_filter_mask & RTEXT_FILTER_VF)) { int i; struct nlattr *vfinfo, *vf; @@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct hlist_head *head; struct hlist_node *node; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; + nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + ifla_policy); + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI) <= 0) + NLM_F_MULTI, + ext_filter_mask) <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, [IFLA_AF_SPEC] = { .type = NLA_NESTED }, + [IFLA_EXT_MASK] = { .type = NLA_U32 }, }; EXPORT_SYMBOL(ifla_policy); @@ -1509,8 +1522,6 @@ errout: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev), - min_ifinfo_dump_size); return err; } @@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct net_device *dev = NULL; struct sk_buff *nskb; int err; + u32 ext_filter_mask = 0; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); @@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (dev == NULL) return -ENODEV; - nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); if (nskb == NULL) return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } -static u16 rtnl_calcit(struct sk_buff *skb) +static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; + u16 min_ifinfo_dump_size = 0; + + nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy); + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + + if (!ext_filter_mask) + return NLMSG_GOODSIZE; + /* + * traverse the list of net devices and compute the minimum + * buffer size based upon the filter mask. + */ + list_for_each_entry(dev, &net->dev_base_head, dev_list) { + min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, + if_nlmsg_size(dev, + ext_filter_mask)); + } + return min_ifinfo_dump_size; } @@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) int err = -ENOBUFS; size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); if (skb == NULL) goto errout; - min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); - - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; calcit = rtnl_get_calcit(family, type); if (calcit) - min_dump_alloc = calcit(skb); + min_dump_alloc = calcit(skb, nlh); __rtnl_unlock(); rtnl = net->rtnl; -- cgit v0.10.2 From a5e7424d424f6398a198ead79d99e0a3c2f24ce8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 21 Feb 2012 17:59:19 -0500 Subject: ipv4: ping: Fix recvmsg MSG_OOB error handling. Don't return an uninitialized variable as the error, return -EOPNOTSUPP instead. Reported-by: Dave Jones Signed-off-by: David S. Miller diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index aea5a19..b072386 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -630,6 +630,7 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); + err = -EOPNOTSUPP; if (flags & MSG_OOB) goto out; -- cgit v0.10.2 From 597cdbc2239e6019bbb2dd73b266f436166f0427 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Feb 2012 10:46:49 +0000 Subject: atm: clip: remove clip_tbl Commit 32092ecf0644 (atm: clip: Use device neigh support on top of "arp_tbl".) introduced a bug since clip_tbl is zeroed : Crash occurs in __neigh_for_each_release() idle_timer_check() must use instead arp_tbl and neigh_check_cb() should ignore non clip neighbours. Idea from David Miller. Reported-by: Meelis Roos Signed-off-by: Eric Dumazet Tested-by: Meelis Roos Signed-off-by: David S. Miller diff --git a/net/atm/clip.c b/net/atm/clip.c index c12c258..127fe70 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -46,8 +46,8 @@ static struct net_device *clip_devs; static struct atm_vcc *atmarpd; -static struct neigh_table clip_tbl; static struct timer_list idle_timer; +static const struct neigh_ops clip_neigh_ops; static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { @@ -123,6 +123,8 @@ static int neigh_check_cb(struct neighbour *n) struct atmarp_entry *entry = neighbour_priv(n); struct clip_vcc *cv; + if (n->ops != &clip_neigh_ops) + return 0; for (cv = entry->vccs; cv; cv = cv->next) { unsigned long exp = cv->last_use + cv->idle_timeout; @@ -154,10 +156,10 @@ static int neigh_check_cb(struct neighbour *n) static void idle_timer_check(unsigned long dummy) { - write_lock(&clip_tbl.lock); - __neigh_for_each_release(&clip_tbl, neigh_check_cb); + write_lock(&arp_tbl.lock); + __neigh_for_each_release(&arp_tbl, neigh_check_cb); mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - write_unlock(&clip_tbl.lock); + write_unlock(&arp_tbl.lock); } static int clip_arp_rcv(struct sk_buff *skb) -- cgit v0.10.2 From ba9adbe67e288823ac1deb7f11576ab5653f833e Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 22 Feb 2012 08:58:10 +0000 Subject: jme: Fix FIFO flush issue Set the RX FIFO flush watermark lower. According to Federico and JMicron's reply, setting it to 16QW would be stable on most platforms. Otherwise, user might experience packet drop issue. CC: stable@kernel.org Reported-by: Federico Quagliata Fixed-by: Federico Quagliata Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 27d651a..55cbf65 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2328,19 +2328,11 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) ((new_mtu) < IPV6_MIN_MTU)) return -EINVAL; - if (new_mtu > 4000) { - jme->reg_rxcs &= ~RXCS_FIFOTHNP; - jme->reg_rxcs |= RXCS_FIFOTHNP_64QW; - jme_restart_rx_engine(jme); - } else { - jme->reg_rxcs &= ~RXCS_FIFOTHNP; - jme->reg_rxcs |= RXCS_FIFOTHNP_128QW; - jme_restart_rx_engine(jme); - } netdev->mtu = new_mtu; netdev_update_features(netdev); + jme_restart_rx_engine(jme); jme_reset_link(jme); return 0; diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h index 4304072..3efc897 100644 --- a/drivers/net/ethernet/jme.h +++ b/drivers/net/ethernet/jme.h @@ -730,7 +730,7 @@ enum jme_rxcs_values { RXCS_RETRYCNT_60 = 0x00000F00, RXCS_DEFAULT = RXCS_FIFOTHTP_128T | - RXCS_FIFOTHNP_128QW | + RXCS_FIFOTHNP_16QW | RXCS_DMAREQSZ_128B | RXCS_RETRYGAP_256ns | RXCS_RETRYCNT_32, -- cgit v0.10.2 From 5095d64db1b978bdb31d30fed9e47dbf04f729be Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Tue, 21 Feb 2012 22:10:49 +0000 Subject: ipv6: ip6_route_output() never returns NULL. ip6_route_output() never returns NULL, so it is wrong to check if the return value is NULL. Signed-off-by: RongQing.Li Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index c7e95c8..5aa3981 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1926,8 +1926,10 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, }; dst = ip6_route_output(net, NULL, &fl6); - if (!dst) + if (dst->error) { + dst_release(dst); goto out_free; + } skb_dst_drop(skb); skb_dst_set(skb, dst); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d8f02ef..c964958 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1545,9 +1545,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL) + if (dst->error) { + dst_release(dst); return; - + } dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; -- cgit v0.10.2 From 0541743b4b35f2ddc9e490b4e354930168b60d23 Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Tue, 21 Feb 2012 22:10:50 +0000 Subject: ethernet/broadcom: ip6_route_output() never returns NULL. ip6_route_output() never returns NULL, so it is wrong to check if the return value is NULL. Signed-off-by: RongQing.Li Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index dd3a0a2..818a573 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -3584,7 +3584,11 @@ static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr, fl6.flowi6_oif = dst_addr->sin6_scope_id; *dst = ip6_route_output(&init_net, NULL, &fl6); - if (*dst) + if ((*dst)->error) { + dst_release(*dst); + *dst = NULL; + return -ENETUNREACH; + } else return 0; #endif -- cgit v0.10.2 From 5d38b1f8cf8798d4df7809b3f3e38fad4d923e85 Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Tue, 21 Feb 2012 22:10:51 +0000 Subject: netfilter: ip6_route_output() never returns NULL. ip6_route_output() never returns NULL, so it is wrong to check if the return value is NULL. Signed-off-by: RongQing.Li Signed-off-by: David S. Miller diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 3aae66f..4d50579 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -152,9 +152,10 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL) + if (dst->error) { + dst_release(dst); return false; - + } skb_dst_drop(skb); skb_dst_set(skb, dst); skb->dev = dst->dev; -- cgit v0.10.2 From 22ad7499bc9297e47c8779bf5523694f28338499 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Feb 2012 21:30:25 +0000 Subject: hso: memsetting wrong data in hso_get_count() The intent was to clear out the icount struct here, but we accidentally clear stack memory instead. It probably will lead to a NULL dereference right away. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 304fe78..e1324b4 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1632,7 +1632,7 @@ static int hso_get_count(struct tty_struct *tty, struct hso_serial *serial = get_serial_by_tty(tty); struct hso_tiocmget *tiocmget = serial->tiocmget; - memset(&icount, 0, sizeof(struct serial_icounter_struct)); + memset(icount, 0, sizeof(struct serial_icounter_struct)); if (!tiocmget) return -ENOENT; -- cgit v0.10.2 From ee932bf9acb2e2c6a309e808000f24856330e3f9 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Tue, 21 Feb 2012 13:06:00 +0000 Subject: Move Logitech Harmony 900 from cdc_ether to zaurus In the current kernel implementation, the Logitech Harmony 900 remote control is matched to the cdc_ether driver through the generic USB_CDC_SUBCLASS_MDLM entry. However, this device appears to be of the pseudo-MDLM (Belcarra) type, rather than the standard one. This patch blacklists the Harmony 900 from the cdc_ether driver and whitelists it for the pseudo-MDLM driver in zaurus. Signed-off-by: Scott Talbert Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 41a61ef..90a3002 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -573,6 +573,13 @@ static const struct usb_device_id products [] = { .driver_info = 0, }, +/* Logitech Harmony 900 - uses the pseudo-MDLM (BLAN) driver */ +{ + USB_DEVICE_AND_INTERFACE_INFO(0x046d, 0xc11f, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* * WHITELIST!!! * diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index ed2caed..c3197ce 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -354,6 +354,13 @@ static const struct usb_device_id products [] = { ZAURUS_MASTER_INTERFACE, .driver_info = OLYMPUS_MXL_INFO, }, + +/* Logitech Harmony 900 - uses the pseudo-MDLM (BLAN) driver */ +{ + USB_DEVICE_AND_INTERFACE_INFO(0x046d, 0xc11f, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &bogus_mdlm_info, +}, { }, // END }; MODULE_DEVICE_TABLE(usb, products); -- cgit v0.10.2 From 03606895cd98c0a628b17324fd7b5ff15db7e3cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Feb 2012 10:55:02 +0000 Subject: ipsec: be careful of non existing mac headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Niccolo Belli reported ipsec crashes in case we handle a frame without mac header (atm in his case) Before copying mac header, better make sure it is present. Bugzilla reference: https://bugzilla.kernel.org/show_bug.cgi?id=42809 Reported-by: Niccolò Belli Tested-by: Niccolò Belli Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50db9b0..ae86ade 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1465,6 +1465,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_mac_header_rebuild(struct sk_buff *skb) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -skb->mac_len); + memmove(skb_mac_header(skb), old_mac, skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 6341818..e3db3f9 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); - - memmove(skb->data - skb->mac_len, skb_mac_header(skb), - skb->mac_len); - skb_set_mac_header(skb, -skb->mac_len); + skb_mac_header_rebuild(skb); xfrm4_beet_make_header(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 534972e..ed4bf11 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - const unsigned char *old_mac; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index a81ce94..9949a35 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *ip6h; - const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); int err; @@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) __skb_push(skb, size); skb_reset_network_header(skb); - - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_mac_header_rebuild(skb); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 261e6e6..9f2095b 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - const unsigned char *old_mac; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; @@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: -- cgit v0.10.2 From b8e3995af4c7da7707b1710332a31f66e06b74dc Mon Sep 17 00:00:00 2001 From: David McKay Date: Tue, 21 Feb 2012 21:24:57 +0000 Subject: netdev/phy/icplus: Correct broken phy_init code The code for ip1001_config_init() was totally broken if you were not using RGMII. Instead of returning an error code or zero it actually returned the value in the IP1001_SPEC_CTRL_STATUS_2 register. It was also trying to set the IP1001_APS_ON bit , but never actually wrote back the register. The error checking was also incorrect in both this function and the reset function, so this patch fixes that up in a consistent fashion. Signed-off-by: David McKay Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index c81f136..7ee4f58 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -98,20 +98,24 @@ static int ip175c_config_init(struct phy_device *phydev) static int ip1xx_reset(struct phy_device *phydev) { - int err, bmcr; + int bmcr; /* Software Reset PHY */ bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; bmcr |= BMCR_RESET; - err = phy_write(phydev, MII_BMCR, bmcr); - if (err < 0) - return err; + bmcr = phy_write(phydev, MII_BMCR, bmcr); + if (bmcr < 0) + return bmcr; do { bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; } while (bmcr & BMCR_RESET); - return err; + return 0; } static int ip1001_config_init(struct phy_device *phydev) @@ -124,7 +128,10 @@ static int ip1001_config_init(struct phy_device *phydev) /* Enable Auto Power Saving mode */ c = phy_read(phydev, IP1001_SPEC_CTRL_STATUS_2); + if (c < 0) + return c; c |= IP1001_APS_ON; + c = phy_write(phydev, IP1001_SPEC_CTRL_STATUS_2, c); if (c < 0) return c; @@ -132,11 +139,16 @@ static int ip1001_config_init(struct phy_device *phydev) /* Additional delay (2ns) used to adjust RX clock phase * at RGMII interface */ c = phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); + if (c < 0) + return c; + c |= IP1001_PHASE_SEL_MASK; c = phy_write(phydev, IP10XX_SPEC_CTRL_STATUS, c); + if (c < 0) + return c; } - return c; + return 0; } static int ip101a_config_init(struct phy_device *phydev) -- cgit v0.10.2 From e3e09f2645b435062a34a2587a32ae8e5a5b48ab Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Tue, 21 Feb 2012 21:26:28 +0000 Subject: phy: IC+101G and PHY_HAS_INTERRUPT flag This patch adds the PHY_HAS_INTERRUPT flag for IC+101 device series. Also the patch does a simple dity-up to signal that the driver actually is for IP101A LF and IP101G devices. In fact, these are two similar PHYs that have the same IDs and mainly differ for the EEE capability supported in the G series. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 7ee4f58..0856e1b 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -30,16 +30,16 @@ #include #include -MODULE_DESCRIPTION("ICPlus IP175C/IP101A/IC1001 PHY drivers"); +MODULE_DESCRIPTION("ICPlus IP175C/IP101A/IP101G/IC1001 PHY drivers"); MODULE_AUTHOR("Michael Barkowski"); MODULE_LICENSE("GPL"); -/* IP101A/IP1001 */ -#define IP10XX_SPEC_CTRL_STATUS 16 /* Spec. Control Register */ -#define IP1001_SPEC_CTRL_STATUS_2 20 /* IP1001 Spec. Control Reg 2 */ -#define IP1001_PHASE_SEL_MASK 3 /* IP1001 RX/TXPHASE_SEL */ -#define IP1001_APS_ON 11 /* IP1001 APS Mode bit */ -#define IP101A_APS_ON 2 /* IP101A APS Mode bit */ +/* IP101A/G - IP1001 */ +#define IP10XX_SPEC_CTRL_STATUS 16 /* Spec. Control Register */ +#define IP1001_SPEC_CTRL_STATUS_2 20 /* IP1001 Spec. Control Reg 2 */ +#define IP1001_PHASE_SEL_MASK 3 /* IP1001 RX/TXPHASE_SEL */ +#define IP1001_APS_ON 11 /* IP1001 APS Mode bit */ +#define IP101A_G_APS_ON 2 /* IP101A/G APS Mode bit */ static int ip175c_config_init(struct phy_device *phydev) { @@ -151,7 +151,7 @@ static int ip1001_config_init(struct phy_device *phydev) return 0; } -static int ip101a_config_init(struct phy_device *phydev) +static int ip101a_g_config_init(struct phy_device *phydev) { int c; @@ -161,7 +161,7 @@ static int ip101a_config_init(struct phy_device *phydev) /* Enable Auto Power Saving mode */ c = phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); - c |= IP101A_APS_ON; + c |= IP101A_G_APS_ON; return c; } @@ -203,6 +203,7 @@ static struct phy_driver ip1001_driver = { .phy_id_mask = 0x0ffffff0, .features = PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_INTERRUPT, .config_init = &ip1001_config_init, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, @@ -211,13 +212,14 @@ static struct phy_driver ip1001_driver = { .driver = { .owner = THIS_MODULE,}, }; -static struct phy_driver ip101a_driver = { +static struct phy_driver ip101a_g_driver = { .phy_id = 0x02430c54, - .name = "ICPlus IP101A", + .name = "ICPlus IP101A/G", .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .config_init = &ip101a_config_init, + .flags = PHY_HAS_INTERRUPT, + .config_init = &ip101a_g_config_init, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, .suspend = genphy_suspend, @@ -233,7 +235,7 @@ static int __init icplus_init(void) if (ret < 0) return -ENODEV; - ret = phy_driver_register(&ip101a_driver); + ret = phy_driver_register(&ip101a_g_driver); if (ret < 0) return -ENODEV; @@ -243,7 +245,7 @@ static int __init icplus_init(void) static void __exit icplus_exit(void) { phy_driver_unregister(&ip1001_driver); - phy_driver_unregister(&ip101a_driver); + phy_driver_unregister(&ip101a_g_driver); phy_driver_unregister(&ip175c_driver); } @@ -253,6 +255,7 @@ module_exit(icplus_exit); static struct mdio_device_id __maybe_unused icplus_tbl[] = { { 0x02430d80, 0x0ffffff0 }, { 0x02430d90, 0x0ffffff0 }, + { 0x02430c54, 0x0ffffff0 }, { } }; -- cgit v0.10.2 From 1e0f03d57d7092f1f4d93a91fb7ece57b1514a88 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 23 Feb 2012 07:04:35 +0000 Subject: mlx4_core: Fixing array indexes when setting port types Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 32f8799..d498f04 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -538,7 +538,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, mlx4_unregister_device(dev); for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); - dev->caps.port_type[port + 1] = port_types[port]; + dev->caps.port_type[port] = port_types[port - 1]; err = mlx4_SET_PORT(dev, port); if (err) { mlx4_err(dev, "Failed to set port %d, " -- cgit v0.10.2 From 5d69703263d588dbb03f4e57091afd8942d96e6d Mon Sep 17 00:00:00 2001 From: Christian Riesch Date: Thu, 23 Feb 2012 01:14:17 +0000 Subject: davinci_emac: Do not free all rx dma descriptors during init This patch fixes a regression that was introduced by commit 0a5f38467765ee15478db90d81e40c269c8dda20 davinci_emac: Add Carrier Link OK check in Davinci RX Handler Said commit adds a check whether the carrier link is ok. If the link is not ok, the skb is freed and no new dma descriptor added to the rx dma channel. This causes trouble during initialization when the carrier status has not yet been updated. If a lot of packets are received while netif_carrier_ok returns false, all dma descriptors are freed and the rx dma transfer is stopped. The bug occurs when the board is connected to a network with lots of traffic and the ifconfig down/up is done, e.g., when reconfiguring the interface with DHCP. The bug can be reproduced by flood pinging the davinci board while doing ifconfig eth0 down ifconfig eth0 up on the board. After that, the rx path stops working and the overrun value reported by ifconfig is counting up. This patch reverts commit 0a5f38467765ee15478db90d81e40c269c8dda20 and instead issues warnings only if cpdma_chan_submit returns -ENOMEM. Signed-off-by: Christian Riesch Cc: Cc: Hegde, Vinay Cc: Cyril Chemparathy Cc: Sascha Hauer Tested-by: Rajashekhara, Sudhakar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 4fa0bcb..4b2f545 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1009,7 +1009,7 @@ static void emac_rx_handler(void *token, int len, int status) int ret; /* free and bail if we are shutting down */ - if (unlikely(!netif_running(ndev) || !netif_carrier_ok(ndev))) { + if (unlikely(!netif_running(ndev))) { dev_kfree_skb_any(skb); return; } @@ -1038,7 +1038,9 @@ static void emac_rx_handler(void *token, int len, int status) recycle: ret = cpdma_chan_submit(priv->rxchan, skb, skb->data, skb_tailroom(skb), GFP_KERNEL); - if (WARN_ON(ret < 0)) + + WARN_ON(ret == -ENOMEM); + if (unlikely(ret < 0)) dev_kfree_skb_any(skb); } -- cgit v0.10.2 From 279072882dc0149e5740dace075e1a49f087046d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Feb 2012 12:18:38 +0100 Subject: Revert "netfilter: ctnetlink: fix soft lockup when netlink adds new entries" This reverts commit af14cca162ddcdea017b648c21b9b091e4bf1fa4. This patch contains a race condition between packets and ctnetlink in the conntrack addition. A new patch to fix this issue follows up. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cc70517..9307b03 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1367,12 +1367,15 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { + spin_lock_bh(&nf_conntrack_lock); err = -EOPNOTSUPP; goto err1; } + spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), @@ -1466,10 +1469,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, tstamp->start = ktime_to_ns(ktime_get_real()); add_timer(&ct->timeout); - spin_lock_bh(&nf_conntrack_lock); nf_conntrack_hash_insert(ct); - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); rcu_read_unlock(); return ct; @@ -1490,7 +1490,6 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; u16 zone; int err; @@ -1513,22 +1512,25 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = nf_conntrack_find_get(net, zone, &otuple); + h = __nf_conntrack_find(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = nf_conntrack_find_get(net, zone, &rtuple); - spin_unlock_bh(&nf_conntrack_lock); + h = __nf_conntrack_find(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { + struct nf_conn *ct; enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); - if (IS_ERR(ct)) - return PTR_ERR(ct); - + if (IS_ERR(ct)) { + err = PTR_ERR(ct); + goto out_unlock; + } err = 0; + nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); if (test_bit(IPS_EXPECTED_BIT, &ct->status)) events = IPCT_RELATED; else @@ -1543,19 +1545,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); nf_ct_put(ct); - } + } else + spin_unlock_bh(&nf_conntrack_lock); return err; } /* implicit 'else' */ + /* We manipulate the conntrack inside the global conntrack table lock, + * so there's no need to increase the refcount */ err = -EEXIST; - ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - spin_lock_bh(&nf_conntrack_lock); + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + err = ctnetlink_change_conntrack(ct, cda); - spin_unlock_bh(&nf_conntrack_lock); if (err == 0) { + nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1564,10 +1570,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_MARK), ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); - } + nf_ct_put(ct); + } else + spin_unlock_bh(&nf_conntrack_lock); + + return err; } - nf_ct_put(ct); +out_unlock: + spin_unlock_bh(&nf_conntrack_lock); return err; } -- cgit v0.10.2 From 7d367e06688dc7a2cc98c2ace04e1296e1d987e2 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 24 Feb 2012 11:45:49 +0100 Subject: netfilter: ctnetlink: fix soft lockup when netlink adds new entries (v2) Marcell Zambo and Janos Farago noticed and reported that when new conntrack entries are added via netlink and the conntrack table gets full, soft lockup happens. This is because the nf_conntrack_lock is held while nf_conntrack_alloc is called, which is in turn wants to lock nf_conntrack_lock while evicting entries from the full table. The patch fixes the soft lockup with limiting the holding of the nf_conntrack_lock to the minimum, where it's absolutely required. It required to extend (and thus change) nf_conntrack_hash_insert so that it makes sure conntrack and ctnetlink do not add the same entry twice to the conntrack table. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8a2b0ae..ab86036 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -209,7 +209,7 @@ extern struct nf_conntrack_tuple_hash * __nf_conntrack_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -extern void nf_conntrack_hash_insert(struct nf_conn *ct); +extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); extern void nf_ct_insert_dying_list(struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 76613f5..ed86a3b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -404,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, &net->ct.hash[repl_hash]); } -void nf_conntrack_hash_insert(struct nf_conn *ct) +int +nf_conntrack_hash_check_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; u16 zone; zone = nf_ct_zone(ct); - hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + spin_lock_bh(&nf_conntrack_lock); + /* See if there's one in the list already, including reverse */ + hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + goto out; + hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + goto out; + + add_timer(&ct->timeout); + nf_conntrack_get(&ct->ct_general); __nf_conntrack_hash_insert(ct, hash, repl_hash); + NF_CT_STAT_INC(net, insert); + spin_unlock_bh(&nf_conntrack_lock); + + return 0; + +out: + NF_CT_STAT_INC(net, insert_failed); + spin_unlock_bh(&nf_conntrack_lock); + return -EEXIST; } -EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); +EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); /* Confirm a connection given skb; places it in hash table */ int diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b03..30c9d4c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1367,15 +1367,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); - spin_unlock_bh(&nf_conntrack_lock); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_lock); err = -EOPNOTSUPP; goto err1; } - spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), @@ -1468,8 +1465,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, if (tstamp) tstamp->start = ktime_to_ns(ktime_get_real()); - add_timer(&ct->timeout); - nf_conntrack_hash_insert(ct); + err = nf_conntrack_hash_check_insert(ct); + if (err < 0) + goto err2; + rcu_read_unlock(); return ct; @@ -1490,6 +1489,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; u16 zone; int err; @@ -1510,27 +1510,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } - spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, zone, &otuple); + h = nf_conntrack_find_get(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, zone, &rtuple); + h = nf_conntrack_find_get(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - struct nf_conn *ct; enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); - if (IS_ERR(ct)) { - err = PTR_ERR(ct); - goto out_unlock; - } + if (IS_ERR(ct)) + return PTR_ERR(ct); + err = 0; - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); if (test_bit(IPS_EXPECTED_BIT, &ct->status)) events = IPCT_RELATED; else @@ -1545,23 +1540,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); + } return err; } /* implicit 'else' */ - /* We manipulate the conntrack inside the global conntrack table lock, - * so there's no need to increase the refcount */ err = -EEXIST; + ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - + spin_lock_bh(&nf_conntrack_lock); err = ctnetlink_change_conntrack(ct, cda); + spin_unlock_bh(&nf_conntrack_lock); if (err == 0) { - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1570,15 +1561,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_MARK), ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); - nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); - - return err; + } } -out_unlock: - spin_unlock_bh(&nf_conntrack_lock); + nf_ct_put(ct); return err; } -- cgit v0.10.2 From bff528578fc3c4a14227b052a313109b5ffb73da Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 24 Feb 2012 08:08:20 +0000 Subject: gre: fix spelling in comments The original spelling and bad word choice makes these comments hard to read. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6b3ca5b..38673d2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -65,7 +65,7 @@ it is infeasible task. The most general solutions would be to keep skb->encapsulation counter (sort of local ttl), and silently drop packet when it expires. It is a good - solution, but it supposes maintaing new variable in ALL + solution, but it supposes maintaining new variable in ALL skb, even if no tunneling is used. Current solution: xmit_recursion breaks dead loops. This is a percpu @@ -91,14 +91,14 @@ One of them is to parse packet trying to detect inner encapsulation made by our node. It is difficult or even impossible, especially, - taking into account fragmentation. TO be short, tt is not solution at all. + taking into account fragmentation. TO be short, ttl is not solution at all. Current solution: The solution was UNEXPECTEDLY SIMPLE. We force DF flag on tunnels with preconfigured hop limit, that is ALL. :-) Well, it does not remove the problem completely, but exponential growth of network traffic is changed to linear (branches, that exceed pmtu are pruned) and tunnel mtu - fastly degrades to value <68, where looping stops. + rapidly degrades to value <68, where looping stops. Yes, it is not good if there exists a router in the loop, which does not force DF, even when encapsulating packets have DF set. But it is not our problem! Nobody could accuse us, we made @@ -457,8 +457,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info) GRE tunnels with enabled checksum. Tell them "thank you". Well, I wonder, rfc1812 was written by Cisco employee, - what the hell these idiots break standrads established - by themself??? + what the hell these idiots break standards established + by themselves??? */ const struct iphdr *iph = (const struct iphdr *)skb->data; -- cgit v0.10.2 From 21ca54e99b085b9ff4c91ca41afe42a439966109 Mon Sep 17 00:00:00 2001 From: Santosh Nayak Date: Fri, 24 Feb 2012 06:56:39 +0000 Subject: enic: Fix endianness bug. Sparse complaints the endian bug. Signed-off-by: Santosh Nayak Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/cq_enet_desc.h b/drivers/net/ethernet/cisco/enic/cq_enet_desc.h index c2c0680..ac37cac 100644 --- a/drivers/net/ethernet/cisco/enic/cq_enet_desc.h +++ b/drivers/net/ethernet/cisco/enic/cq_enet_desc.h @@ -157,7 +157,7 @@ static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; *fcoe_enc_error = (desc->flags & CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; - *fcoe_eof = (u8)((desc->checksum_fcoe >> + *fcoe_eof = (u8)((le16_to_cpu(desc->checksum_fcoe) >> CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & CQ_ENET_RQ_DESC_FCOE_EOF_MASK); *checksum = 0; diff --git a/drivers/net/ethernet/cisco/enic/enic_pp.c b/drivers/net/ethernet/cisco/enic/enic_pp.c index 22bf03a..c347b62 100644 --- a/drivers/net/ethernet/cisco/enic/enic_pp.c +++ b/drivers/net/ethernet/cisco/enic/enic_pp.c @@ -72,7 +72,7 @@ static int enic_set_port_profile(struct enic *enic, int vf) struct enic_port_profile *pp; struct vic_provinfo *vp; const u8 oui[3] = VIC_PROVINFO_CISCO_OUI; - const u16 os_type = htons(VIC_GENERIC_PROV_OS_TYPE_LINUX); + const __be16 os_type = htons(VIC_GENERIC_PROV_OS_TYPE_LINUX); char uuid_str[38]; char client_mac_str[18]; u8 *client_mac; -- cgit v0.10.2 From 8a49ad6e89feb5015e77ce6efeb2678947117e20 Mon Sep 17 00:00:00 2001 From: Ben McKeegan Date: Fri, 24 Feb 2012 06:33:56 +0000 Subject: ppp: fix 'ppp_mp_reconstruct bad seq' errors This patch fixes a (mostly cosmetic) bug introduced by the patch 'ppp: Use SKB queue abstraction interfaces in fragment processing' found here: http://www.spinics.net/lists/netdev/msg153312.html The above patch rewrote and moved the code responsible for cleaning up discarded fragments but the new code does not catch every case where this is necessary. This results in some discarded fragments remaining in the queue, and triggering a 'bad seq' error on the subsequent call to ppp_mp_reconstruct. Fragments are discarded whenever other fragments of the same frame have been lost. This can generate a lot of unwanted and misleading log messages. This patch also adds additional detail to the debug logging to make it clearer which fragments were lost and which other fragments were discarded as a result of losses. (Run pppd with 'kdebug 1' option to enable debug logging.) Signed-off-by: Ben McKeegan Signed-off-by: David S. Miller diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index edfa15d..486b404 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2024,14 +2024,22 @@ ppp_mp_reconstruct(struct ppp *ppp) continue; } if (PPP_MP_CB(p)->sequence != seq) { + u32 oldseq; /* Fragment `seq' is missing. If it is after minseq, it might arrive later, so stop here. */ if (seq_after(seq, minseq)) break; /* Fragment `seq' is lost, keep going. */ lost = 1; + oldseq = seq; seq = seq_before(minseq, PPP_MP_CB(p)->sequence)? minseq + 1: PPP_MP_CB(p)->sequence; + + if (ppp->debug & 1) + netdev_printk(KERN_DEBUG, ppp->dev, + "lost frag %u..%u\n", + oldseq, seq-1); + goto again; } @@ -2076,6 +2084,10 @@ ppp_mp_reconstruct(struct ppp *ppp) struct sk_buff *tmp2; skb_queue_reverse_walk_from_safe(list, p, tmp2) { + if (ppp->debug & 1) + netdev_printk(KERN_DEBUG, ppp->dev, + "discarding frag %u\n", + PPP_MP_CB(p)->sequence); __skb_unlink(p, list); kfree_skb(p); } @@ -2091,6 +2103,17 @@ ppp_mp_reconstruct(struct ppp *ppp) /* If we have discarded any fragments, signal a receive error. */ if (PPP_MP_CB(head)->sequence != ppp->nextseq) { + skb_queue_walk_safe(list, p, tmp) { + if (p == head) + break; + if (ppp->debug & 1) + netdev_printk(KERN_DEBUG, ppp->dev, + "discarding frag %u\n", + PPP_MP_CB(p)->sequence); + __skb_unlink(p, list); + kfree_skb(p); + } + if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, " missed pkts %u..%u\n", -- cgit v0.10.2 From ff3bc1e7527504a93710535611b2f812f3bb89bf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 25 Feb 2012 00:03:10 +0000 Subject: sfc: Fix assignment of ip_summed for pre-allocated skbs When pre-allocating skbs for received packets, we set ip_summed = CHECKSUM_UNNCESSARY. We used to change it back to CHECKSUM_NONE when the received packet had an incorrect checksum or unhandled protocol. Commit bc8acf2c8c3e43fcc192762a9f964b3e9a17748b ('drivers/net: avoid some skb->ip_summed initializations') mistakenly replaced the latter assignment with a DEBUG-only assertion that ip_summed == CHECKSUM_NONE. This assertion is always false, but it seems no-one has exercised this code path in a DEBUG build. Fix this by moving our assignment of CHECKSUM_UNNECESSARY into efx_rx_packet_gro(). Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index aca3498..fc52fca 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -156,11 +156,10 @@ static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue) if (unlikely(!skb)) return -ENOMEM; - /* Adjust the SKB for padding and checksum */ + /* Adjust the SKB for padding */ skb_reserve(skb, NET_IP_ALIGN); rx_buf->len = skb_len - NET_IP_ALIGN; rx_buf->is_page = false; - skb->ip_summed = CHECKSUM_UNNECESSARY; rx_buf->dma_addr = pci_map_single(efx->pci_dev, skb->data, rx_buf->len, @@ -496,6 +495,7 @@ static void efx_rx_packet_gro(struct efx_channel *channel, EFX_BUG_ON_PARANOID(!checksummed); rx_buf->u.skb = NULL; + skb->ip_summed = CHECKSUM_UNNECESSARY; gro_result = napi_gro_receive(napi, skb); } -- cgit v0.10.2