From 12b101555f4a67db67a66966a516075bd477741f Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Fri, 21 Mar 2008 15:01:50 -0700 Subject: [IPV4]: Fix null dereference in ip_defrag Been seeing occasional panics in my testing of 2.6.25-rc in ip_defrag. Offending line in ip_defrag is here: net = skb->dev->nd_net where dev is NULL. Bisected the problem down to commit ac18e7509e7df327e30d6e073a787d922eaf211d ([NETNS][FRAGS]: Make the inet_frag_queue lookup work in namespaces). Below patch (idea from Patrick McHardy) fixes the problem for me. Signed-off-by: Phil Oester Acked-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a2e92f9..3b2e5ad 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -568,7 +568,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); - net = skb->dev->nd_net; + net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); -- cgit v0.10.2 From 1233823b0847190976d69a86d7bb1287992ba2c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Mar 2008 15:40:47 -0700 Subject: [SCTP]: Fix build warnings with IPV6 disabled. Introduced by 270637abff0cdf848b910b9f96ad342e1da61c66 ("[SCTP]: Fix a race between module load and protosw access") Reported by Gabriel C: In file included from net/sctp/sm_statetable.c:50: include/net/sctp/sctp.h: In function 'sctp_v6_pf_init': include/net/sctp/sctp.h:392: warning: 'return' with a value, in function returning void In file included from net/sctp/sm_statefuns.c:62: include/net/sctp/sctp.h: In function 'sctp_v6_pf_init': include/net/sctp/sctp.h:392: warning: 'return' with a value, in function returning void ... Signed-off-by: David S. 
Miller diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 57ed3e3..ea80673 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -389,7 +389,7 @@ void sctp_v6_del_protocol(void); #else /* #ifdef defined(CONFIG_IPV6) */ -static inline void sctp_v6_pf_init(void) { return 0; } +static inline void sctp_v6_pf_init(void) { return; } static inline void sctp_v6_pf_exit(void) { return; } static inline int sctp_v6_protosw_init(void) { return 0; } static inline void sctp_v6_protosw_exit(void) { return; } -- cgit v0.10.2 From 7512cbf6efc97644812f137527a54b8e92b6a90a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 21 Mar 2008 15:58:52 -0700 Subject: [DLCI]: Fix tiny race between module unload and sock_ioctl. This is a narrow pedantry :) but the dlci_ioctl_hook check and call should not be parted with the mutex lock. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller diff --git a/net/socket.c b/net/socket.c index b6d35cd..9d3fbfb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -909,11 +909,10 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!dlci_ioctl_hook) request_module("dlci"); - if (dlci_ioctl_hook) { - mutex_lock(&dlci_ioctl_mutex); + mutex_lock(&dlci_ioctl_mutex); + if (dlci_ioctl_hook) err = dlci_ioctl_hook(cmd, argp); - mutex_unlock(&dlci_ioctl_mutex); - } + mutex_unlock(&dlci_ioctl_mutex); break; default: err = sock->ops->ioctl(sock, cmd, arg); -- cgit v0.10.2 From 69d1506731168d6845a76a303b2c45f7c05f3f2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 22 Mar 2008 15:47:05 -0700 Subject: [TCP]: Let skbs grow over a page on fast peers While testing the virtio-net driver on KVM with TSO I noticed that TSO performance with a 1500 MTU is significantly worse compared to the performance of non-TSO with a 16436 MTU. The packet dump shows that most of the packets sent are smaller than a page. 
Looking at the code this actually is quite obvious as it always stops extending the packet if it's the first packet yet to be sent and if it's larger than the MSS. Since each extension is bound by the page size, this means that (given a 1500 MTU) we're very unlikely to construct packets greater than a page, provided that the receiver and the path are fast enough so that packets can always be sent immediately. The fix is also quite obvious. The push calls inside the loop are just an optimisation so that we don't end up doing all the sending at the end of the loop. Therefore there is no specific reason why it has to do so at MSS boundaries. For TSO, the most natural extension of this optimisation is to do the pushing once the skb exceeds the TSO size goal. This is what the patch does and testing with KVM shows that the TSO performance with a 1500 MTU easily surpasses that of a 16436 MTU and indeed the packet sizes sent are generally larger than 16436. I don't see any obvious downsides for slower peers or connections, but it would be prudent to test this extensively to ensure that those cases don't regress. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 071e83a..39b629a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -735,7 +735,7 @@ new_segment: if (!(psize -= copy)) goto out; - if (skb->len < mss_now || (flags & MSG_OOB)) + if (skb->len < size_goal || (flags & MSG_OOB)) continue; if (forced_push(tp)) { @@ -981,7 +981,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < mss_now || (flags & MSG_OOB)) + if (skb->len < size_goal || (flags & MSG_OOB)) continue; if (forced_push(tp)) { -- cgit v0.10.2 From 6440cc9e0f48ade57af7be28008cbfa6a991f287 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 22 Mar 2008 17:59:58 -0700 Subject: [IPV4] fib_trie: fix warning from rcu_assign_pointer This gets rid of a warning caused by the test in rcu_assign_pointer. 
I tried to fix rcu_assign_pointer, but that devolved into a long set of discussions about doing it right that came to no real solution. Since the test in rcu_assign_pointer for constant NULL would never succeed in fib_trie, just open code instead. Signed-off-by: Stephen Hemminger Acked-by: Paul E. McKenney Signed-off-by: David S. Miller diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1ff446d..f6cdc01 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -177,10 +177,13 @@ static inline struct tnode *node_parent_rcu(struct node *node) return rcu_dereference(ret); } +/* Same as rcu_assign_pointer + * but that macro() assumes that value is a pointer. + */ static inline void node_set_parent(struct node *node, struct tnode *ptr) { - rcu_assign_pointer(node->parent, - (unsigned long)ptr | NODE_TYPE(node)); + smp_wmb(); + node->parent = (unsigned long)ptr | NODE_TYPE(node); } static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) -- cgit v0.10.2 From 421f099bc555c5f1516fdf5060de1d6bb5f51002 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 Mar 2008 18:04:16 -0700 Subject: [IPV6] net/ipv6/ndisc.c: remove unused variable The variable hlen is initialized but never used otherwise. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ type T; identifier i; constant C; @@ ( extern T i; | - T i; <+... when != i - i = C; ...+> ) // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0d33a7d..51557c2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1420,7 +1420,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, u8 *opt; int rd_len; int err; - int hlen; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; dev = skb->dev; @@ -1491,7 +1490,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - hlen = 0; skb_reserve(buff, LL_RESERVED_SPACE(dev)); ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, -- cgit v0.10.2 From 53a6201fdfa04accc91ea1a7accce8e8bc37ef8e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 Mar 2008 18:05:33 -0700 Subject: [9P] net/9p/trans_fd.c: remove unused variable The variable cb is initialized but never used otherwise. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ type T; identifier i; constant C; @@ ( extern T i; | - T i; <+... when != i - i = C; ...+> ) // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 1aa9d51..4e8d4e7 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -861,7 +861,6 @@ static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) static void p9_mux_flush_cb(struct p9_req *freq, void *a) { - p9_conn_req_callback cb; int tag; struct p9_conn *m; struct p9_req *req, *rreq, *rptr; @@ -872,7 +871,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a) freq->tcall->params.tflush.oldtag); spin_lock(&m->lock); - cb = NULL; tag = freq->tcall->params.tflush.oldtag; req = NULL; list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { -- cgit v0.10.2 From 2572c149a2f52232ce690ddb9c6fd0c90ffd61cd Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 23 Mar 2008 03:07:45 -0700 Subject: BNX2X: prevent ethtool from setting port type On 10GBaseT boards setting the type to TP will cause the driver to try to configure 1GBaseT. 
Since there are currently no boards that support setting of the port type, disable this for now. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller diff --git a/drivers/net/bnx2x.c b/drivers/net/bnx2x.c index 8af142c..de32b3f 100644 --- a/drivers/net/bnx2x.c +++ b/drivers/net/bnx2x.c @@ -63,8 +63,8 @@ #include "bnx2x.h" #include "bnx2x_init.h" -#define DRV_MODULE_VERSION "1.40.22" -#define DRV_MODULE_RELDATE "2007/11/27" +#define DRV_MODULE_VERSION "1.42.3" +#define DRV_MODULE_RELDATE "2008/3/9" #define BNX2X_BC_VER 0x040200 /* Time in jiffies before concluding the transmitter is hung. */ @@ -8008,38 +8008,6 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex, cmd->port, cmd->phy_address, cmd->transceiver, cmd->autoneg, cmd->maxtxpkt, cmd->maxrxpkt); - switch (cmd->port) { - case PORT_TP: - if (!(bp->supported & SUPPORTED_TP)) { - DP(NETIF_MSG_LINK, "TP not supported\n"); - return -EINVAL; - } - - if (bp->phy_flags & PHY_XGXS_FLAG) { - bnx2x_link_reset(bp); - bnx2x_link_settings_supported(bp, SWITCH_CFG_1G); - bnx2x_phy_deassert(bp); - } - break; - - case PORT_FIBRE: - if (!(bp->supported & SUPPORTED_FIBRE)) { - DP(NETIF_MSG_LINK, "FIBRE not supported\n"); - return -EINVAL; - } - - if (!(bp->phy_flags & PHY_XGXS_FLAG)) { - bnx2x_link_reset(bp); - bnx2x_link_settings_supported(bp, SWITCH_CFG_10G); - bnx2x_phy_deassert(bp); - } - break; - - default: - DP(NETIF_MSG_LINK, "Unknown port type\n"); - return -EINVAL; - } - if (cmd->autoneg == AUTONEG_ENABLE) { if (!(bp->supported & SUPPORTED_Autoneg)) { DP(NETIF_MSG_LINK, "Aotoneg not supported\n"); -- cgit v0.10.2 From da990a2402aeaee84837f29054c4628eb02f7493 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 23 Mar 2008 03:35:12 -0700 Subject: [SUNGEM]: Fix NAPI assertion failure. As reported by Johannes Berg: I started getting this warning with recent kernels: [ 773.908927] ------------[ cut here ]------------ [ 773.908954] Badness at net/core/dev.c:2204 ... 
If we loop more than once in gem_poll(), we'll use more than the real budget in our gem_rx() calls, thus eventually triggering the caller's assertions in net_rx_action(). Subtract "work_done" from "budget" for the second arg to gem_rx() to fix the bug. Signed-off-by: David S. Miller diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 9721279..4291458 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -912,7 +912,7 @@ static int gem_poll(struct napi_struct *napi, int budget) * rx ring - must call napi_disable(), which * schedule_timeout()'s if polling is already disabled. */ - work_done += gem_rx(gp, budget); + work_done += gem_rx(gp, budget - work_done); if (work_done >= budget) return work_done; -- cgit v0.10.2 From dbee0d3f4603b9d0e56234a0743321fe4dad31ca Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Sun, 23 Mar 2008 21:45:36 -0700 Subject: [ATM]: When proc_create() fails, do some error handling work and return -ENOMEM. Signed-off-by: Wang Chen Signed-off-by: David S. 
Miller diff --git a/net/atm/clip.c b/net/atm/clip.c index d30167c..2ab1e36 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -947,6 +947,8 @@ static const struct file_operations arp_seq_fops = { }; #endif +static void atm_clip_exit_noproc(void); + static int __init atm_clip_init(void) { neigh_table_init_no_netlink(&clip_tbl); @@ -963,18 +965,22 @@ static int __init atm_clip_init(void) struct proc_dir_entry *p; p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops); + if (!p) { + printk(KERN_ERR "Unable to initialize " + "/proc/net/atm/arp\n"); + atm_clip_exit_noproc(); + return -ENOMEM; + } } #endif return 0; } -static void __exit atm_clip_exit(void) +static void atm_clip_exit_noproc(void) { struct net_device *dev, *next; - remove_proc_entry("arp", atm_proc_root); - unregister_inetaddr_notifier(&clip_inet_notifier); unregister_netdevice_notifier(&clip_dev_notifier); @@ -1005,6 +1011,13 @@ static void __exit atm_clip_exit(void) clip_tbl_hook = NULL; } +static void __exit atm_clip_exit(void) +{ + remove_proc_entry("arp", atm_proc_root); + + atm_clip_exit_noproc(); +} + module_init(atm_clip_init); module_exit(atm_clip_exit); MODULE_AUTHOR("Werner Almesberger"); diff --git a/net/atm/lec.c b/net/atm/lec.c index 0e450d1..a2efa7f 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1250,6 +1250,10 @@ static int __init lane_module_init(void) struct proc_dir_entry *p; p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops); + if (!p) { + printk(KERN_ERR "Unable to initialize /proc/net/atm/lec\n"); + return -ENOMEM; + } #endif register_atm_ioctl(&lane_ioctl_ops); -- cgit v0.10.2 From 4b1b366721101f2f0d2350fbdccb679f7909cf57 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Sun, 23 Mar 2008 21:51:12 -0700 Subject: connector: convert to single-threaded workqueue From: Evgeniy Polyakov We don't need one cqueue thread for each CPU. cqueue is used for receiving userspace datagrams, which are very rare and thus will happily live with a single queue. 
Signed-off-by: Andrew Morton Signed-off-by: David S. Miller diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index 5732ca3..b6fe7e7 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -146,7 +146,7 @@ struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *nls) dev->nls = nls; - dev->cn_queue = create_workqueue(dev->name); + dev->cn_queue = create_singlethread_workqueue(dev->name); if (!dev->cn_queue) { kfree(dev); return NULL; -- cgit v0.10.2 From 8f3ea33a5078a09eba12bfe57424507809367756 Mon Sep 17 00:00:00 2001 From: Martin Devera Date: Sun, 23 Mar 2008 22:00:38 -0700 Subject: sch_htb: fix "too many events" situation HTB is an event-driven algorithm and part of its work is to apply scheduled events at the proper times. It tried to defend itself from livelock by processing only a limited number of events per dequeue. Because of faster computers some users already hit this hardcoded limit. This patch limits processing to at most 2 jiffies (why not 1 jiffy? Because it might stop prematurely when only a fraction of a jiffy remains). Signed-off-by: Martin Devera Signed-off-by: David S. 
Miller diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 795c761..66148cc 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -711,9 +711,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, */ static psched_time_t htb_do_events(struct htb_sched *q, int level) { - int i; - - for (i = 0; i < 500; i++) { + /* don't run for longer than 2 jiffies; 2 is used instead of + 1 to simplify things when jiffy is going to be incremented + too soon */ + unsigned long stop_at = jiffies + 2; + while (time_before(jiffies, stop_at)) { struct htb_class *cl; long diff; struct rb_node *p = rb_first(&q->wait_pq[level]); @@ -731,9 +733,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level) if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - if (net_ratelimit()) - printk(KERN_WARNING "htb: too many events !\n"); - return q->now + PSCHED_TICKS_PER_SEC / 10; + /* too much load - let's continue on next jiffie */ + return q->now + PSCHED_TICKS_PER_SEC / HZ; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL -- cgit v0.10.2