diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 23:29:25 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 23:29:25 (GMT) |
commit | 7a6362800cb7d1d618a697a650c7aaed3eb39320 (patch) | |
tree | 087f9bc6c13ef1fad4b392c5cf9325cd28fa8523 /net/core | |
parent | 6445ced8670f37cfc2c5e24a9de9b413dbfc788d (diff) | |
parent | ceda86a108671294052cbf51660097b6534672f5 (diff) | |
download | linux-7a6362800cb7d1d618a697a650c7aaed3eb39320.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1480 commits)
bonding: enable netpoll without checking link status
xfrm: Refcount destination entry on xfrm_lookup
net: introduce rx_handler results and logic around that
bonding: get rid of IFF_SLAVE_INACTIVE netdev->priv_flag
bonding: wrap slave state work
net: get rid of multiple bond-related netdevice->priv_flags
bonding: register slave pointer for rx_handler
be2net: Bump up the version number
be2net: Copyright notice change. Update to Emulex instead of ServerEngines
e1000e: fix kconfig for crc32 dependency
netfilter ebtables: fix xt_AUDIT to work with ebtables
xen network backend driver
bonding: Improve syslog message at device creation time
bonding: Call netif_carrier_off after register_netdevice
bonding: Incorrect TX queue offset
net_sched: fix ip_tos2prio
xfrm: fix __xfrm_route_forward()
be2net: Fix UDP packet detected status in RX compl
Phonet: fix aligned-mode pipe socket buffer header reserve
netxen: support for GbE port settings
...
Fix up conflicts in drivers/staging/brcm80211/brcmsmac/wl_mac80211.c
with the staging updates.
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 463 | ||||
-rw-r--r-- | net/core/dst.c | 43 | ||||
-rw-r--r-- | net/core/ethtool.c | 604 | ||||
-rw-r--r-- | net/core/fib_rules.c | 6 | ||||
-rw-r--r-- | net/core/filter.c | 6 | ||||
-rw-r--r-- | net/core/flow.c | 14 | ||||
-rw-r--r-- | net/core/neighbour.c | 13 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 17 | ||||
-rw-r--r-- | net/core/netpoll.c | 13 | ||||
-rw-r--r-- | net/core/pktgen.c | 233 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 86 | ||||
-rw-r--r-- | net/core/skbuff.c | 8 |
12 files changed, 1003 insertions, 503 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 6561021..0b88eba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include <trace/events/skb.h> #include <linux/pci.h> #include <linux/inetdevice.h> +#include <linux/cpu_rmap.h> #include "net-sysfs.h" @@ -1297,7 +1298,7 @@ static int __dev_close(struct net_device *dev) return retval; } -int dev_close_many(struct list_head *head) +static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(tmp_list); @@ -1605,6 +1606,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change + * @dev: Network device + * @txq: number of queues available + * + * If real_num_tx_queues is changed the tc mappings may no longer be + * valid. To resolve this verify the tc mapping remains valid and if + * not NULL the mapping. With no priorities mapping to this + * offset/count pair it will no longer be used. In the worst case TC0 + * is invalid nothing can be done so disable priority mappings. If is + * expected that drivers will fix this mapping if they can before + * calling netif_set_real_num_tx_queues. + */ +static void netif_setup_tc(struct net_device *dev, unsigned int txq) +{ + int i; + struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; + + /* If TC0 is invalidated disable TC mapping */ + if (tc->offset + tc->count > txq) { + pr_warning("Number of in use tx queues changed " + "invalidating tc mappings. Priority " + "traffic classification disabled!\n"); + dev->num_tc = 0; + return; + } + + /* Invalidated prio to tc mappings set to TC0 */ + for (i = 1; i < TC_BITMASK + 1; i++) { + int q = netdev_get_prio_tc_map(dev, i); + + tc = &dev->tc_to_txq[q]; + if (tc->offset + tc->count > txq) { + pr_warning("Number of in use tx queues " + "changed. 
Priority %i to tc " + "mapping %i is no longer valid " + "setting map to 0\n", + i, q); + netdev_set_prio_tc_map(dev, i, 0); + } + } +} + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -1616,7 +1659,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; - if (dev->reg_state == NETREG_REGISTERED) { + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, @@ -1624,6 +1668,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (rc) return rc; + if (dev->num_tc) + netif_setup_tc(dev, txq); + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -1823,7 +1870,7 @@ EXPORT_SYMBOL(skb_checksum_help); * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. 
*/ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; @@ -2011,7 +2058,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) protocol == htons(ETH_P_FCOE))); } -static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; @@ -2023,10 +2070,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features return features; } -int netif_skb_features(struct sk_buff *skb) +u32 netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - int features = skb->dev->features; + u32 features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2071,7 +2118,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - int features; + u32 features; /* * If device doesnt need skb->dst, release it right now while @@ -2173,6 +2220,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); @@ -2181,13 +2230,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, return hash; } + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); - return (u16) (((u64) hash * num_tx_queues) >> 32); + return (u16) (((u64) 
hash * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); @@ -2284,15 +2339,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); - bool contended = qdisc_is_running(q); + bool contended; int rc; + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. * This permits __QDISC_STATE_RUNNING owner to get the lock more often * and dequeue packets faster. */ + contended = qdisc_is_running(q); if (unlikely(contended)) spin_lock(&q->busylock); @@ -2310,7 +2368,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { @@ -2325,7 +2382,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); - rc = qdisc_enqueue_root(skb, q); + rc = q->enqueue(skb, q) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); @@ -2544,6 +2601,54 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +static struct rps_dev_flow * +set_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow *rflow, u16 next_cpu) +{ + u16 tcpu; + + tcpu = rflow->cpu = next_cpu; + if (tcpu != RPS_NO_CPU) { +#ifdef CONFIG_RFS_ACCEL + struct netdev_rx_queue *rxqueue; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *old_rflow; + u32 flow_id; + u16 rxq_index; + int rc; + + /* Should we steer this flow to a different hardware queue? 
*/ + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || + !(dev->features & NETIF_F_NTUPLE)) + goto out; + rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); + if (rxq_index == skb_get_rx_queue(skb)) + goto out; + + rxqueue = dev->_rx + rxq_index; + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (!flow_table) + goto out; + flow_id = skb->rxhash & flow_table->mask; + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, + rxq_index, flow_id); + if (rc < 0) + goto out; + old_rflow = rflow; + rflow = &flow_table->flows[flow_id]; + rflow->cpu = next_cpu; + rflow->filter = rc; + if (old_rflow->filter == rflow->filter) + old_rflow->filter = RPS_NO_FILTER; + out: +#endif + rflow->last_qtail = + per_cpu(softnet_data, tcpu).input_queue_head; + } + + return rflow; +} + /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. @@ -2615,12 +2720,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (unlikely(tcpu != next_cpu) && (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) - rflow->last_qtail = per_cpu(softnet_data, - tcpu).input_queue_head; - } + rflow->last_qtail)) >= 0)) + rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; @@ -2641,6 +2743,46 @@ done: return cpu; } +#ifdef CONFIG_RFS_ACCEL + +/** + * rps_may_expire_flow - check whether an RFS hardware filter may be removed + * @dev: Device on which the filter was set + * @rxq_index: RX queue index + * @flow_id: Flow ID passed to ndo_rx_flow_steer() + * @filter_id: Filter ID returned by ndo_rx_flow_steer() + * + * Drivers that implement ndo_rx_flow_steer() should periodically call + * this function for each installed filter and remove the filters for + * which it returns %true. 
+ */ +bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id) +{ + struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *rflow; + bool expire = true; + int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = ACCESS_ONCE(rflow->cpu); + if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + ((int)(per_cpu(softnet_data, cpu).input_queue_head - + rflow->last_qtail) < + (int)(10 * flow_table->mask))) + expire = false; + } + rcu_read_unlock(); + return expire; +} +EXPORT_SYMBOL(rps_may_expire_flow); + +#endif /* CONFIG_RFS_ACCEL */ + /* Called from hardirq (IPI) context */ static void rps_trigger_softirq(void *data) { @@ -2928,6 +3070,8 @@ out: * on a failure. * * The caller must hold the rtnl_mutex. + * + * For a general description of rx_handler, see enum rx_handler_result. */ int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, @@ -2962,64 +3106,32 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, - struct net_device *master) +static void vlan_on_bond_hook(struct sk_buff *skb) { - if (skb->pkt_type == PACKET_HOST) { - u16 *dest = (u16 *) eth_hdr(skb)->h_dest; - - memcpy(dest, master->dev_addr, ETH_ALEN); - } -} - -/* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and - * ARP on active-backup slaves with arp_validate enabled. 
- */ -int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) -{ - struct net_device *dev = skb->dev; - - if (master->priv_flags & IFF_MASTER_ARPMON) - dev->last_rx = jiffies; - - if ((master->priv_flags & IFF_MASTER_ALB) && - (master->priv_flags & IFF_BRIDGE_PORT)) { - /* Do address unmangle. The local destination address - * will be always the one master has. Provides the right - * functionality in a bridge. - */ - skb_bond_set_mac_by_master(skb, master); - } - - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __cpu_to_be16(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __cpu_to_be16(ETH_P_SLOW)) - return 0; + /* + * Make sure ARP frames received on VLAN interfaces stacked on + * bonding interfaces still make their way to any base bonding + * device that may have registered for a specific ptype. 
+ */ + if (skb->dev->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && + skb->protocol == htons(ETH_P_ARP)) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - return 1; + if (!skb2) + return; + skb2->dev = vlan_dev_real_dev(skb->dev); + netif_rx(skb2); } - return 0; } -EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *master; - struct net_device *null_or_orig; - struct net_device *orig_or_bond; + struct net_device *null_or_dev; + bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3034,28 +3146,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; - - /* - * bonding note: skbs received on inactive slaves should only - * be delivered to pkt handlers that are exact matches. Also - * the deliver_no_wcard flag will be set. If packet handlers - * are sensitive to duplicate packets these skbs will need to - * be dropped at the handler. 
- */ - null_or_orig = NULL; orig_dev = skb->dev; - master = ACCESS_ONCE(orig_dev->master); - if (skb->deliver_no_wcard) - null_or_orig = orig_dev; - else if (master) { - if (skb_bond_should_drop(skb, master)) { - skb->deliver_no_wcard = 1; - null_or_orig = orig_dev; /* deliver only exact match */ - } else - skb->dev = master; - } - __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; @@ -3064,6 +3156,10 @@ static int __netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); +another_round: + + __this_cpu_inc(softnet_data.processed); + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3072,8 +3168,7 @@ static int __netif_receive_skb(struct sk_buff *skb) #endif list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev) { + if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -3087,16 +3182,24 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - /* Handle special case of bridge or macvlan */ rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - skb = rx_handler(skb); - if (!skb) + switch (rx_handler(&skb)) { + case RX_HANDLER_CONSUMED: goto out; + case RX_HANDLER_ANOTHER: + goto another_round; + case RX_HANDLER_EXACT: + deliver_exact = true; + case RX_HANDLER_PASS: + break; + default: + BUG(); + } } if (vlan_tx_tag_present(skb)) { @@ -3111,24 +3214,17 @@ ncls: goto out; } - /* - * Make sure frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. The - * handler may have to adjust skb->dev and orig_dev. 
- */ - orig_or_bond = orig_dev; - if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && - (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - orig_or_bond = vlan_dev_real_dev(skb->dev); - } + vlan_on_bond_hook(skb); + + /* deliver only exact match when indicated */ + null_or_dev = deliver_exact ? skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && (ptype->dev == null_or_orig || - ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == orig_or_bond)) { + if (ptype->type == type && + (ptype->dev == null_or_dev || ptype->dev == skb->dev || + ptype->dev == orig_dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -3925,12 +4021,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = (v == SEQ_START_TOKEN) ? - first_net_device(seq_file_net(seq)) : - next_net_device((struct net_device *)v); + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + dev = first_net_device_rcu(seq_file_net(seq)); + else + dev = next_net_device_rcu(dev); ++*pos; - return rcu_dereference(dev); + return dev; } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4214,15 +4313,14 @@ static int __init dev_proc_init(void) /** - * netdev_set_master - set up master/slave pair + * netdev_set_master - set up master pointer * @slave: slave device * @master: new master device * * Changes the master device of the slave. Pass %NULL to break the * bonding. The caller must hold the RTNL semaphore. On a failure * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. + * are adjusted and the function returns zero. 
*/ int netdev_set_master(struct net_device *slave, struct net_device *master) { @@ -4242,6 +4340,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) synchronize_net(); dev_put(old); } + return 0; +} +EXPORT_SYMBOL(netdev_set_master); + +/** + * netdev_set_bond_master - set up bonding master/slave pair + * @slave: slave device + * @master: new master device + * + * Changes the master device of the slave. Pass %NULL to break the + * bonding. The caller must hold the RTNL semaphore. On a failure + * a negative errno code is returned. On success %RTM_NEWLINK is sent + * to the routing socket and the function returns zero. + */ +int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +{ + int err; + + ASSERT_RTNL(); + + err = netdev_set_master(slave, master); + if (err) + return err; if (master) slave->flags |= IFF_SLAVE; else @@ -4250,7 +4371,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); return 0; } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_set_bond_master); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -4587,6 +4708,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) EXPORT_SYMBOL(dev_set_mtu); /** + * dev_set_group - Change group this device belongs to + * @dev: device + * @new_group: group this device should belong to + */ +void dev_set_group(struct net_device *dev, int new_group) +{ + dev->group = new_group; +} +EXPORT_SYMBOL(dev_set_group); + +/** * dev_set_mac_address - Change Media Access Control Address * @dev: device * @sa: new address @@ -5077,41 +5209,55 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } -unsigned long netdev_fix_features(unsigned long features, const char *name) +u32 netdev_fix_features(struct net_device *dev, u32 features) { + /* Fix illegal checksum combinations */ + if ((features & NETIF_F_HW_CSUM) && + (features & 
(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + netdev_info(dev, "mixed HW and IP checksum settings.\n"); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((features & NETIF_F_NO_CSUM) && + (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + netdev_info(dev, "mixed no checksumming and other settings.\n"); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " - "checksum feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " - "SG feature.\n", name); + netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); features &= ~NETIF_F_TSO; } + /* Software GSO depends on SG. */ + if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + features &= ~NETIF_F_GSO; + } + + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? 
*/ if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no checksum offload features.\n", - name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_SG feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } } @@ -5120,6 +5266,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +void netdev_update_features(struct net_device *dev) +{ + u32 features; + int err = 0; + + features = netdev_get_wanted_features(dev); + + if (dev->netdev_ops->ndo_fix_features) + features = dev->netdev_ops->ndo_fix_features(dev, features); + + /* driver might be less strict about feature dependencies */ + features = netdev_fix_features(dev, features); + + if (dev->features == features) + return; + + netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + dev->features, features); + + if (dev->netdev_ops->ndo_set_features) + err = dev->netdev_ops->ndo_set_features(dev, features); + + if (!err) + dev->features = features; + else if (err < 0) + netdev_err(dev, + "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", + err, features, dev->features); +} +EXPORT_SYMBOL(netdev_update_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from @@ -5254,27 +5431,19 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", 
- dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). + */ + dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = dev->features & dev->hw_features; - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + /* Avoid warning from netdev_fix_features() for GSO without SG */ + if (!(dev->wanted_features & NETIF_F_SG)) { + dev->wanted_features &= ~NETIF_F_GSO; + dev->features &= ~NETIF_F_GSO; } - dev->features = netdev_fix_features(dev->features, dev->name); - - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -5291,6 +5460,8 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; + netdev_update_features(dev); + /* * Default initial state at registry is that the * device is present. @@ -5695,6 +5866,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif strcpy(dev->name, name); + dev->group = INIT_NETDEV_GROUP; return dev; free_all: @@ -6009,8 +6181,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask) +u32 netdev_increment_features(u32 all, u32 one, u32 mask) { /* If device needs checksumming, downgrade to it. 
*/ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) diff --git a/net/core/dst.c b/net/core/dst.c index b99c7c7..91104d3 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,7 +164,9 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -void *dst_alloc(struct dst_ops *ops) +const u32 dst_default_metrics[RTAX_MAX]; + +void *dst_alloc(struct dst_ops *ops, int initial_ref) { struct dst_entry *dst; @@ -175,11 +177,12 @@ void *dst_alloc(struct dst_ops *ops) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, 0); + atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; dst->input = dst->output = dst_discard; + dst_init_metrics(dst, dst_default_metrics, true); #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst) } EXPORT_SYMBOL(dst_release); +u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} +EXPORT_SYMBOL(dst_cow_metrics_generic); + +/* Caller asserts that dst_metrics_read_only(dst) is false. 
*/ +void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + unsigned long prev, new; + + new = (unsigned long) dst_default_metrics; + prev = cmpxchg(&dst->_metrics, old, new); + if (prev == old) + kfree(__DST_METRICS_PTR(old)); +} +EXPORT_SYMBOL(__dst_destroy_metrics_generic); + /** * skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer diff --git a/net/core/ethtool.c b/net/core/ethtool.c index ff23029..c1a71bb 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -u32 ethtool_op_get_rx_csum(struct net_device *dev) -{ - return (dev->features & NETIF_F_ALL_CSUM) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_rx_csum); - u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; @@ -55,6 +49,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_csum); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) { @@ -171,6 +166,381 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +#define ETHTOOL_DEV_FEATURE_WORDS 1 + +static void ethtool_get_features_compat(struct net_device *dev, + struct ethtool_get_features_block *features) +{ + if (!dev->ethtool_ops) + return; + + /* getting RX checksum */ + if (dev->ethtool_ops->get_rx_csum) + if (dev->ethtool_ops->get_rx_csum(dev)) + features[0].active |= NETIF_F_RXCSUM; + + /* mark legacy-changeable features */ + if (dev->ethtool_ops->set_sg) + features[0].available |= NETIF_F_SG; + if (dev->ethtool_ops->set_tx_csum) + features[0].available |= NETIF_F_ALL_CSUM; + if (dev->ethtool_ops->set_tso) + features[0].available |= NETIF_F_ALL_TSO; + if (dev->ethtool_ops->set_rx_csum) + features[0].available |= NETIF_F_RXCSUM; + if (dev->ethtool_ops->set_flags) + features[0].available |= flags_dup_features; +} + +static int ethtool_set_feature_compat(struct net_device 
*dev, + int (*legacy_set)(struct net_device *, u32), + struct ethtool_set_features_block *features, u32 mask) +{ + u32 do_set; + + if (!legacy_set) + return 0; + + if (!(features[0].valid & mask)) + return 0; + + features[0].valid &= ~mask; + + do_set = !!(features[0].requested & mask); + + if (legacy_set(dev, do_set) < 0) + netdev_info(dev, + "Legacy feature change (%s) failed for 0x%08x\n", + do_set ? "set" : "clear", mask); + + return 1; +} + +static int ethtool_set_features_compat(struct net_device *dev, + struct ethtool_set_features_block *features) +{ + int compat; + + if (!dev->ethtool_ops) + return 0; + + compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, + features, NETIF_F_SG); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, + features, NETIF_F_ALL_CSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, + features, NETIF_F_ALL_TSO); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, + features, NETIF_F_RXCSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags, + features, flags_dup_features); + + return compat; +} + +static int ethtool_get_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_gfeatures cmd = { + .cmd = ETHTOOL_GFEATURES, + .size = ETHTOOL_DEV_FEATURE_WORDS, + }; + struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { + { + .available = dev->hw_features, + .requested = dev->wanted_features, + .active = dev->features, + .never_changed = NETIF_F_NEVER_CHANGE, + }, + }; + u32 __user *sizeaddr; + u32 copy_size; + + ethtool_get_features_compat(dev, features); + + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); + if (get_user(copy_size, sizeaddr)) + return -EFAULT; + + if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) + copy_size = ETHTOOL_DEV_FEATURE_WORDS; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + if (copy_to_user(useraddr, 
features, copy_size * sizeof(*features))) + return -EFAULT; + + return 0; +} + +static int ethtool_set_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_sfeatures cmd; + struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; + int ret = 0; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + + if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) + return -EINVAL; + + if (copy_from_user(features, useraddr, sizeof(features))) + return -EFAULT; + + if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) + return -EINVAL; + + if (ethtool_set_features_compat(dev, features)) + ret |= ETHTOOL_F_COMPAT; + + if (features[0].valid & ~dev->hw_features) { + features[0].valid &= dev->hw_features; + ret |= ETHTOOL_F_UNSUPPORTED; + } + + dev->wanted_features &= ~features[0].valid; + dev->wanted_features |= features[0].valid & features[0].requested; + netdev_update_features(dev); + + if ((dev->wanted_features ^ dev->features) & features[0].valid) + ret |= ETHTOOL_F_WISH; + + return ret; +} + +static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { + /* NETIF_F_SG */ "tx-scatter-gather", + /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", + /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", + /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", + /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6", + /* NETIF_F_HIGHDMA */ "highdma", + /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", + /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", + + /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", + /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", + /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", + /* NETIF_F_GSO */ "tx-generic-segmentation", + /* NETIF_F_LLTX */ "tx-lockless", + /* NETIF_F_NETNS_LOCAL */ "netns-local", + /* NETIF_F_GRO */ "rx-gro", + /* NETIF_F_LRO */ "rx-lro", + + /* NETIF_F_TSO */ "tx-tcp-segmentation", + /* NETIF_F_UFO */ "tx-udp-fragmentation", + /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", + /* 
NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", + /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", + /* NETIF_F_FSO */ "tx-fcoe-segmentation", + "", + "", + + /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", + /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", + /* NETIF_F_FCOE_MTU */ "fcoe-mtu", + /* NETIF_F_NTUPLE */ "rx-ntuple-filter", + /* NETIF_F_RXHASH */ "rx-hashing", + /* NETIF_F_RXCSUM */ "rx-checksum", + "", + "", +}; + +static int __ethtool_get_sset_count(struct net_device *dev, int sset) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (sset == ETH_SS_FEATURES) + return ARRAY_SIZE(netdev_features_strings); + + if (ops && ops->get_sset_count && ops->get_strings) + return ops->get_sset_count(dev, sset); + else + return -EOPNOTSUPP; +} + +static void __ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (stringset == ETH_SS_FEATURES) + memcpy(data, netdev_features_strings, + sizeof(netdev_features_strings)); + else + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); +} + +static u32 ethtool_get_feature_mask(u32 eth_cmd) +{ + /* feature masks of legacy discrete ethtool ops */ + + switch (eth_cmd) { + case ETHTOOL_GTXCSUM: + case ETHTOOL_STXCSUM: + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GRXCSUM: + case ETHTOOL_SRXCSUM: + return NETIF_F_RXCSUM; + case ETHTOOL_GSG: + case ETHTOOL_SSG: + return NETIF_F_SG; + case ETHTOOL_GTSO: + case ETHTOOL_STSO: + return NETIF_F_ALL_TSO; + case ETHTOOL_GUFO: + case ETHTOOL_SUFO: + return NETIF_F_UFO; + case ETHTOOL_GGSO: + case ETHTOOL_SGSO: + return NETIF_F_GSO; + case ETHTOOL_GGRO: + case ETHTOOL_SGRO: + return NETIF_F_GRO; + default: + BUG(); + } +} + +static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return NULL; + + switch (ethcmd) { + case ETHTOOL_GTXCSUM: + return 
ops->get_tx_csum; + case ETHTOOL_GRXCSUM: + return ops->get_rx_csum; + case ETHTOOL_SSG: + return ops->get_sg; + case ETHTOOL_STSO: + return ops->get_tso; + case ETHTOOL_SUFO: + return ops->get_ufo; + default: + return NULL; + } +} + +static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) +{ + return !!(dev->features & NETIF_F_ALL_CSUM); +} + +static int ethtool_get_one_feature(struct net_device *dev, + char __user *useraddr, u32 ethcmd) +{ + u32 mask = ethtool_get_feature_mask(ethcmd); + struct ethtool_value edata = { + .cmd = ethcmd, + .data = !!(dev->features & mask), + }; + + /* compatibility with discrete get_ ops */ + if (!(dev->hw_features & mask)) { + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + + /* bug compatibility with old get_rx_csum */ + if (ethcmd == ETHTOOL_GRXCSUM && !actor) + actor = __ethtool_get_rx_csum_oldbug; + + if (actor) + edata.data = actor(dev); + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_sg(struct net_device *dev, u32 data); +static int __ethtool_set_tso(struct net_device *dev, u32 data); +static int __ethtool_set_ufo(struct net_device *dev, u32 data); + +static int ethtool_set_one_feature(struct net_device *dev, + void __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata; + u32 mask; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + mask = ethtool_get_feature_mask(ethcmd); + mask &= dev->hw_features; + if (mask) { + if (edata.data) + dev->wanted_features |= mask; + else + dev->wanted_features &= ~mask; + + netdev_update_features(dev); + return 0; + } + + /* Driver is not converted to ndo_fix_features or does not + * support changing this offload. In the latter case it won't + * have corresponding ethtool_ops field set. 
+ * + * Following part is to be removed after all drivers advertise + * their changeable features in netdev->hw_features and stop + * using discrete offload setting ops. + */ + + switch (ethcmd) { + case ETHTOOL_STXCSUM: + return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SRXCSUM: + return __ethtool_set_rx_csum(dev, edata.data); + case ETHTOOL_SSG: + return __ethtool_set_sg(dev, edata.data); + case ETHTOOL_STSO: + return __ethtool_set_tso(dev, edata.data); + case ETHTOOL_SUFO: + return __ethtool_set_ufo(dev, edata.data); + default: + return -EOPNOTSUPP; + } +} + +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + u32 changed; + + if (data & ~flags_dup_features) + return -EINVAL; + + /* legacy set_flags() op */ + if (dev->ethtool_ops->set_flags) { + if (unlikely(dev->hw_features & flags_dup_features)) + netdev_warn(dev, + "driver BUG: mixed hw_features and set_flags()\n"); + return dev->ethtool_ops->set_flags(dev, data); + } + + /* allow changing only bits set in hw_features */ + changed = (data ^ dev->wanted_features) & flags_dup_features; + if (changed & ~dev->hw_features) + return (changed & dev->hw_features) ? 
-EINVAL : -EOPNOTSUPP; + + dev->wanted_features = + (dev->wanted_features & ~changed) | data; + + netdev_update_features(dev); + + return 0; +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -251,14 +621,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; u64 sset_mask; int i, idx = 0, n_bits = 0, ret, rc; u32 *info_buf = NULL; - if (!ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; @@ -285,7 +651,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, if (!(sset_mask & (1ULL << i))) continue; - rc = ops->get_sset_count(dev, i); + rc = __ethtool_get_sset_count(dev, i); if (rc >= 0) { info.sset_mask |= (1ULL << i); info_buf[idx++] = rc; @@ -1091,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + if (!data && dev->ethtool_ops->set_tso) { err = dev->ethtool_ops->set_tso(dev, 0); if (err) @@ -1105,145 +1474,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return dev->ethtool_ops->set_sg(dev, data); } -static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; int err; if (!dev->ethtool_ops->set_tx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) { + if (!data && dev->ethtool_ops->set_sg) { err = __ethtool_set_sg(dev, 0); if (err) return err; } - return dev->ethtool_ops->set_tx_csum(dev, edata.data); + return dev->ethtool_ops->set_tx_csum(dev, data); } -EXPORT_SYMBOL(ethtool_op_set_tx_csum); -static int 
ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_rx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) + if (!data) dev->features &= ~NETIF_F_GRO; - return dev->ethtool_ops->set_rx_csum(dev, edata.data); + return dev->ethtool_ops->set_rx_csum(dev, data); } -static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tso(struct net_device *dev, u32 data) { - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && - !(dev->features & NETIF_F_ALL_CSUM)) - return -EINVAL; - - return __ethtool_set_sg(dev, edata.data); -} - -static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - if (!dev->ethtool_ops->set_tso) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - return dev->ethtool_ops->set_tso(dev, edata.data); + return dev->ethtool_ops->set_tso(dev, data); } -static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_ufo(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_ufo) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + if (data && !((dev->features & NETIF_F_GEN_CSUM) || (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == 
(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; - return dev->ethtool_ops->set_ufo(dev, edata.data); -} - -static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGSO }; - - edata.data = dev->features & NETIF_F_GSO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data) - dev->features |= NETIF_F_GSO; - else - dev->features &= ~NETIF_F_GSO; - return 0; -} - -static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGRO }; - - edata.data = dev->features & NETIF_F_GRO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data) { - u32 rxcsum = dev->ethtool_ops->get_rx_csum ? 
- dev->ethtool_ops->get_rx_csum(dev) : - ethtool_op_get_rx_csum(dev); - - if (!rxcsum) - return -EINVAL; - dev->features |= NETIF_F_GRO; - } else - dev->features &= ~NETIF_F_GRO; - - return 0; + return dev->ethtool_ops->set_ufo(dev, data); } static int ethtool_self_test(struct net_device *dev, char __user *useraddr) @@ -1287,17 +1566,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; - if (!ops->get_strings || !ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - ret = ops->get_sset_count(dev, gstrings.string_set); + ret = __ethtool_get_sset_count(dev, gstrings.string_set); if (ret < 0) return ret; @@ -1307,7 +1582,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - ops->get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1317,7 +1592,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; ret = 0; - out: +out: kfree(data); return ret; } @@ -1458,7 +1733,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - unsigned long old_features; + u32 old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; @@ -1500,6 +1775,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: + case ETHTOOL_GFEATURES: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1570,42 +1846,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SPAUSEPARAM: rc = ethtool_set_pauseparam(dev, useraddr); break; - case ETHTOOL_GRXCSUM: - rc 
= ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum : - ethtool_op_get_rx_csum)); - break; - case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); - break; - case ETHTOOL_GTXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tx_csum ? - dev->ethtool_ops->get_tx_csum : - ethtool_op_get_tx_csum)); - break; - case ETHTOOL_STXCSUM: - rc = ethtool_set_tx_csum(dev, useraddr); - break; - case ETHTOOL_GSG: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_sg ? - dev->ethtool_ops->get_sg : - ethtool_op_get_sg)); - break; - case ETHTOOL_SSG: - rc = ethtool_set_sg(dev, useraddr); - break; - case ETHTOOL_GTSO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tso ? - dev->ethtool_ops->get_tso : - ethtool_op_get_tso)); - break; - case ETHTOOL_STSO: - rc = ethtool_set_tso(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1621,21 +1861,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; - case ETHTOOL_GUFO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_ufo ? - dev->ethtool_ops->get_ufo : - ethtool_op_get_ufo)); - break; - case ETHTOOL_SUFO: - rc = ethtool_set_ufo(dev, useraddr); - break; - case ETHTOOL_GGSO: - rc = ethtool_get_gso(dev, useraddr); - break; - case ETHTOOL_SGSO: - rc = ethtool_set_gso(dev, useraddr); - break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, (dev->ethtool_ops->get_flags ? 
@@ -1643,8 +1868,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) ethtool_op_get_flags)); break; case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_flags); + rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); break; case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, @@ -1666,12 +1890,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCLSRLINS: rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; - case ETHTOOL_GGRO: - rc = ethtool_get_gro(dev, useraddr); - break; - case ETHTOOL_SGRO: - rc = ethtool_set_gro(dev, useraddr); - break; case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; @@ -1693,6 +1911,30 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GFEATURES: + rc = ethtool_get_features(dev, useraddr); + break; + case ETHTOOL_SFEATURES: + rc = ethtool_set_features(dev, useraddr); + break; + case ETHTOOL_GTXCSUM: + case ETHTOOL_GRXCSUM: + case ETHTOOL_GSG: + case ETHTOOL_GTSO: + case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + rc = ethtool_get_one_feature(dev, useraddr, ethcmd); + break; + case ETHTOOL_STXCSUM: + case ETHTOOL_SRXCSUM: + case ETHTOOL_SSG: + case ETHTOOL_STSO: + case ETHTOOL_SUFO: + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + rc = ethtool_set_one_feature(dev, useraddr, ethcmd); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a20e5d3..8248ebb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->oif)) + if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; - if ((rule->mark ^ fl->mark) & 
rule->mark_mask) + if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; ret = ops->match(rule, fl, flags); diff --git a/net/core/filter.c b/net/core/filter.c index afc5837..232b187 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) if (err) return err; - rcu_read_lock_bh(); - filter = rcu_dereference_bh(sk->sk_filter); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } - rcu_read_unlock_bh(); + rcu_read_unlock(); return err; } diff --git a/net/core/flow.c b/net/core/flow.c index 127c8a7..990703b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - struct flowi *key) + const struct flowi *key) { - u32 *k = (u32 *) key; + const u32 *k = (const u32 *) key; return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); @@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t; * important assumptions that we can here, such as alignment and * constant size. 
*/ -static int flow_key_compare(struct flowi *key1, struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) { - flow_compare_t *k1, *k1_lim, *k2; + const flow_compare_t *k1, *k1_lim, *k2; const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); - k1 = (flow_compare_t *) key1; + k1 = (const flow_compare_t *) key1; k1_lim = k1 + n_elem; - k2 = (flow_compare_t *) key2; + k2 = (const flow_compare_t *) key2; do { if (*k1++ != *k2++) @@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) } struct flow_cache_object * -flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { struct flow_cache *fc = &flow_cache_global; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60a9029..799f06e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) { size_t size = entries * sizeof(struct neighbour *); struct neigh_hash_table *ret; - struct neighbour **buckets; + struct neighbour __rcu **buckets; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) @@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) if (size <= PAGE_SIZE) buckets = kzalloc(size, GFP_ATOMIC); else - buckets = (struct neighbour **) + buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); if (!buckets) { kfree(ret); return NULL; } - rcu_assign_pointer(ret->hash_buckets, buckets); + ret->hash_buckets = buckets; ret->hash_mask = entries - 1; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; @@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); size_t size = (nht->hash_mask + 1) * 
sizeof(struct neighbour *); - struct neighbour **buckets = nht->hash_buckets; + struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) kfree(buckets); @@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); #endif - tbl->nht = neigh_hash_alloc(8); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); @@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); + call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, + neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e23c01b..5ceb257 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); -NETDEVICE_SHOW(features, fmt_long_hex); +NETDEVICE_SHOW(features, fmt_hex); NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); @@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev, return ret; } +NETDEVICE_SHOW(group, fmt_dec); + +static int change_group(struct net_device *net, unsigned long new_group) +{ + dev_set_group(net, (int) new_group); + return 0; +} + +static ssize_t store_group(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_group); +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), @@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | 
S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 02dc2cb..06be243 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); + if (dev->priv_flags & IFF_SLAVE) { + if (dev->npinfo) { + struct net_device *bond_dev = dev->master; + struct sk_buff *skb; + while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + skb->dev = bond_dev; + skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + } + } + } + service_arp_queue(dev->npinfo); zap_completion_queue(); @@ -313,9 +324,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { - dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); - dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b5bada9..0c55eaa 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -251,6 +251,7 @@ struct pktgen_dev { int max_pkt_size; /* = ETH_ZLEN; */ int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + struct page *page; u64 delay; /* nano-seconds */ __u64 count; /* Default No packets to send */ @@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file, if (node_possible(value)) { pkt_dev->node = value; sprintf(pg_result, "OK: node=%d", pkt_dev->node); + if (pkt_dev->page) { + put_page(pkt_dev->page); + pkt_dev->page = NULL; + } } else sprintf(pg_result, "ERROR: node not possible"); @@ -2605,6 +2610,89 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, return htons(id | (cfi << 12) | (prio << 13)); } +static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, + int datalen) +{ + struct timeval timestamp; + struct pktgen_hdr *pgh; + + pgh = (struct 
pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + datalen -= sizeof(*pgh); + + if (pkt_dev->nfrags <= 0) { + memset(skb_put(skb, datalen), 0, datalen); + } else { + int frags = pkt_dev->nfrags; + int i, len; + + + if (frags > MAX_SKB_FRAGS) + frags = MAX_SKB_FRAGS; + len = datalen - frags * PAGE_SIZE; + if (len > 0) { + memset(skb_put(skb, len), 0, len); + datalen = frags * PAGE_SIZE; + } + + i = 0; + while (datalen > 0) { + if (unlikely(!pkt_dev->page)) { + int node = numa_node_id(); + + if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) + node = pkt_dev->node; + pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!pkt_dev->page) + break; + } + skb_shinfo(skb)->frags[i].page = pkt_dev->page; + get_page(pkt_dev->page); + skb_shinfo(skb)->frags[i].page_offset = 0; + skb_shinfo(skb)->frags[i].size = + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + datalen -= skb_shinfo(skb)->frags[i].size; + skb->len += skb_shinfo(skb)->frags[i].size; + skb->data_len += skb_shinfo(skb)->frags[i].size; + i++; + skb_shinfo(skb)->nr_frags = i; + } + + while (i < frags) { + int rem; + + if (i == 0) + break; + + rem = skb_shinfo(skb)->frags[i - 1].size / 2; + if (rem == 0) + break; + + skb_shinfo(skb)->frags[i - 1].size -= rem; + + skb_shinfo(skb)->frags[i] = + skb_shinfo(skb)->frags[i - 1]; + get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->frags[i].page = + skb_shinfo(skb)->frags[i - 1].page; + skb_shinfo(skb)->frags[i].page_offset += + skb_shinfo(skb)->frags[i - 1].size; + skb_shinfo(skb)->frags[i].size = rem; + i++; + skb_shinfo(skb)->nr_frags = i; + } + } + + /* Stamp the time, and sequence number, + * convert them to network byte order + */ + pgh->pgh_magic = htonl(PKTGEN_MAGIC); + pgh->seq_num = htonl(pkt_dev->seq_num); + + do_gettimeofday(&timestamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_usec); +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2613,7 +2701,6 @@ 
static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct udphdr *udph; int datalen, iplen; struct iphdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IP); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -2729,76 +2816,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; - - if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); - } else { - int frags = pkt_dev->nfrags; - int i, len; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - len = datalen - frags * PAGE_SIZE; - memset(skb_put(skb, len), 0, len); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? 
datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(&timestamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } + pktgen_finalize_skb(pkt_dev, skb, datalen); #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) @@ -2980,7 +2998,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct udphdr *udph; int datalen; struct ipv6hdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IPV6); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -3083,75 +3100,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; - int i; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - 
struct page *page = alloc_pages(GFP_KERNEL, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - * should we update cloned packets too ? - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(&timestamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - /* pkt_dev->seq_num++; FF: you really mean this? */ + pktgen_finalize_skb(pkt_dev, skb, datalen); return skb; } @@ -3884,6 +3833,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); + if (pkt_dev->page) + put_page(pkt_dev->page); kfree(pkt_dev); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d65c6b..49f7ea5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_U32(skb, IFLA_GROUP, dev->group); if (dev->ifindex != dev->iflink) NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); @@ -1035,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, + [IFLA_MASTER] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1177,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) return err; } +static int do_set_master(struct net_device *dev, int ifindex) +{ + struct net_device *master_dev; + const struct net_device_ops *ops; + int err; + + if (dev->master) { + if (dev->master->ifindex == ifindex) + return 0; + ops = dev->master->netdev_ops; + if (ops->ndo_del_slave) { + err = ops->ndo_del_slave(dev->master, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + + if (ifindex) { + master_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!master_dev) + return -EINVAL; + ops = master_dev->netdev_ops; + if (ops->ndo_add_slave) { + err = ops->ndo_add_slave(master_dev, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + return 0; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -1264,6 +1301,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_GROUP]) { + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + modified = 1; + } + /* * Interface selected by interface index but interface * name provided implies that a name change has been @@ -1295,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, 
nla_get_u32(tb[IFLA_MASTER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); @@ -1541,6 +1590,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + if (tb[IFLA_GROUP]) + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); return dev; @@ -1551,6 +1602,24 @@ err: } EXPORT_SYMBOL(rtnl_create_link); +static int rtnl_group_changelink(struct net *net, int group, + struct ifinfomsg *ifm, + struct nlattr **tb) +{ + struct net_device *dev; + int err; + + for_each_netdev(net, dev) { + if (dev->group == group) { + err = do_setlink(dev, ifm, tb, NULL, 0); + if (err < 0) + return err; + } + } + + return 0; +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1578,10 +1647,12 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else if (ifname[0]) - dev = __dev_get_by_name(net, ifname); - else - dev = NULL; + else { + if (ifname[0]) + dev = __dev_get_by_name(net, ifname); + else + dev = NULL; + } err = validate_linkmsg(dev, tb); if (err < 0) @@ -1645,8 +1716,13 @@ replay: return do_setlink(dev, ifm, tb, ifname, modified); } - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) + return rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); return -ENODEV; + } if (ifm->ifi_index) return -EOPNOTSUPP; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d883dcc..801dd08 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -523,7 +523,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; - new->deliver_no_wcard = 
old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif @@ -2434,8 +2433,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -ENOMEM; /* initialize the next frag */ - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, frg_cnt, page, 0, 0); skb->truesize += PAGE_SIZE; atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); @@ -2455,7 +2452,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -EFAULT; /* copy was successful so update the size parameters */ - sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; @@ -2498,7 +2494,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -2508,7 +2504,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) unsigned int offset = doffset; unsigned int headroom; unsigned int len; - int sg = features & NETIF_F_SG; + int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; |