summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@redhat.com>2012-10-09 22:35:22 (GMT)
committerJ. Bruce Fields <bfields@redhat.com>2012-10-09 22:35:22 (GMT)
commitf474af7051212b4efc8267583fad9c4ebf33ccff (patch)
tree1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /net/core
parent0d22f68f02c10d5d10ec5712917e5828b001a822 (diff)
parente3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457 (diff)
downloadlinux-f474af7051212b4efc8267583fad9c4ebf33ccff.tar.xz
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for which the preparatory patches were pulled recently. After these patches, userspace headers will be segregated into: include/uapi/linux/.../foo.h for the userspace interface stuff, and: include/linux/.../foo.h for the strictly kernel internal stuff. Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c171
-rw-r--r--net/core/dev_addr_lists.c40
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/ethtool.c12
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/filter.c27
-rw-r--r--net/core/link_watch.c29
-rw-r--r--net/core/neighbour.c10
-rw-r--r--net/core/net-sysfs.c18
-rw-r--r--net/core/netpoll.c104
-rw-r--r--net/core/netprio_cgroup.c124
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/request_sock.c95
-rw-r--r--net/core/rtnetlink.c38
-rw-r--r--net/core/scm.c51
-rw-r--r--net/core/secure_seq.c1
-rw-r--r--net/core/skbuff.c90
-rw-r--r--net/core/sock.c101
-rw-r--r--net/core/sock_diag.c3
-rw-r--r--net/core/utils.c20
20 files changed, 589 insertions, 359 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index a39354e..1e0a184 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_get_valid_name(struct net_device *dev, const char *name)
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
{
- struct net *net;
+ char buf[IFNAMSIZ];
+ int ret;
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
+static int dev_get_valid_name(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ BUG_ON(!net);
if (!dev_valid_name(name))
return -EINVAL;
if (strchr(name, '%'))
- return dev_alloc_name(dev, name);
+ return dev_alloc_name_ns(net, dev, name);
else if (__dev_get_by_name(net, name))
return -EEXIST;
else if (dev->name != name)
@@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
- err = dev_get_valid_name(dev, newname);
+ err = dev_get_valid_name(net, dev, newname);
if (err < 0)
return err;
@@ -1109,11 +1121,23 @@ void netdev_state_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_state_change);
-int netdev_bonding_change(struct net_device *dev, unsigned long event)
+/**
+ * netdev_notify_peers - notify network peers about existence of @dev
+ * @dev: network device
+ *
+ * Generate traffic such that interested network peers are aware of
+ * @dev, such as by generating a gratuitous ARP. This may be used when
+ * a device wants to inform the rest of the network about some sort of
+ * reconfiguration such as a failover event or virtual machine
+ * migration.
+ */
+void netdev_notify_peers(struct net_device *dev)
{
- return call_netdevice_notifiers(event, dev);
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ rtnl_unlock();
}
-EXPORT_SYMBOL(netdev_bonding_change);
+EXPORT_SYMBOL(netdev_notify_peers);
/**
* dev_load - load a network module
@@ -1394,7 +1418,6 @@ rollback:
nb->notifier_call(nb, NETDEV_DOWN, dev);
}
nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
- nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
}
}
@@ -1436,7 +1459,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
nb->notifier_call(nb, NETDEV_DOWN, dev);
}
nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
- nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
}
}
unlock:
@@ -1642,6 +1664,19 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+ if (ptype->af_packet_priv == NULL)
+ return false;
+
+ if (ptype->id_match)
+ return ptype->id_match(ptype, skb->sk);
+ else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+ return true;
+
+ return false;
+}
+
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
@@ -1659,8 +1694,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
* they originated from - MvS (miquels@drinkel.ow.org)
*/
if ((ptype->dev == dev || !ptype->dev) &&
- (ptype->af_packet_priv == NULL ||
- (struct sock *)ptype->af_packet_priv != skb->sk)) {
+ (!skb_loop_sk(ptype, skb))) {
if (pt_prev) {
deliver_skb(skb2, pt_prev, skb->dev);
pt_prev = ptype;
@@ -2122,7 +2156,8 @@ static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
static netdev_features_t harmonize_features(struct sk_buff *skb,
__be16 protocol, netdev_features_t features)
{
- if (!can_checksum_protocol(features, protocol)) {
+ if (skb->ip_summed != CHECKSUM_NONE &&
+ !can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
features &= ~NETIF_F_SG;
} else if (illegal_highdma(skb->dev, skb)) {
@@ -2162,9 +2197,7 @@ EXPORT_SYMBOL(netif_skb_features);
/*
* Returns true if either:
* 1. skb has frag_list and the device doesn't support FRAGLIST, or
- * 2. skb is fragmented and the device does not support SG, or if
- * at least one of fragments is in highmem and device does not
- * support DMA from it.
+ * 2. skb is fragmented and the device does not support SG.
*/
static inline int skb_needs_linearize(struct sk_buff *skb,
int features)
@@ -2193,9 +2226,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
-
features = netif_skb_features(skb);
if (vlan_tx_tag_present(skb) &&
@@ -2230,6 +2260,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
+
skb_len = skb->len;
rc = ops->ndo_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
@@ -2252,6 +2285,9 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(nskb, dev);
+
skb_len = nskb->len;
rc = ops->ndo_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
@@ -2361,8 +2397,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
#endif
}
-static struct netdev_queue *dev_pick_tx(struct net_device *dev,
- struct sk_buff *skb)
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb)
{
int queue_index;
const struct net_device_ops *ops = dev->netdev_ops;
@@ -2536,7 +2572,7 @@ int dev_queue_xmit(struct sk_buff *skb)
skb_update_prio(skb);
- txq = dev_pick_tx(dev, skb);
+ txq = netdev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
#ifdef CONFIG_NET_CLS_ACT
@@ -2609,6 +2645,8 @@ EXPORT_SYMBOL(dev_queue_xmit);
=======================================================================*/
int netdev_max_backlog __read_mostly = 1000;
+EXPORT_SYMBOL(netdev_max_backlog);
+
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64; /* old backlog weight */
@@ -2635,15 +2673,16 @@ void __skb_get_rxhash(struct sk_buff *skb)
if (!skb_flow_dissect(skb, &keys))
return;
- if (keys.ports) {
- if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
- swap(keys.port16[0], keys.port16[1]);
+ if (keys.ports)
skb->l4_rxhash = 1;
- }
/* get a consistent hash (same value on both flow directions) */
- if ((__force u32)keys.dst < (__force u32)keys.src)
+ if (((__force u32)keys.dst < (__force u32)keys.src) ||
+ (((__force u32)keys.dst == (__force u32)keys.src) &&
+ ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
swap(keys.dst, keys.src);
+ swap(keys.port16[0], keys.port16[1]);
+ }
hash = jhash_3words((__force u32)keys.dst,
(__force u32)keys.src,
@@ -3309,7 +3348,7 @@ ncls:
if (pt_prev) {
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
- ret = -ENOMEM;
+ goto drop;
else
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
@@ -4498,8 +4537,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned int old_flags = dev->flags;
- uid_t uid;
- gid_t gid;
+ kuid_t uid;
+ kgid_t gid;
ASSERT_RTNL();
@@ -4530,8 +4569,9 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
dev->name, (dev->flags & IFF_PROMISC),
(old_flags & IFF_PROMISC),
- audit_get_loginuid(current),
- uid, gid,
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ from_kuid(&init_user_ns, uid),
+ from_kgid(&init_user_ns, gid),
audit_get_sessionid(current));
}
@@ -5224,12 +5264,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
*/
static int dev_new_index(struct net *net)
{
- static int ifindex;
+ int ifindex = net->ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
if (!__dev_get_by_index(net, ifindex))
- return ifindex;
+ return net->ifindex = ifindex;
}
}
@@ -5307,10 +5347,6 @@ static void rollback_registered_many(struct list_head *head)
netdev_unregister_kobject(dev);
}
- /* Process any work delayed until the end of the batch */
- dev = list_first_entry(head, struct net_device, unreg_list);
- call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
-
synchronize_net();
list_for_each_entry(dev, head, unreg_list)
@@ -5568,7 +5604,7 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- ret = dev_get_valid_name(dev, dev->name);
+ ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
@@ -5582,7 +5618,12 @@ int register_netdevice(struct net_device *dev)
}
}
- dev->ifindex = dev_new_index(net);
+ ret = -EBUSY;
+ if (!dev->ifindex)
+ dev->ifindex = dev_new_index(net);
+ else if (__dev_get_by_index(net, dev->ifindex))
+ goto err_uninit;
+
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
@@ -5625,6 +5666,8 @@ int register_netdevice(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
+ linkwatch_init_dev(dev);
+
dev_init_scheduler(dev);
dev_hold(dev);
list_netdevice(dev);
@@ -5732,6 +5775,7 @@ EXPORT_SYMBOL(netdev_refcnt_read);
/**
* netdev_wait_allrefs - wait until all references are gone.
+ * @dev: target net_device
*
* This is called when unregistering network devices.
*
@@ -5757,9 +5801,12 @@ static void netdev_wait_allrefs(struct net_device *dev)
/* Rebroadcast unregister notification */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
- * should have already handle it the first time */
+ __rtnl_unlock();
+ rcu_barrier();
+ rtnl_lock();
+
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
@@ -5821,9 +5868,8 @@ void netdev_run_todo(void)
__rtnl_unlock();
- /* Wait for rcu callbacks to finish before attempting to drain
- * the device list. This usually avoids a 250ms wait.
- */
+
+ /* Wait for rcu callbacks to finish before next phase */
if (!list_empty(&list))
rcu_barrier();
@@ -5832,6 +5878,10 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
+ __rtnl_unlock();
+
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
@@ -5927,6 +5977,8 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
return queue;
}
+static const struct ethtool_ops default_ethtool_ops;
+
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -6014,6 +6066,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
strcpy(dev->name, name);
dev->group = INIT_NETDEV_GROUP;
+ if (!dev->ethtool_ops)
+ dev->ethtool_ops = &default_ethtool_ops;
return dev;
free_all:
@@ -6198,7 +6252,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- if (dev_get_valid_name(dev, pat) < 0)
+ if (dev_get_valid_name(net, dev, pat) < 0)
goto out;
}
@@ -6226,7 +6280,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
the device is just moving and can keep their slaves up.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+ rcu_barrier();
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
/*
@@ -6409,22 +6464,26 @@ const char *netdev_drivername(const struct net_device *dev)
return empty;
}
-int __netdev_printk(const char *level, const struct net_device *dev,
+static int __netdev_printk(const char *level, const struct net_device *dev,
struct va_format *vaf)
{
int r;
- if (dev && dev->dev.parent)
- r = dev_printk(level, dev->dev.parent, "%s: %pV",
- netdev_name(dev), vaf);
- else if (dev)
+ if (dev && dev->dev.parent) {
+ r = dev_printk_emit(level[1] - '0',
+ dev->dev.parent,
+ "%s %s %s: %pV",
+ dev_driver_string(dev->dev.parent),
+ dev_name(dev->dev.parent),
+ netdev_name(dev), vaf);
+ } else if (dev) {
r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
- else
+ } else {
r = printk("%s(NULL net_device): %pV", level, vaf);
+ }
return r;
}
-EXPORT_SYMBOL(__netdev_printk);
int netdev_printk(const char *level, const struct net_device *dev,
const char *format, ...)
@@ -6439,6 +6498,7 @@ int netdev_printk(const char *level, const struct net_device *dev,
vaf.va = &args;
r = __netdev_printk(level, dev, &vaf);
+
va_end(args);
return r;
@@ -6458,6 +6518,7 @@ int func(const struct net_device *dev, const char *fmt, ...) \
vaf.va = &args; \
\
r = __netdev_printk(level, dev, &vaf); \
+ \
va_end(args); \
\
return r; \
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c4cc2bc..87cc17d 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,7 @@
*/
static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
- unsigned char *addr, int addr_len,
+ const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
{
struct netdev_hw_addr *ha;
@@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
}
static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
- unsigned char *addr, int addr_len,
+ const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
{
struct netdev_hw_addr *ha;
@@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
}
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
+static int __hw_addr_add(struct netdev_hw_addr_list *list,
+ const unsigned char *addr, int addr_len,
+ unsigned char addr_type)
{
return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
}
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
- unsigned char *addr, int addr_len,
+ const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
{
struct netdev_hw_addr *ha;
@@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
return -ENOENT;
}
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list,
+ const unsigned char *addr, int addr_len,
+ unsigned char addr_type)
{
return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
}
@@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init);
*
* The caller must hold the rtnl_mutex.
*/
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
unsigned char addr_type)
{
int err;
@@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add);
*
* The caller must hold the rtnl_mutex.
*/
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
unsigned char addr_type)
{
int err;
@@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple);
* @dev: device
* @addr: address to add
*/
-int dev_uc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
{
struct netdev_hw_addr *ha;
int err;
@@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl);
* Add a secondary unicast address to the device or increase
* the reference count if it already exists.
*/
-int dev_uc_add(struct net_device *dev, unsigned char *addr)
+int dev_uc_add(struct net_device *dev, const unsigned char *addr)
{
int err;
@@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add);
* Release reference to a secondary unicast address and remove it
* from the device if the reference count drops to zero.
*/
-int dev_uc_del(struct net_device *dev, unsigned char *addr)
+int dev_uc_del(struct net_device *dev, const unsigned char *addr)
{
int err;
@@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init);
* @dev: device
* @addr: address to add
*/
-int dev_mc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
{
struct netdev_hw_addr *ha;
int err;
@@ -566,7 +568,7 @@ out:
}
EXPORT_SYMBOL(dev_mc_add_excl);
-static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
bool global)
{
int err;
@@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
* Add a multicast address to the device or increase
* the reference count if it already exists.
*/
-int dev_mc_add(struct net_device *dev, unsigned char *addr)
+int dev_mc_add(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_add(dev, addr, false);
}
@@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add);
*
* Add a global multicast address to the device.
*/
-int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_add(dev, addr, true);
}
EXPORT_SYMBOL(dev_mc_add_global);
-static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
bool global)
{
int err;
@@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
* Release reference to a multicast address and remove it
* from the device if the reference count drops to zero.
*/
-int dev_mc_del(struct net_device *dev, unsigned char *addr)
+int dev_mc_del(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_del(dev, addr, false);
}
@@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del);
* Release reference to a multicast address and remove it
* from the device if the reference count drops to zero.
*/
-int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_del(dev, addr, true);
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 56d6361..ee6153e 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -222,8 +222,8 @@ void __dst_free(struct dst_entry *dst)
if (dst_garbage.timer_inc > DST_GC_INC) {
dst_garbage.timer_inc = DST_GC_INC;
dst_garbage.timer_expires = DST_GC_MIN;
- cancel_delayed_work(&dst_gc_work);
- schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
+ mod_delayed_work(system_wq, &dst_gc_work,
+ dst_garbage.timer_expires);
}
spin_unlock_bh(&dst_garbage.lock);
}
@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
struct dst_entry *dst, *last = NULL;
switch (event) {
- case NETDEV_UNREGISTER:
+ case NETDEV_UNREGISTER_FINAL:
case NETDEV_DOWN:
mutex_lock(&dst_gc_mutex);
for (dst = dst_busy_list; dst; dst = dst->next) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index cbf033d..4d64cc2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1426,18 +1426,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
- if (!dev->ethtool_ops) {
- /* A few commands do not require any driver support,
- * are unprivileged, and do not change anything, so we
- * can take a shortcut to them. */
- if (ethcmd == ETHTOOL_GDRVINFO)
- return ethtool_get_drvinfo(dev, useraddr);
- else if (ethcmd == ETHTOOL_GET_TS_INFO)
- return ethtool_get_ts_info(dev, useraddr);
- else
- return -EOPNOTSUPP;
- }
-
/* Allow some commands to be done by anyone */
switch (ethcmd) {
case ETHTOOL_GSET:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ab7db83..58a4ba2 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (unresolved)
ops->unresolved_rules++;
- notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
return 0;
@@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
- NETLINK_CB(skb).pid);
+ NETLINK_CB(skb).portid);
if (ops->delete)
ops->delete(rule);
fib_rule_put(rule);
@@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
if (idx < cb->args[1])
goto skip;
- if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+ if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWRULE,
NLM_F_MULTI, ops) < 0)
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 907efd2..3d92ebb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -167,6 +167,14 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
case BPF_S_ALU_DIV_K:
A = reciprocal_divide(A, K);
continue;
+ case BPF_S_ALU_MOD_X:
+ if (X == 0)
+ return 0;
+ A %= X;
+ continue;
+ case BPF_S_ALU_MOD_K:
+ A %= K;
+ continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
@@ -179,6 +187,13 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
case BPF_S_ALU_OR_K:
A |= K;
continue;
+ case BPF_S_ANC_ALU_XOR_X:
+ case BPF_S_ALU_XOR_X:
+ A ^= X;
+ continue;
+ case BPF_S_ALU_XOR_K:
+ A ^= K;
+ continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
@@ -326,9 +341,6 @@ load_b:
case BPF_S_ANC_CPU:
A = raw_smp_processor_id();
continue;
- case BPF_S_ANC_ALU_XOR_X:
- A ^= X;
- continue;
case BPF_S_ANC_NLATTR: {
struct nlattr *nla;
@@ -469,10 +481,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
[BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
[BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
[BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
+ [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
+ [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
[BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
[BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
[BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
[BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
+ [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
+ [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
[BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
[BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
[BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
@@ -531,6 +547,11 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
return -EINVAL;
ftest->k = reciprocal_value(ftest->k);
break;
+ case BPF_S_ALU_MOD_K:
+ /* check for division by zero */
+ if (ftest->k == 0)
+ return -EINVAL;
+ break;
case BPF_S_LD_MEM:
case BPF_S_LDX_MEM:
case BPF_S_ST:
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c3519c6..8f82a5c 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev)
}
+void linkwatch_init_dev(struct net_device *dev)
+{
+ /* Handle pre-registration link state changes */
+ if (!netif_carrier_ok(dev) || netif_dormant(dev))
+ rfc2863_policy(dev);
+}
+
+
static bool linkwatch_urgent_event(struct net_device *dev)
{
if (!netif_running(dev))
@@ -120,22 +128,13 @@ static void linkwatch_schedule_work(int urgent)
delay = 0;
/*
- * This is true if we've scheduled it immeditately or if we don't
- * need an immediate execution and it's already pending.
+ * If urgent, schedule immediate execution; otherwise, don't
+ * override the existing timer.
*/
- if (schedule_delayed_work(&linkwatch_work, delay) == !delay)
- return;
-
- /* Don't bother if there is nothing urgent. */
- if (!test_bit(LW_URGENT, &linkwatch_flags))
- return;
-
- /* It's already running which is good enough. */
- if (!__cancel_delayed_work(&linkwatch_work))
- return;
-
- /* Otherwise we reschedule it again for immediate execution. */
- schedule_delayed_work(&linkwatch_work, 0);
+ if (test_bit(LW_URGENT, &linkwatch_flags))
+ mod_delayed_work(system_wq, &linkwatch_work, 0);
+ else
+ schedule_delayed_work(&linkwatch_work, delay);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 117afaf..baca771 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1545,7 +1545,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot allocate neighbour cache hashes");
rwlock_init(&tbl->lock);
- INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
+ INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
skb_queue_head_init_class(&tbl->proxy_queue,
@@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (tidx < tbl_skip || (family && tbl->family != family))
continue;
- if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+ if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
NLM_F_MULTI) <= 0)
break;
@@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (neightbl_fill_param_info(skb, tbl, p,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGHTBL,
NLM_F_MULTI) <= 0)
@@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
continue;
if (idx < s_idx)
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) <= 0) {
@@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
continue;
if (idx < s_idx)
goto next;
- if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI, tbl) <= 0) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7260717..bcf02f6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -166,9 +166,21 @@ static ssize_t show_duplex(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
- if (!__ethtool_get_settings(netdev, &cmd))
- ret = sprintf(buf, "%s\n",
- cmd.duplex ? "full" : "half");
+ if (!__ethtool_get_settings(netdev, &cmd)) {
+ const char *duplex;
+ switch (cmd.duplex) {
+ case DUPLEX_HALF:
+ duplex = "half";
+ break;
+ case DUPLEX_FULL:
+ duplex = "full";
+ break;
+ default:
+ duplex = "unknown";
+ break;
+ }
+ ret = sprintf(buf, "%s\n", duplex);
+ }
}
rtnl_unlock();
return ret;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4c90e4..77a0388 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -26,6 +26,7 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
@@ -54,7 +55,7 @@ static atomic_t trapped;
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void arp_reply(struct sk_buff *skb);
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
@@ -170,7 +171,8 @@ static void poll_napi(struct net_device *dev)
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(dev->npinfo, napi, budget);
+ budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
+ napi, budget);
spin_unlock(&napi->poll_lock);
if (!budget)
@@ -185,13 +187,14 @@ static void service_arp_queue(struct netpoll_info *npi)
struct sk_buff *skb;
while ((skb = skb_dequeue(&npi->arp_tx)))
- arp_reply(skb);
+ netpoll_arp_reply(skb, npi);
}
}
static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
+ struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
if (!dev || !netif_running(dev))
return;
@@ -206,17 +209,18 @@ static void netpoll_poll_dev(struct net_device *dev)
poll_napi(dev);
if (dev->flags & IFF_SLAVE) {
- if (dev->npinfo) {
+ if (ni) {
struct net_device *bond_dev = dev->master;
struct sk_buff *skb;
- while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+ struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+ while ((skb = skb_dequeue(&ni->arp_tx))) {
skb->dev = bond_dev;
- skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+ skb_queue_tail(&bond_ni->arp_tx, skb);
}
}
}
- service_arp_queue(dev->npinfo);
+ service_arp_queue(ni);
zap_completion_queue();
}
@@ -302,6 +306,7 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
+/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
struct net_device *dev)
{
@@ -309,8 +314,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
unsigned long tries;
const struct net_device_ops *ops = dev->netdev_ops;
/* It is up to the caller to keep npinfo alive. */
- struct netpoll_info *npinfo = np->dev->npinfo;
+ struct netpoll_info *npinfo;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
__kfree_skb(skb);
return;
@@ -319,16 +327,22 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* don't get messages out of order, and no recursion */
if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
struct netdev_queue *txq;
- unsigned long flags;
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = netdev_pick_tx(dev, skb);
- local_irq_save(flags);
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_xmit_stopped(txq)) {
+ if (vlan_tx_tag_present(skb) &&
+ !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ break;
+ skb->vlan_tci = 0;
+ }
+
status = ops->ndo_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
@@ -347,10 +361,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
WARN_ONCE(!irqs_disabled(),
- "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+ "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
dev->name, ops->ndo_start_xmit);
- local_irq_restore(flags);
}
if (status != NETDEV_TX_OK) {
@@ -367,6 +380,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
struct udphdr *udph;
struct iphdr *iph;
struct ethhdr *eth;
+ static atomic_t ip_ident;
udp_len = len + sizeof(*udph);
ip_len = udp_len + sizeof(*iph);
@@ -402,7 +416,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
put_unaligned(0x45, (unsigned char *)iph);
iph->tos = 0;
put_unaligned(htons(ip_len), &(iph->tot_len));
- iph->id = 0;
+ iph->id = htons(atomic_inc_return(&ip_ident));
iph->frag_off = 0;
iph->ttl = 64;
iph->protocol = IPPROTO_UDP;
@@ -423,9 +437,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
}
EXPORT_SYMBOL(netpoll_send_udp);
-static void arp_reply(struct sk_buff *skb)
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
- struct netpoll_info *npinfo = skb->dev->npinfo;
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
@@ -543,13 +556,12 @@ static void arp_reply(struct sk_buff *skb)
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
-int __netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
int proto, len, ulen;
int hits = 0;
const struct iphdr *iph;
struct udphdr *uh;
- struct netpoll_info *npinfo = skb->dev->npinfo;
struct netpoll *np, *tmp;
if (list_empty(&npinfo->rx_np))
@@ -565,6 +577,12 @@ int __netpoll_rx(struct sk_buff *skb)
return 1;
}
+ if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+ skb = vlan_untag(skb);
+ if (unlikely(!skb))
+ goto out;
+ }
+
proto = ntohs(eth_hdr(skb)->h_proto);
if (proto != ETH_P_IP)
goto out;
@@ -715,7 +733,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
}
EXPORT_SYMBOL(netpoll_parse_options);
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
struct netpoll_info *npinfo;
const struct net_device_ops *ops;
@@ -734,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
}
if (!ndev->npinfo) {
- npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+ npinfo = kmalloc(sizeof(*npinfo), gfp);
if (!npinfo) {
err = -ENOMEM;
goto out;
@@ -752,7 +770,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
ops = np->dev->netdev_ops;
if (ops->ndo_netpoll_setup) {
- err = ops->ndo_netpoll_setup(ndev, npinfo);
+ err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
if (err)
goto free_npinfo;
}
@@ -857,7 +875,7 @@ int netpoll_setup(struct netpoll *np)
refill_skbs();
rtnl_lock();
- err = __netpoll_setup(np, ndev);
+ err = __netpoll_setup(np, ndev, GFP_KERNEL);
rtnl_unlock();
if (err)
@@ -878,6 +896,24 @@ static int __init netpoll_init(void)
}
core_initcall(netpoll_init);
+static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
+{
+ struct netpoll_info *npinfo =
+ container_of(rcu_head, struct netpoll_info, rcu);
+
+ skb_queue_purge(&npinfo->arp_tx);
+ skb_queue_purge(&npinfo->txq);
+
+ /* we can't call cancel_delayed_work_sync here, as we are in softirq */
+ cancel_delayed_work(&npinfo->tx_work);
+
+ /* clean after last, unfinished work */
+ __skb_queue_purge(&npinfo->txq);
+ /* now cancel it again */
+ cancel_delayed_work(&npinfo->tx_work);
+ kfree(npinfo);
+}
+
void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
@@ -903,20 +939,24 @@ void __netpoll_cleanup(struct netpoll *np)
ops->ndo_netpoll_cleanup(np->dev);
RCU_INIT_POINTER(np->dev->npinfo, NULL);
+ call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+ }
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
- /* avoid racing with NAPI reading npinfo */
- synchronize_rcu_bh();
+static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+{
+ struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
- skb_queue_purge(&npinfo->arp_tx);
- skb_queue_purge(&npinfo->txq);
- cancel_delayed_work_sync(&npinfo->tx_work);
+ __netpoll_cleanup(np);
+ kfree(np);
+}
- /* clean after last, unfinished work */
- __skb_queue_purge(&npinfo->txq);
- kfree(npinfo);
- }
+void __netpoll_free_rcu(struct netpoll *np)
+{
+ call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
}
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
void netpoll_cleanup(struct netpoll *np)
{
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index ed0c043..79285a3 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -73,7 +73,6 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len)
((sizeof(u32) * new_len));
struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
struct netprio_map *old_priomap;
- int i;
old_priomap = rtnl_dereference(dev->priomap);
@@ -82,10 +81,10 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len)
return -ENOMEM;
}
- for (i = 0;
- old_priomap && (i < old_priomap->priomap_len);
- i++)
- new_priomap->priomap[i] = old_priomap->priomap[i];
+ if (old_priomap)
+ memcpy(new_priomap->priomap, old_priomap->priomap,
+ old_priomap->priomap_len *
+ sizeof(old_priomap->priomap[0]));
new_priomap->priomap_len = new_len;
@@ -101,42 +100,14 @@ static int write_update_netdev_table(struct net_device *dev)
u32 max_len;
struct netprio_map *map;
- rtnl_lock();
max_len = atomic_read(&max_prioidx) + 1;
map = rtnl_dereference(dev->priomap);
if (!map || map->priomap_len < max_len)
ret = extend_netdev_table(dev, max_len);
- rtnl_unlock();
return ret;
}
-static int update_netdev_tables(void)
-{
- int ret = 0;
- struct net_device *dev;
- u32 max_len;
- struct netprio_map *map;
-
- rtnl_lock();
- max_len = atomic_read(&max_prioidx) + 1;
- for_each_netdev(&init_net, dev) {
- map = rtnl_dereference(dev->priomap);
- /*
- * don't allocate priomap if we didn't
- * change net_prio.ifpriomap (map == NULL),
- * this will speed up skb_update_prio.
- */
- if (map && map->priomap_len < max_len) {
- ret = extend_netdev_table(dev, max_len);
- if (ret < 0)
- break;
- }
- }
- rtnl_unlock();
- return ret;
-}
-
static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
{
struct cgroup_netprio_state *cs;
@@ -155,12 +126,6 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
goto out;
}
- ret = update_netdev_tables();
- if (ret < 0) {
- put_prioidx(cs->prioidx);
- goto out;
- }
-
return &cs->css;
out:
kfree(cs);
@@ -256,17 +221,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
if (!dev)
goto out_free_devname;
+ rtnl_lock();
ret = write_update_netdev_table(dev);
if (ret < 0)
goto out_put_dev;
- rcu_read_lock();
- map = rcu_dereference(dev->priomap);
+ map = rtnl_dereference(dev->priomap);
if (map)
map->priomap[prioidx] = priority;
- rcu_read_unlock();
out_put_dev:
+ rtnl_unlock();
dev_put(dev);
out_free_devname:
@@ -274,54 +239,26 @@ out_free_devname:
return ret;
}
+static int update_netprio(const void *v, struct file *file, unsigned n)
+{
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+ if (sock)
+ sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
+ return 0;
+}
+
void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *p;
- char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
-
- if (!tmp) {
- pr_warn("Unable to attach cgrp due to alloc failure!\n");
- return;
- }
+ void *v;
cgroup_taskset_for_each(p, cgrp, tset) {
- unsigned int fd;
- struct fdtable *fdt;
- struct files_struct *files;
-
task_lock(p);
- files = p->files;
- if (!files) {
- task_unlock(p);
- continue;
- }
-
- rcu_read_lock();
- fdt = files_fdtable(files);
- for (fd = 0; fd < fdt->max_fds; fd++) {
- char *path;
- struct file *file;
- struct socket *sock;
- unsigned long s;
- int rv, err = 0;
-
- file = fcheck_files(files, fd);
- if (!file)
- continue;
-
- path = d_path(&file->f_path, tmp, PAGE_SIZE);
- rv = sscanf(path, "socket:[%lu]", &s);
- if (rv <= 0)
- continue;
-
- sock = sock_from_file(file, &err);
- if (!err)
- sock_update_netprioidx(sock->sk, p);
- }
- rcu_read_unlock();
+ v = (void *)(unsigned long)task_netprioidx(p);
+ iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
}
- kfree(tmp);
}
static struct cftype ss_files[] = {
@@ -342,11 +279,19 @@ struct cgroup_subsys net_prio_subsys = {
.create = cgrp_create,
.destroy = cgrp_destroy,
.attach = net_prio_attach,
-#ifdef CONFIG_NETPRIO_CGROUP
.subsys_id = net_prio_subsys_id,
-#endif
.base_cftypes = ss_files,
- .module = THIS_MODULE
+ .module = THIS_MODULE,
+
+ /*
+ * net_prio has artificial limit on the number of cgroups and
+ * disallows nesting making it impossible to co-mount it with other
+ * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
+ * limit and properly nest configuration such that children follow
+ * their parents' configurations by default and are allowed to
+ * override and remove the following.
+ */
+ .broken_hierarchy = true,
};
static int netprio_device_event(struct notifier_block *unused,
@@ -382,10 +327,6 @@ static int __init init_cgroup_netprio(void)
ret = cgroup_load_subsys(&net_prio_subsys);
if (ret)
goto out;
-#ifndef CONFIG_NETPRIO_CGROUP
- smp_wmb();
- net_prio_subsys_id = net_prio_subsys.subsys_id;
-#endif
register_netdevice_notifier(&netprio_device_notifier);
@@ -402,11 +343,6 @@ static void __exit exit_cgroup_netprio(void)
cgroup_unload_subsys(&net_prio_subsys);
-#ifndef CONFIG_NETPRIO_CGROUP
- net_prio_subsys_id = -1;
- synchronize_rcu();
-#endif
-
rtnl_lock();
for_each_netdev(&init_net, dev) {
old = rtnl_dereference(dev->priomap);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index cce9e53..148e73d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2721,7 +2721,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
pkt_dev->pkt_overhead;
- if (datalen < sizeof(struct pktgen_hdr))
+ if (datalen < 0 || datalen < sizeof(struct pktgen_hdr))
datalen = sizeof(struct pktgen_hdr);
udph->source = htons(pkt_dev->cur_udp_src);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b570a6..c31d9e8 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
kfree(lopt);
}
+/*
+ * This function is called to set a Fast Open socket's "fastopen_rsk" field
+ * to NULL when a TFO socket no longer needs to access the request_sock.
+ * This happens only after 3WHS has been either completed or aborted (e.g.,
+ * RST is received).
+ *
+ * Before TFO, a child socket is created only after 3WHS is completed,
+ * hence it never needs to access the request_sock. things get a lot more
+ * complex with TFO. A child socket, accepted or not, has to access its
+ * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
+ * until 3WHS is either completed or aborted. Afterwards the req will stay
+ * until either the child socket is accepted, or in the rare case when the
+ * listener is closed before the child is accepted.
+ *
+ * In short, a request socket is only freed after BOTH 3WHS has completed
+ * (or aborted) and the child socket has been accepted (or listener closed).
+ * When a child socket is accepted, its corresponding req->sk is set to
+ * NULL since it's no longer needed. More importantly, "req->sk == NULL"
+ * will be used by the code below to determine if a child socket has been
+ * accepted or not, and the check is protected by the fastopenq->lock
+ * described below.
+ *
+ * Note that fastopen_rsk is only accessed from the child socket's context
+ * with its socket lock held. But a request_sock (req) can be accessed by
+ * both its child socket through fastopen_rsk, and a listener socket through
+ * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
+ * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
+ * only in the rare case when both the listener and the child locks are held,
+ * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
+ * The lock also protects other fields such as fastopenq->qlen, which is
+ * decremented by this function when fastopen_rsk is no longer needed.
+ *
+ * Note that another solution was to simply use the existing socket lock
+ * from the listener. But first socket lock is difficult to use. It is not
+ * a simple spin lock - one must consider sock_owned_by_user() and arrange
+ * to use sk_add_backlog() stuff. But what really makes it infeasible is the
+ * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
+ * acquire a child's lock while holding listener's socket lock. A corner
+ * case might also exist in tcp_v4_hnd_req() that will trigger this locking
+ * order.
+ *
+ * When a TFO req is created, it needs to sock_hold its listener to prevent
+ * the latter data structure from going away.
+ *
+ * This function also sets "treq->listener" to NULL and unreference listener
+ * socket. treq->listener is used by the listener so it is protected by the
+ * fastopenq->lock in this function.
+ */
+void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
+ bool reset)
+{
+ struct sock *lsk = tcp_rsk(req)->listener;
+ struct fastopen_queue *fastopenq =
+ inet_csk(lsk)->icsk_accept_queue.fastopenq;
+
+ BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
+
+ tcp_sk(sk)->fastopen_rsk = NULL;
+ spin_lock_bh(&fastopenq->lock);
+ fastopenq->qlen--;
+ tcp_rsk(req)->listener = NULL;
+ if (req->sk) /* the child socket hasn't been accepted yet */
+ goto out;
+
+ if (!reset || lsk->sk_state != TCP_LISTEN) {
+ /* If the listener has been closed don't bother with the
+ * special RST handling below.
+ */
+ spin_unlock_bh(&fastopenq->lock);
+ sock_put(lsk);
+ reqsk_free(req);
+ return;
+ }
+ /* Wait for 60secs before removing a req that has triggered RST.
+ * This is a simple defense against TFO spoofing attack - by
+ * counting the req against fastopen.max_qlen, and disabling
+ * TFO when the qlen exceeds max_qlen.
+ *
+ * For more details see CoNext'11 "TCP Fast Open" paper.
+ */
+ req->expires = jiffies + 60*HZ;
+ if (fastopenq->rskq_rst_head == NULL)
+ fastopenq->rskq_rst_head = req;
+ else
+ fastopenq->rskq_rst_tail->dl_next = req;
+
+ req->dl_next = NULL;
+ fastopenq->rskq_rst_tail = req;
+ fastopenq->qlen++;
+out:
+ spin_unlock_bh(&fastopenq->lock);
+ sock_put(lsk);
+ return;
+}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2c5a0a0..76d4c2c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -618,7 +618,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
long expires, u32 error)
{
struct rta_cacheinfo ci = {
- .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+ .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
.rta_used = dst->__use,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
@@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (idx < s_idx)
goto cont;
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
NLM_F_MULTI,
ext_filter_mask) <= 0)
@@ -1812,8 +1812,6 @@ replay:
return -ENODEV;
}
- if (ifm->ifi_index)
- return -EOPNOTSUPP;
if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
return -EOPNOTSUPP;
@@ -1839,10 +1837,14 @@ replay:
return PTR_ERR(dest_net);
dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
-
- if (IS_ERR(dev))
+ if (IS_ERR(dev)) {
err = PTR_ERR(dev);
- else if (ops->newlink)
+ goto out;
+ }
+
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink)
err = ops->newlink(net, dev, tb, data);
else
err = register_netdevice(dev);
@@ -1897,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (nskb == NULL)
return -ENOBUFS;
- err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
+ err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
- err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
+ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
return err;
}
@@ -2088,7 +2090,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
(dev->priv_flags & IFF_BRIDGE_PORT)) {
master = dev->master;
- err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr,
+ err = master->netdev_ops->ndo_fdb_add(ndm, tb,
+ dev, addr,
nlh->nlmsg_flags);
if (err)
goto out;
@@ -2098,7 +2101,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
/* Embedded bridge, macvlan, and any other device support */
if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
- err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr,
+ err = dev->netdev_ops->ndo_fdb_add(ndm, tb,
+ dev, addr,
nlh->nlmsg_flags);
if (!err) {
@@ -2178,9 +2182,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
{
struct netdev_hw_addr *ha;
int err;
- u32 pid, seq;
+ u32 portid, seq;
- pid = NETLINK_CB(cb->skb).pid;
+ portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
list_for_each_entry(ha, &list->list, list) {
@@ -2188,7 +2192,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
- pid, seq, 0, NTF_SELF);
+ portid, seq, 0, NTF_SELF);
if (err < 0)
return err;
skip:
@@ -2356,7 +2360,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_GOING_DOWN:
case NETDEV_UNREGISTER:
- case NETDEV_UNREGISTER_BATCH:
+ case NETDEV_UNREGISTER_FINAL:
case NETDEV_RELEASE:
case NETDEV_JOIN:
break;
@@ -2379,9 +2383,10 @@ static int __net_init rtnetlink_net_init(struct net *net)
.groups = RTNLGRP_MAX,
.input = rtnetlink_rcv,
.cb_mutex = &rtnl_mutex,
+ .flags = NL_CFG_F_NONROOT_RECV,
};
- sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg);
+ sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
if (!sk)
return -ENOMEM;
net->rtnl = sk;
@@ -2414,7 +2419,6 @@ void __init rtnetlink_init(void)
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
- netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
diff --git a/net/core/scm.c b/net/core/scm.c
index 8f6ccfd..ab57084 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -45,12 +45,17 @@
static __inline__ int scm_check_creds(struct ucred *creds)
{
const struct cred *cred = current_cred();
+ kuid_t uid = make_kuid(cred->user_ns, creds->uid);
+ kgid_t gid = make_kgid(cred->user_ns, creds->gid);
+
+ if (!uid_valid(uid) || !gid_valid(gid))
+ return -EINVAL;
if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
- ((creds->uid == cred->uid || creds->uid == cred->euid ||
- creds->uid == cred->suid) || capable(CAP_SETUID)) &&
- ((creds->gid == cred->gid || creds->gid == cred->egid ||
- creds->gid == cred->sgid) || capable(CAP_SETGID))) {
+ ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
+ uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) &&
+ ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
+ gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
@@ -149,39 +154,54 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
goto error;
break;
case SCM_CREDENTIALS:
+ {
+ struct ucred creds;
+ kuid_t uid;
+ kgid_t gid;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
goto error;
- memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
- err = scm_check_creds(&p->creds);
+ memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+ err = scm_check_creds(&creds);
if (err)
goto error;
- if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
+ p->creds.pid = creds.pid;
+ if (!p->pid || pid_vnr(p->pid) != creds.pid) {
struct pid *pid;
err = -ESRCH;
- pid = find_get_pid(p->creds.pid);
+ pid = find_get_pid(creds.pid);
if (!pid)
goto error;
put_pid(p->pid);
p->pid = pid;
}
+ err = -EINVAL;
+ uid = make_kuid(current_user_ns(), creds.uid);
+ gid = make_kgid(current_user_ns(), creds.gid);
+ if (!uid_valid(uid) || !gid_valid(gid))
+ goto error;
+
+ p->creds.uid = uid;
+ p->creds.gid = gid;
+
if (!p->cred ||
- (p->cred->euid != p->creds.uid) ||
- (p->cred->egid != p->creds.gid)) {
+ !uid_eq(p->cred->euid, uid) ||
+ !gid_eq(p->cred->egid, gid)) {
struct cred *cred;
err = -ENOMEM;
cred = prepare_creds();
if (!cred)
goto error;
- cred->uid = cred->euid = p->creds.uid;
- cred->gid = cred->egid = p->creds.gid;
+ cred->uid = cred->euid = uid;
+ cred->gid = cred->egid = gid;
if (p->cred)
put_cred(p->cred);
p->cred = cred;
}
break;
+ }
default:
goto error;
}
@@ -265,6 +285,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
i++, cmfptr++)
{
+ struct socket *sock;
int new_fd;
err = security_file_receive(fp[i]);
if (err)
@@ -280,8 +301,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
break;
}
/* Bump the usage count and install the file. */
- get_file(fp[i]);
- fd_install(new_fd, fp[i]);
+ sock = sock_from_file(fp[i], &err);
+ if (sock)
+ sock_update_netprioidx(sock->sk, current);
+ fd_install(new_fd, get_file(fp[i]));
}
if (i > 0)
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 99b2596..e61a8bb 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
return hash[0];
}
+EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fe00d12..cdc2859 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
EXPORT_SYMBOL(build_skb);
struct netdev_alloc_cache {
- struct page *page;
- unsigned int offset;
- unsigned int pagecnt_bias;
+ struct page_frag frag;
+ /* we maintain a pagecount bias, so that we dont dirty cache line
+ * containing page->_count every time we allocate a fragment.
+ */
+ unsigned int pagecnt_bias;
};
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
struct netdev_alloc_cache *nc;
void *data = NULL;
+ int order;
unsigned long flags;
local_irq_save(flags);
nc = &__get_cpu_var(netdev_alloc_cache);
- if (unlikely(!nc->page)) {
+ if (unlikely(!nc->frag.page)) {
refill:
- nc->page = alloc_page(gfp_mask);
- if (unlikely(!nc->page))
- goto end;
+ for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+ gfp_t gfp = gfp_mask;
+
+ if (order)
+ gfp |= __GFP_COMP | __GFP_NOWARN;
+ nc->frag.page = alloc_pages(gfp, order);
+ if (likely(nc->frag.page))
+ break;
+ if (--order < 0)
+ goto end;
+ }
+ nc->frag.size = PAGE_SIZE << order;
recycle:
- atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
- nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
- nc->offset = 0;
+ atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+ nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+ nc->frag.offset = 0;
}
- if (nc->offset + fragsz > PAGE_SIZE) {
+ if (nc->frag.offset + fragsz > nc->frag.size) {
/* avoid unnecessary locked operations if possible */
- if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
- atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+ if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
+ atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
goto recycle;
goto refill;
}
- data = page_address(nc->page) + nc->offset;
- nc->offset += fragsz;
+ data = page_address(nc->frag.page) + nc->frag.offset;
+ nc->frag.offset += fragsz;
nc->pagecnt_bias--;
end:
local_irq_restore(flags);
@@ -1655,38 +1669,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len,
unsigned int *offset,
struct sk_buff *skb, struct sock *sk)
{
- struct page *p = sk->sk_sndmsg_page;
- unsigned int off;
-
- if (!p) {
-new_page:
- p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
- if (!p)
- return NULL;
+ struct page_frag *pfrag = sk_page_frag(sk);
- off = sk->sk_sndmsg_off = 0;
- /* hold one ref to this page until it's full */
- } else {
- unsigned int mlen;
-
- /* If we are the only user of the page, we can reset offset */
- if (page_count(p) == 1)
- sk->sk_sndmsg_off = 0;
- off = sk->sk_sndmsg_off;
- mlen = PAGE_SIZE - off;
- if (mlen < 64 && mlen < *len) {
- put_page(p);
- goto new_page;
- }
+ if (!sk_page_frag_refill(sk, pfrag))
+ return NULL;
- *len = min_t(unsigned int, *len, mlen);
- }
+ *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
- memcpy(page_address(p) + off, page_address(page) + *offset, *len);
- sk->sk_sndmsg_off += *len;
- *offset = off;
+ memcpy(page_address(pfrag->page) + pfrag->offset,
+ page_address(page) + *offset, *len);
+ *offset = pfrag->offset;
+ pfrag->offset += *len;
- return p;
+ return pfrag->page;
}
static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
@@ -3488,8 +3483,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
return false;
- delta = from->truesize -
- SKB_TRUESIZE(skb_end_pointer(from) - from->head);
+ delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
}
WARN_ON_ONCE(delta < len);
@@ -3502,7 +3496,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (!skb_cloned(from))
skb_shinfo(from)->nr_frags = 0;
- /* if the skb is cloned this does nothing since we set nr_frags to 0 */
+ /* if the skb is not cloned this does nothing
+ * since we set nr_frags to 0.
+ */
for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
skb_frag_ref(from, i);
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced..8a146cf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -326,17 +326,6 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sk_backlog_rcv);
-#if defined(CONFIG_CGROUPS)
-#if !defined(CONFIG_NET_CLS_CGROUP)
-int net_cls_subsys_id = -1;
-EXPORT_SYMBOL_GPL(net_cls_subsys_id);
-#endif
-#if !defined(CONFIG_NETPRIO_CGROUP)
-int net_prio_subsys_id = -1;
-EXPORT_SYMBOL_GPL(net_prio_subsys_id);
-#endif
-#endif
-
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
struct timeval tv;
@@ -691,7 +680,8 @@ set_rcvbuf:
case SO_KEEPALIVE:
#ifdef CONFIG_INET
- if (sk->sk_protocol == IPPROTO_TCP)
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
tcp_set_keepalive(sk, valbool);
#endif
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
@@ -868,8 +858,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
if (cred) {
struct user_namespace *current_ns = current_user_ns();
- ucred->uid = from_kuid(current_ns, cred->euid);
- ucred->gid = from_kgid(current_ns, cred->egid);
+ ucred->uid = from_kuid_munged(current_ns, cred->euid);
+ ucred->gid = from_kgid_munged(current_ns, cred->egid);
}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);
@@ -1223,6 +1213,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
}
#ifdef CONFIG_CGROUPS
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
u32 classid;
@@ -1230,11 +1221,13 @@ void sock_update_classid(struct sock *sk)
rcu_read_lock(); /* doing current task, which cannot vanish. */
classid = task_cls_classid(current);
rcu_read_unlock();
- if (classid && classid != sk->sk_classid)
+ if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
+#endif
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
{
if (in_interrupt())
@@ -1244,6 +1237,7 @@ void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
+#endif
/**
* sk_alloc - All socket objects are allocated here
@@ -1464,19 +1458,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
-void __init sk_init(void)
-{
- if (totalram_pages <= 4096) {
- sysctl_wmem_max = 32767;
- sysctl_rmem_max = 32767;
- sysctl_wmem_default = 32767;
- sysctl_rmem_default = 32767;
- } else if (totalram_pages >= 131072) {
- sysctl_wmem_max = 131071;
- sysctl_rmem_max = 131071;
- }
-}
-
/*
* Simple resource managers for sockets.
*/
@@ -1523,16 +1504,23 @@ EXPORT_SYMBOL(sock_rfree);
void sock_edemux(struct sk_buff *skb)
{
- sock_put(skb->sk);
+ struct sock *sk = skb->sk;
+
+#ifdef CONFIG_INET
+ if (sk->sk_state == TCP_TIME_WAIT)
+ inet_twsk_put(inet_twsk(sk));
+ else
+#endif
+ sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);
-int sock_i_uid(struct sock *sk)
+kuid_t sock_i_uid(struct sock *sk)
{
- int uid;
+ kuid_t uid;
read_lock_bh(&sk->sk_callback_lock);
- uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
+ uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
read_unlock_bh(&sk->sk_callback_lock);
return uid;
}
@@ -1737,6 +1725,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
}
EXPORT_SYMBOL(sock_alloc_send_skb);
+/* On 32bit arches, an skb frag is limited to 2^15 */
+#define SKB_FRAG_PAGE_ORDER get_order(32768)
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+ int order;
+
+ if (pfrag->page) {
+ if (atomic_read(&pfrag->page->_count) == 1) {
+ pfrag->offset = 0;
+ return true;
+ }
+ if (pfrag->offset < pfrag->size)
+ return true;
+ put_page(pfrag->page);
+ }
+
+ /* We restrict high order allocations to users that can afford to wait */
+ order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+
+ do {
+ gfp_t gfp = sk->sk_allocation;
+
+ if (order)
+ gfp |= __GFP_COMP | __GFP_NOWARN;
+ pfrag->page = alloc_pages(gfp, order);
+ if (likely(pfrag->page)) {
+ pfrag->offset = 0;
+ pfrag->size = PAGE_SIZE << order;
+ return true;
+ }
+ } while (--order >= 0);
+
+ sk_enter_memory_pressure(sk);
+ sk_stream_moderate_sndbuf(sk);
+ return false;
+}
+EXPORT_SYMBOL(sk_page_frag_refill);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -2166,8 +2193,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_error_report = sock_def_error_report;
sk->sk_destruct = sock_def_destruct;
- sk->sk_sndmsg_page = NULL;
- sk->sk_sndmsg_off = 0;
+ sk->sk_frag.page = NULL;
+ sk->sk_frag.offset = 0;
sk->sk_peek_off = -1;
sk->sk_peer_pid = NULL;
@@ -2410,6 +2437,12 @@ void sk_common_release(struct sock *sk)
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
+
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ }
+
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9d8755e..602cd63 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net)
.input = sock_diag_rcv,
};
- net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG,
- THIS_MODULE, &cfg);
+ net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
return net->diag_nlsk == NULL ? -ENOMEM : 0;
}
diff --git a/net/core/utils.c b/net/core/utils.c
index 39895a6..f5613d5 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -294,6 +294,26 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
+ const __be32 *from, const __be32 *to,
+ int pseudohdr)
+{
+ __be32 diff[] = {
+ ~from[0], ~from[1], ~from[2], ~from[3],
+ to[0], to[1], to[2], to[3],
+ };
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_partial(diff, sizeof(diff),
+ ~csum_unfold(*sum)));
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_partial(diff, sizeof(diff),
+ ~skb->csum);
+ } else if (pseudohdr)
+ *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+ csum_unfold(*sum)));
+}
+EXPORT_SYMBOL(inet_proto_csum_replace16);
+
int mac_pton(const char *s, u8 *mac)
{
int i;