summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorScott Wood <scottwood@freescale.com>2015-02-13 22:12:06 (GMT)
committerScott Wood <scottwood@freescale.com>2015-02-13 22:19:22 (GMT)
commit6faa2909871d8937cb2f79a10e1b21ffe193fac1 (patch)
treef558a94f1553814cc122ab8d9e04c0ebad5262a5 /net
parentfcb2fb84301c673ee15ca04e7a2fc965712d49a0 (diff)
downloadlinux-fsl-qoriq-6faa2909871d8937cb2f79a10e1b21ffe193fac1.tar.xz
Reset to 3.12.37
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c50
-rw-r--r--net/8021q/vlan_dev.c50
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/batman-adv/gateway_client.c11
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/bluetooth/hci_conn.c11
-rw-r--r--net/bluetooth/hci_event.c17
-rw-r--r--net/bluetooth/l2cap_sock.c13
-rw-r--r--net/bluetooth/mgmt.c7
-rw-r--r--net/bluetooth/rfcomm/core.c7
-rw-r--r--net/bluetooth/rfcomm/sock.c3
-rw-r--r--net/bluetooth/sco.c6
-rw-r--r--net/bridge/br_input.c6
-rw-r--r--net/bridge/br_netlink.c15
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/bridge/br_vlan.c50
-rw-r--r--net/bridge/netfilter/ebtables.c5
-rw-r--r--net/can/gw.c4
-rw-r--r--net/ceph/auth_x.c256
-rw-r--r--net/ceph/crypto.c169
-rw-r--r--net/ceph/messenger.c28
-rw-r--r--net/ceph/mon_client.c8
-rw-r--r--net/compat.c9
-rw-r--r--net/core/dev.c164
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/iovec.c10
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c113
-rw-r--r--net/core/secure_seq.c25
-rw-r--r--net/core/skbuff.c182
-rw-r--r--net/core/sock.c52
-rw-r--r--net/core/sock_diag.c4
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_fib.c4
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/dns_resolver/dns_query.c4
-rw-r--r--net/ipv4/datagram.c20
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/icmp.c32
-rw-r--r--net/ipv4/igmp.c14
-rw-r--r--net/ipv4/inetpeer.c18
-rw-r--r--net/ipv4/ip_forward.c4
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c27
-rw-r--r--net/ipv4/ip_tunnel.c15
-rw-r--r--net/ipv4/ip_tunnel_core.c5
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipip.c5
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c7
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c94
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c62
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_input.c29
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_output.c16
-rw-r--r--net/ipv4/tcp_vegas.c3
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_gre.c25
-rw-r--r--net/ipv6/ip6_output.c27
-rw-r--r--net/ipv6/ip6_tunnel.c13
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c6
-rw-r--r--net/ipv6/output_core.c60
-rw-r--r--net/ipv6/route.c5
-rw-r--r--net/ipv6/sit.c26
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipx/af_ipx.c6
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/l2tp/l2tp_ppp.c9
-rw-r--r--net/mac80211/debugfs_netdev.c6
-rw-r--r--net/mac80211/ibss.c1
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c8
-rw-r--r--net/mac80211/key.c2
-rw-r--r--net/mac80211/main.c2
-rw-r--r--net/mac80211/mlme.c44
-rw-r--r--net/mac80211/offchannel.c26
-rw-r--r--net/mac80211/pm.c14
-rw-r--r--net/mac80211/rate.c2
-rw-r--r--net/mac80211/rx.c19
-rw-r--r--net/mac80211/sta_info.c1
-rw-r--r--net/mac80211/tx.c29
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c4
-rw-r--r--net/netfilter/nf_nat_core.c35
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_log.c31
-rw-r--r--net/netlink/af_netlink.c140
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/openvswitch/actions.c5
-rw-r--r--net/packet/af_packet.c22
-rw-r--r--net/packet/diag.c7
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/pn_netlink.c8
-rw-r--r--net/rds/ib_rdma.c3
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/sch_api.c6
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sctp/associola.c5
-rw-r--r--net/sctp/auth.c19
-rw-r--r--net/sctp/endpointola.c3
-rw-r--r--net/sctp/inqueue.c33
-rw-r--r--net/sctp/output.c6
-rw-r--r--net/sctp/protocol.c7
-rw-r--r--net/sctp/sm_make_chunk.c134
-rw-r--r--net/sctp/sm_statefuns.c46
-rw-r--r--net/sctp/socket.c96
-rw-r--r--net/sctp/sysctl.c56
-rw-r--r--net/sctp/ulpevent.c122
-rw-r--r--net/socket.c7
-rw-r--r--net/sunrpc/svc_xprt.c2
-rw-r--r--net/sunrpc/svcsock.c18
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/tipc/bcast.c1
-rw-r--r--net/tipc/msg.c22
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/vmw_vsock/af_vsock.c47
-rw-r--r--net/wireless/nl80211.c3
-rw-r--r--net/wireless/reg.c6
-rw-r--r--net/wireless/sme.c2
-rw-r--r--net/wireless/trace.h3
-rw-r--r--net/xfrm/xfrm_policy.c48
-rw-r--r--net/xfrm/xfrm_user.c38
146 files changed, 1862 insertions, 1276 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 6ee48aa..5d56e05 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -106,56 +106,6 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
}
EXPORT_SYMBOL(vlan_dev_vlan_id);
-static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
-{
- if (skb_cow(skb, skb_headroom(skb)) < 0)
- return NULL;
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
- skb->mac_header += VLAN_HLEN;
- return skb;
-}
-
-struct sk_buff *vlan_untag(struct sk_buff *skb)
-{
- struct vlan_hdr *vhdr;
- u16 vlan_tci;
-
- if (unlikely(vlan_tx_tag_present(skb))) {
- /* vlan_tci is already set-up so leave this for another time */
- return skb;
- }
-
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(!skb))
- goto err_free;
-
- if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
- goto err_free;
-
- vhdr = (struct vlan_hdr *) skb->data;
- vlan_tci = ntohs(vhdr->h_vlan_TCI);
- __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
-
- skb_pull_rcsum(skb, VLAN_HLEN);
- vlan_set_encap_proto(skb, vhdr);
-
- skb = vlan_reorder_header(skb);
- if (unlikely(!skb))
- goto err_free;
-
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb_reset_mac_len(skb);
-
- return skb;
-
-err_free:
- kfree_skb(skb);
- return NULL;
-}
-EXPORT_SYMBOL(vlan_untag);
-
-
/*
* vlan info and vid list
*/
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index d1537dc..0c21361 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -512,10 +512,48 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
}
}
+static int vlan_calculate_locking_subclass(struct net_device *real_dev)
+{
+ int subclass = 0;
+
+ while (is_vlan_dev(real_dev)) {
+ subclass++;
+ real_dev = vlan_dev_priv(real_dev)->real_dev;
+ }
+
+ return subclass;
+}
+
+static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+ int err = 0, subclass;
+
+ subclass = vlan_calculate_locking_subclass(to);
+
+ spin_lock_nested(&to->addr_list_lock, subclass);
+ err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ spin_unlock(&to->addr_list_lock);
+}
+
+static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+ int err = 0, subclass;
+
+ subclass = vlan_calculate_locking_subclass(to);
+
+ spin_lock_nested(&to->addr_list_lock, subclass);
+ err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ spin_unlock(&to->addr_list_lock);
+}
+
static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
{
- dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
- dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+ vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+ vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
/*
@@ -624,9 +662,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- if (is_vlan_dev(real_dev))
- subclass = 1;
-
+ subclass = vlan_calculate_locking_subclass(dev);
vlan_dev_set_lockdep_class(dev, subclass);
vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
@@ -658,9 +694,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
netdev_features_t old_features = features;
- features &= real_dev->vlan_features;
+ features = netdev_intersect_features(features, real_dev->vlan_features);
features |= NETIF_F_RXCSUM;
- features &= real_dev->features;
+ features = netdev_intersect_features(features, real_dev->features);
features |= old_features & NETIF_F_SOFT_FEATURES;
features |= NETIF_F_LLTX;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7d424ac..43e875c 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;
/* Queue packet (standard) */
- skb->sk = sock;
-
if (sock_queue_rcv_skb(sock, skb) < 0)
goto drop;
@@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
if (!skb)
goto out;
- skb->sk = sk;
skb_reserve(skb, ddp_dl->header_length);
skb_reserve(skb, dev->hard_header_len);
skb->dev = dev;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 1ce4b87..0679960 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -38,8 +38,10 @@
static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node)
{
- if (atomic_dec_and_test(&gw_node->refcount))
+ if (atomic_dec_and_test(&gw_node->refcount)) {
+ batadv_orig_node_free_ref(gw_node->orig_node);
kfree_rcu(gw_node, rcu);
+ }
}
static struct batadv_gw_node *
@@ -344,9 +346,14 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
struct batadv_gw_node *gw_node;
int down, up;
+ if (!atomic_inc_not_zero(&orig_node->refcount))
+ return;
+
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
- if (!gw_node)
+ if (!gw_node) {
+ batadv_orig_node_free_ref(orig_node);
return;
+ }
INIT_HLIST_NODE(&gw_node->list);
gw_node->orig_node = orig_node;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c478e6b..75f8c72 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
return true;
/* no more parents..stop recursion */
- if (net_dev->iflink == net_dev->ifindex)
+ if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex)
return false;
/* recurse over the parent device */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f081712..3d33941 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -690,14 +690,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
struct hci_cp_auth_requested cp;
- /* encrypt must be pending if auth is also pending */
- set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
-
cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
sizeof(cp), &cp);
- if (conn->key_type != 0xff)
+
+ /* If we're already encrypted set the REAUTH_PEND flag,
+ * otherwise set the ENCRYPT_PEND.
+ */
+ if (conn->link_mode & HCI_LM_ENCRYPT)
set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
+ else
+ set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
}
return 0;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a3af2b7..729f516 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -47,6 +47,10 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
wake_up_bit(&hdev->flags, HCI_INQUIRY);
+ hci_dev_lock(hdev);
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ hci_dev_unlock(hdev);
+
hci_conn_check_pending(hdev);
}
@@ -2993,6 +2997,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
if (!conn)
goto unlock;
+ /* For BR/EDR the necessary steps are taken through the
+ * auth_complete event.
+ */
+ if (conn->type != LE_LINK)
+ goto unlock;
+
if (!ev->status)
conn->sec_level = conn->pending_sec_level;
@@ -3158,8 +3168,11 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
/* If we're not the initiators request authorization to
* proceed from user space (mgmt_user_confirm with
- * confirm_hint set to 1). */
- if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
+ * confirm_hint set to 1). The exception is if neither
+ * side had MITM in which case we do auto-accept.
+ */
+ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
+ (loc_mitm || rem_mitm)) {
BT_DBG("Confirming auto-accept as acceptor");
confirm_hint = 1;
goto confirm;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 0098af8..2710e85 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -631,11 +631,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
/*change security for LE channels */
if (chan->scid == L2CAP_CID_ATT) {
- if (!conn->hcon->out) {
- err = -EINVAL;
- break;
- }
-
if (smp_conn_security(conn->hcon, sec.level))
break;
sk->sk_state = BT_CONFIG;
@@ -887,7 +882,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
l2cap_chan_close(chan, 0);
lock_sock(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
err = bt_sock_wait_state(sk, BT_CLOSED,
sk->sk_lingertime);
}
@@ -949,13 +945,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
BT_DBG("backlog full %d", parent->sk_ack_backlog);
+ release_sock(parent);
return NULL;
}
sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
GFP_ATOMIC);
- if (!sk)
+ if (!sk) {
+ release_sock(parent);
return NULL;
+ }
bt_sock_reclassify_lock(sk, BTPROTO_L2CAP);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index fedc539..211fffb 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2319,8 +2319,13 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
}
if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
- /* Continue with pairing via SMP */
+ /* Continue with pairing via SMP. The hdev lock must be
+ * released as SMP may try to recquire it for crypto
+ * purposes.
+ */
+ hci_dev_unlock(hdev);
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
+ hci_dev_lock(hdev);
if (!err)
err = cmd_complete(sk, hdev->id, mgmt_op,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ca957d3..19ba192 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1857,10 +1857,13 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb))
+ if (!skb_linearize(skb)) {
s = rfcomm_recv_frame(s, skb);
- else
+ if (!s)
+ break;
+ } else {
kfree_skb(skb);
+ }
}
if (s && (sk->sk_state == BT_CLOSED))
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c1c6028..7ca014d 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -887,7 +887,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how)
sk->sk_shutdown = SHUTDOWN_MASK;
__rfcomm_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
}
release_sock(sk);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index d021e44..4f5f01b 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -913,7 +913,8 @@ static int sco_sock_shutdown(struct socket *sock, int how)
sco_sock_clear_timer(sk);
__sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
err = bt_sock_wait_state(sk, BT_CLOSED,
sk->sk_lingertime);
}
@@ -933,7 +934,8 @@ static int sco_sock_release(struct socket *sock)
sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) {
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
release_sock(sk);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a2fd37e..1f59299 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -72,7 +72,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
goto drop;
if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid))
- goto drop;
+ goto out;
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
@@ -146,8 +146,8 @@ static int br_handle_local_finish(struct sk_buff *skb)
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
u16 vid = 0;
- br_vlan_get_tag(skb, &vid);
- if (p->flags & BR_LEARNING)
+ /* check if vlan is allowed, to avoid spoofing */
+ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
return 0; /* process further */
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f75d92e..b47b344e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -446,6 +446,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
}
+static int br_dev_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ if (tb[IFLA_ADDRESS]) {
+ spin_lock_bh(&br->lock);
+ br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
+ spin_unlock_bh(&br->lock);
+ }
+
+ return register_netdevice(dev);
+}
+
static size_t br_get_link_af_size(const struct net_device *dev)
{
struct net_port_vlans *pv;
@@ -474,6 +488,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
.priv_size = sizeof(struct net_bridge),
.setup = br_dev_setup,
.validate = br_validate,
+ .newlink = br_dev_newlink,
.dellink = br_dev_delete,
};
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9a63c42..f02acd7 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -309,6 +309,9 @@ struct br_input_skb_cb {
int igmp;
int mrouters_only;
#endif
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ bool vlan_filtered;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -605,6 +608,7 @@ extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
extern bool br_allowed_egress(struct net_bridge *br,
const struct net_port_vlans *v,
const struct sk_buff *skb);
+bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
extern struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb);
@@ -671,6 +675,12 @@ static inline bool br_allowed_egress(struct net_bridge *br,
return true;
}
+static inline bool br_should_learn(struct net_bridge_port *p,
+ struct sk_buff *skb, u16 *vid)
+{
+ return true;
+}
+
static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 53f0990..f0db99f 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -136,7 +136,7 @@ static struct sk_buff *br_vlan_untag(struct sk_buff *skb)
}
skb->vlan_tci = 0;
- skb = vlan_untag(skb);
+ skb = skb_vlan_untag(skb);
if (skb)
skb->vlan_tci = 0;
@@ -149,7 +149,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
{
u16 vid;
- if (!br->vlan_enabled)
+ /* If this packet was not filtered at input, let it pass */
+ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
goto out;
/* At this point, we know that the frame was filtered and contains
@@ -194,14 +195,18 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
- if (!br->vlan_enabled)
+ if (!br->vlan_enabled) {
+ BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
+ }
/* If there are no vlan in the permitted list, all packets are
* rejected.
*/
if (!v)
- return false;
+ goto drop;
+
+ BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
err = br_vlan_get_tag(skb, vid);
if (!*vid) {
@@ -212,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
* vlan untagged or priority-tagged traffic belongs to.
*/
if (pvid == VLAN_N_VID)
- return false;
+ goto drop;
/* PVID is set on this port. Any untagged or priority-tagged
* ingress frame is considered to belong to this vlan.
@@ -235,7 +240,8 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
/* Frame had a valid vlan tag. See if vlan is allowed */
if (test_bit(*vid, v->vlan_bitmap))
return true;
-
+drop:
+ kfree_skb(skb);
return false;
}
@@ -246,7 +252,8 @@ bool br_allowed_egress(struct net_bridge *br,
{
u16 vid;
- if (!br->vlan_enabled)
+ /* If this packet was not filtered at input, let it pass */
+ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
return true;
if (!v)
@@ -259,6 +266,35 @@ bool br_allowed_egress(struct net_bridge *br,
return false;
}
+/* Called under RCU */
+bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
+{
+ struct net_bridge *br = p->br;
+ struct net_port_vlans *v;
+
+ /* If filtering was disabled at input, let it pass. */
+ if (!br->vlan_enabled)
+ return true;
+
+ v = rcu_dereference(p->vlan_info);
+ if (!v)
+ return false;
+
+ br_vlan_get_tag(skb, vid);
+ if (!*vid) {
+ *vid = br_get_pvid(v);
+ if (*vid == VLAN_N_VID)
+ return false;
+
+ return true;
+ }
+
+ if (test_bit(*vid, v->vlan_bitmap))
+ return true;
+
+ return false;
+}
+
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
*/
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index ac78024..b166fc2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1044,10 +1044,9 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
if (repl->num_counters &&
copy_to_user(repl->counters, counterstmp,
repl->num_counters * sizeof(struct ebt_counter))) {
- ret = -EFAULT;
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n");
}
- else
- ret = 0;
/* decrease module count and free resources */
EBT_ENTRY_ITERATE(table->entries, table->entries_size,
diff --git a/net/can/gw.c b/net/can/gw.c
index 3f9b0f3..233ce53 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
u8 limhops = 0;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (nlmsg_len(nlh) < sizeof(*r))
@@ -900,7 +900,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
u8 limhops = 0;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (nlmsg_len(nlh) < sizeof(*r))
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 96238ba..de6662b 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -13,8 +13,6 @@
#include "auth_x.h"
#include "auth_x_protocol.h"
-#define TEMP_TICKET_BUF_LEN 256
-
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
@@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret,
}
static int ceph_x_decrypt(struct ceph_crypto_key *secret,
- void **p, void *end, void *obuf, size_t olen)
+ void **p, void *end, void **obuf, size_t olen)
{
struct ceph_x_encrypt_header head;
size_t head_len = sizeof(head);
@@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret,
return -EINVAL;
dout("ceph_x_decrypt len %d\n", len);
- ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
- *p, len);
+ if (*obuf == NULL) {
+ *obuf = kmalloc(len, GFP_NOFS);
+ if (!*obuf)
+ return -ENOMEM;
+ olen = len;
+ }
+
+ ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len);
if (ret)
return ret;
if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
@@ -129,139 +133,120 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
kfree(th);
}
-static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
- struct ceph_crypto_key *secret,
- void *buf, void *end)
+static int process_one_ticket(struct ceph_auth_client *ac,
+ struct ceph_crypto_key *secret,
+ void **p, void *end)
{
struct ceph_x_info *xi = ac->private;
- int num;
- void *p = buf;
+ int type;
+ u8 tkt_struct_v, blob_struct_v;
+ struct ceph_x_ticket_handler *th;
+ void *dbuf = NULL;
+ void *dp, *dend;
+ int dlen;
+ char is_enc;
+ struct timespec validity;
+ struct ceph_crypto_key old_key;
+ void *ticket_buf = NULL;
+ void *tp, *tpend;
+ struct ceph_timespec new_validity;
+ struct ceph_crypto_key new_session_key;
+ struct ceph_buffer *new_ticket_blob;
+ unsigned long new_expires, new_renew_after;
+ u64 new_secret_id;
int ret;
- char *dbuf;
- char *ticket_buf;
- u8 reply_struct_v;
- dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
- if (!dbuf)
- return -ENOMEM;
+ ceph_decode_need(p, end, sizeof(u32) + 1, bad);
- ret = -ENOMEM;
- ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
- if (!ticket_buf)
- goto out_dbuf;
+ type = ceph_decode_32(p);
+ dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
- ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
- reply_struct_v = ceph_decode_8(&p);
- if (reply_struct_v != 1)
+ tkt_struct_v = ceph_decode_8(p);
+ if (tkt_struct_v != 1)
goto bad;
- num = ceph_decode_32(&p);
- dout("%d tickets\n", num);
- while (num--) {
- int type;
- u8 tkt_struct_v, blob_struct_v;
- struct ceph_x_ticket_handler *th;
- void *dp, *dend;
- int dlen;
- char is_enc;
- struct timespec validity;
- struct ceph_crypto_key old_key;
- void *tp, *tpend;
- struct ceph_timespec new_validity;
- struct ceph_crypto_key new_session_key;
- struct ceph_buffer *new_ticket_blob;
- unsigned long new_expires, new_renew_after;
- u64 new_secret_id;
-
- ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
-
- type = ceph_decode_32(&p);
- dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
-
- tkt_struct_v = ceph_decode_8(&p);
- if (tkt_struct_v != 1)
- goto bad;
-
- th = get_ticket_handler(ac, type);
- if (IS_ERR(th)) {
- ret = PTR_ERR(th);
- goto out;
- }
- /* blob for me */
- dlen = ceph_x_decrypt(secret, &p, end, dbuf,
- TEMP_TICKET_BUF_LEN);
- if (dlen <= 0) {
- ret = dlen;
- goto out;
- }
- dout(" decrypted %d bytes\n", dlen);
- dend = dbuf + dlen;
- dp = dbuf;
+ th = get_ticket_handler(ac, type);
+ if (IS_ERR(th)) {
+ ret = PTR_ERR(th);
+ goto out;
+ }
- tkt_struct_v = ceph_decode_8(&dp);
- if (tkt_struct_v != 1)
- goto bad;
+ /* blob for me */
+ dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0);
+ if (dlen <= 0) {
+ ret = dlen;
+ goto out;
+ }
+ dout(" decrypted %d bytes\n", dlen);
+ dp = dbuf;
+ dend = dp + dlen;
- memcpy(&old_key, &th->session_key, sizeof(old_key));
- ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
- if (ret)
- goto out;
+ tkt_struct_v = ceph_decode_8(&dp);
+ if (tkt_struct_v != 1)
+ goto bad;
- ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
- ceph_decode_timespec(&validity, &new_validity);
- new_expires = get_seconds() + validity.tv_sec;
- new_renew_after = new_expires - (validity.tv_sec / 4);
- dout(" expires=%lu renew_after=%lu\n", new_expires,
- new_renew_after);
+ memcpy(&old_key, &th->session_key, sizeof(old_key));
+ ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+ if (ret)
+ goto out;
- /* ticket blob for service */
- ceph_decode_8_safe(&p, end, is_enc, bad);
- tp = ticket_buf;
- if (is_enc) {
- /* encrypted */
- dout(" encrypted ticket\n");
- dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
- TEMP_TICKET_BUF_LEN);
- if (dlen < 0) {
- ret = dlen;
- goto out;
- }
- dlen = ceph_decode_32(&tp);
- } else {
- /* unencrypted */
- ceph_decode_32_safe(&p, end, dlen, bad);
- ceph_decode_need(&p, end, dlen, bad);
- ceph_decode_copy(&p, ticket_buf, dlen);
+ ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+ ceph_decode_timespec(&validity, &new_validity);
+ new_expires = get_seconds() + validity.tv_sec;
+ new_renew_after = new_expires - (validity.tv_sec / 4);
+ dout(" expires=%lu renew_after=%lu\n", new_expires,
+ new_renew_after);
+
+ /* ticket blob for service */
+ ceph_decode_8_safe(p, end, is_enc, bad);
+ if (is_enc) {
+ /* encrypted */
+ dout(" encrypted ticket\n");
+ dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0);
+ if (dlen < 0) {
+ ret = dlen;
+ goto out;
}
- tpend = tp + dlen;
- dout(" ticket blob is %d bytes\n", dlen);
- ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
- blob_struct_v = ceph_decode_8(&tp);
- new_secret_id = ceph_decode_64(&tp);
- ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
- if (ret)
+ tp = ticket_buf;
+ dlen = ceph_decode_32(&tp);
+ } else {
+ /* unencrypted */
+ ceph_decode_32_safe(p, end, dlen, bad);
+ ticket_buf = kmalloc(dlen, GFP_NOFS);
+ if (!ticket_buf) {
+ ret = -ENOMEM;
goto out;
-
- /* all is well, update our ticket */
- ceph_crypto_key_destroy(&th->session_key);
- if (th->ticket_blob)
- ceph_buffer_put(th->ticket_blob);
- th->session_key = new_session_key;
- th->ticket_blob = new_ticket_blob;
- th->validity = new_validity;
- th->secret_id = new_secret_id;
- th->expires = new_expires;
- th->renew_after = new_renew_after;
- dout(" got ticket service %d (%s) secret_id %lld len %d\n",
- type, ceph_entity_type_name(type), th->secret_id,
- (int)th->ticket_blob->vec.iov_len);
- xi->have_keys |= th->service;
+ }
+ tp = ticket_buf;
+ ceph_decode_need(p, end, dlen, bad);
+ ceph_decode_copy(p, ticket_buf, dlen);
}
+ tpend = tp + dlen;
+ dout(" ticket blob is %d bytes\n", dlen);
+ ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
+ blob_struct_v = ceph_decode_8(&tp);
+ new_secret_id = ceph_decode_64(&tp);
+ ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+ if (ret)
+ goto out;
+
+ /* all is well, update our ticket */
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ th->session_key = new_session_key;
+ th->ticket_blob = new_ticket_blob;
+ th->validity = new_validity;
+ th->secret_id = new_secret_id;
+ th->expires = new_expires;
+ th->renew_after = new_renew_after;
+ dout(" got ticket service %d (%s) secret_id %lld len %d\n",
+ type, ceph_entity_type_name(type), th->secret_id,
+ (int)th->ticket_blob->vec.iov_len);
+ xi->have_keys |= th->service;
- ret = 0;
out:
kfree(ticket_buf);
-out_dbuf:
kfree(dbuf);
return ret;
@@ -270,6 +255,34 @@ bad:
goto out;
}
+static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
+ struct ceph_crypto_key *secret,
+ void *buf, void *end)
+{
+ void *p = buf;
+ u8 reply_struct_v;
+ u32 num;
+ int ret;
+
+ ceph_decode_8_safe(&p, end, reply_struct_v, bad);
+ if (reply_struct_v != 1)
+ return -EINVAL;
+
+ ceph_decode_32_safe(&p, end, num, bad);
+ dout("%d tickets\n", num);
+
+ while (num--) {
+ ret = process_one_ticket(ac, secret, &p, end);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+bad:
+ return -EINVAL;
+}
+
static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th,
struct ceph_x_authorizer *au)
@@ -583,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th;
int ret = 0;
struct ceph_x_authorize_reply reply;
+ void *preply = &reply;
void *p = au->reply_buf;
void *end = p + sizeof(au->reply_buf);
th = get_ticket_handler(ac, au->service);
if (IS_ERR(th))
return PTR_ERR(th);
- ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
+ ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
if (ret < 0)
return ret;
if (ret != sizeof(reply))
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 6e7a236..06f19b9 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -89,11 +89,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
+/*
+ * Should be used for buffers allocated with ceph_kvmalloc().
+ * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
+ * in-buffer (msg front).
+ *
+ * Dispose of @sgt with teardown_sgtable().
+ *
+ * @prealloc_sg is to avoid memory allocation inside sg_alloc_table()
+ * in cases where a single sg is sufficient. No attempt to reduce the
+ * number of sgs by squeezing physically contiguous pages together is
+ * made though, for simplicity.
+ */
+static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg,
+ const void *buf, unsigned int buf_len)
+{
+ struct scatterlist *sg;
+ const bool is_vmalloc = is_vmalloc_addr(buf);
+ unsigned int off = offset_in_page(buf);
+ unsigned int chunk_cnt = 1;
+ unsigned int chunk_len = PAGE_ALIGN(off + buf_len);
+ int i;
+ int ret;
+
+ if (buf_len == 0) {
+ memset(sgt, 0, sizeof(*sgt));
+ return -EINVAL;
+ }
+
+ if (is_vmalloc) {
+ chunk_cnt = chunk_len >> PAGE_SHIFT;
+ chunk_len = PAGE_SIZE;
+ }
+
+ if (chunk_cnt > 1) {
+ ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS);
+ if (ret)
+ return ret;
+ } else {
+ WARN_ON(chunk_cnt != 1);
+ sg_init_table(prealloc_sg, 1);
+ sgt->sgl = prealloc_sg;
+ sgt->nents = sgt->orig_nents = 1;
+ }
+
+ for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) {
+ struct page *page;
+ unsigned int len = min(chunk_len - off, buf_len);
+
+ if (is_vmalloc)
+ page = vmalloc_to_page(buf);
+ else
+ page = virt_to_page(buf);
+
+ sg_set_page(sg, page, len, off);
+
+ off = 0;
+ buf += len;
+ buf_len -= len;
+ }
+ WARN_ON(buf_len != 0);
+
+ return 0;
+}
+
+static void teardown_sgtable(struct sg_table *sgt)
+{
+ if (sgt->orig_nents > 1)
+ sg_free_table(sgt);
+}
+
static int ceph_aes_encrypt(const void *key, int key_len,
void *dst, size_t *dst_len,
const void *src, size_t src_len)
{
- struct scatterlist sg_in[2], sg_out[1];
+ struct scatterlist sg_in[2], prealloc_sg;
+ struct sg_table sg_out;
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
int ret;
@@ -109,16 +180,18 @@ static int ceph_aes_encrypt(const void *key, int key_len,
*dst_len = src_len + zero_padding;
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
sg_init_table(sg_in, 2);
sg_set_buf(&sg_in[0], src, src_len);
sg_set_buf(&sg_in[1], pad, zero_padding);
- sg_init_table(sg_out, 1);
- sg_set_buf(sg_out, dst, *dst_len);
+ ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
+ if (ret)
+ goto out_tfm;
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
iv = crypto_blkcipher_crt(tfm)->iv;
ivsize = crypto_blkcipher_ivsize(tfm);
-
memcpy(iv, aes_iv, ivsize);
+
/*
print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
key, key_len, 1);
@@ -127,16 +200,22 @@ static int ceph_aes_encrypt(const void *key, int key_len,
print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
pad, zero_padding, 1);
*/
- ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+ ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in,
src_len + zero_padding);
- crypto_free_blkcipher(tfm);
- if (ret < 0)
+ if (ret < 0) {
pr_err("ceph_aes_crypt failed %d\n", ret);
+ goto out_sg;
+ }
/*
print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
dst, *dst_len, 1);
*/
- return 0;
+
+out_sg:
+ teardown_sgtable(&sg_out);
+out_tfm:
+ crypto_free_blkcipher(tfm);
+ return ret;
}
static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
@@ -144,7 +223,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
const void *src1, size_t src1_len,
const void *src2, size_t src2_len)
{
- struct scatterlist sg_in[3], sg_out[1];
+ struct scatterlist sg_in[3], prealloc_sg;
+ struct sg_table sg_out;
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
int ret;
@@ -160,17 +240,19 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
*dst_len = src1_len + src2_len + zero_padding;
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
sg_init_table(sg_in, 3);
sg_set_buf(&sg_in[0], src1, src1_len);
sg_set_buf(&sg_in[1], src2, src2_len);
sg_set_buf(&sg_in[2], pad, zero_padding);
- sg_init_table(sg_out, 1);
- sg_set_buf(sg_out, dst, *dst_len);
+ ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
+ if (ret)
+ goto out_tfm;
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
iv = crypto_blkcipher_crt(tfm)->iv;
ivsize = crypto_blkcipher_ivsize(tfm);
-
memcpy(iv, aes_iv, ivsize);
+
/*
print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
key, key_len, 1);
@@ -181,23 +263,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
pad, zero_padding, 1);
*/
- ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+ ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in,
src1_len + src2_len + zero_padding);
- crypto_free_blkcipher(tfm);
- if (ret < 0)
+ if (ret < 0) {
pr_err("ceph_aes_crypt2 failed %d\n", ret);
+ goto out_sg;
+ }
/*
print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
dst, *dst_len, 1);
*/
- return 0;
+
+out_sg:
+ teardown_sgtable(&sg_out);
+out_tfm:
+ crypto_free_blkcipher(tfm);
+ return ret;
}
static int ceph_aes_decrypt(const void *key, int key_len,
void *dst, size_t *dst_len,
const void *src, size_t src_len)
{
- struct scatterlist sg_in[1], sg_out[2];
+ struct sg_table sg_in;
+ struct scatterlist sg_out[2], prealloc_sg;
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
struct blkcipher_desc desc = { .tfm = tfm };
char pad[16];
@@ -209,16 +298,16 @@ static int ceph_aes_decrypt(const void *key, int key_len,
if (IS_ERR(tfm))
return PTR_ERR(tfm);
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
- sg_init_table(sg_in, 1);
sg_init_table(sg_out, 2);
- sg_set_buf(sg_in, src, src_len);
sg_set_buf(&sg_out[0], dst, *dst_len);
sg_set_buf(&sg_out[1], pad, sizeof(pad));
+ ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
+ if (ret)
+ goto out_tfm;
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
iv = crypto_blkcipher_crt(tfm)->iv;
ivsize = crypto_blkcipher_ivsize(tfm);
-
memcpy(iv, aes_iv, ivsize);
/*
@@ -227,12 +316,10 @@ static int ceph_aes_decrypt(const void *key, int key_len,
print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
src, src_len, 1);
*/
-
- ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
- crypto_free_blkcipher(tfm);
+ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len);
if (ret < 0) {
pr_err("ceph_aes_decrypt failed %d\n", ret);
- return ret;
+ goto out_sg;
}
if (src_len <= *dst_len)
@@ -250,7 +337,12 @@ static int ceph_aes_decrypt(const void *key, int key_len,
print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1,
dst, *dst_len, 1);
*/
- return 0;
+
+out_sg:
+ teardown_sgtable(&sg_in);
+out_tfm:
+ crypto_free_blkcipher(tfm);
+ return ret;
}
static int ceph_aes_decrypt2(const void *key, int key_len,
@@ -258,7 +350,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
void *dst2, size_t *dst2_len,
const void *src, size_t src_len)
{
- struct scatterlist sg_in[1], sg_out[3];
+ struct sg_table sg_in;
+ struct scatterlist sg_out[3], prealloc_sg;
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
struct blkcipher_desc desc = { .tfm = tfm };
char pad[16];
@@ -270,17 +363,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
if (IS_ERR(tfm))
return PTR_ERR(tfm);
- sg_init_table(sg_in, 1);
- sg_set_buf(sg_in, src, src_len);
sg_init_table(sg_out, 3);
sg_set_buf(&sg_out[0], dst1, *dst1_len);
sg_set_buf(&sg_out[1], dst2, *dst2_len);
sg_set_buf(&sg_out[2], pad, sizeof(pad));
+ ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
+ if (ret)
+ goto out_tfm;
crypto_blkcipher_setkey((void *)tfm, key, key_len);
iv = crypto_blkcipher_crt(tfm)->iv;
ivsize = crypto_blkcipher_ivsize(tfm);
-
memcpy(iv, aes_iv, ivsize);
/*
@@ -289,12 +382,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
src, src_len, 1);
*/
-
- ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
- crypto_free_blkcipher(tfm);
+ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len);
if (ret < 0) {
pr_err("ceph_aes_decrypt failed %d\n", ret);
- return ret;
+ goto out_sg;
}
if (src_len <= *dst1_len)
@@ -324,7 +415,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
dst2, *dst2_len, 1);
*/
- return 0;
+out_sg:
+ teardown_sgtable(&sg_in);
+out_tfm:
+ crypto_free_blkcipher(tfm);
+ return ret;
}
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 464303f..057017b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -290,7 +290,11 @@ int ceph_msgr_init(void)
if (ceph_msgr_slab_init())
return -ENOMEM;
- ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
+ /*
+ * The number of active work items is limited by the number of
+ * connections, so leave @max_active at default.
+ */
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0);
if (ceph_msgr_wq)
return 0;
@@ -556,7 +560,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
return r;
}
-static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, bool more)
{
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
@@ -569,6 +573,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
return ret;
}
+static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, bool more)
+{
+ int ret;
+ struct kvec iov;
+
+ /* sendpage cannot properly handle pages with page_count == 0,
+ * we need to fallback to sendmsg if that's the case */
+ if (page_count(page) >= 1)
+ return __ceph_tcp_sendpage(sock, page, offset, size, more);
+
+ iov.iov_base = kmap(page) + offset;
+ iov.iov_len = size;
+ ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more);
+ kunmap(page);
+
+ return ret;
+}
/*
* Shutdown/close the socket for the given connection.
@@ -886,7 +908,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
BUG_ON(page_count > (int)USHRT_MAX);
cursor->page_count = (unsigned short)page_count;
BUG_ON(length > SIZE_MAX - cursor->page_offset);
- cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
+ cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
}
static struct page *
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef3..dbcbf5a 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1041,7 +1041,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
if (!m) {
pr_info("alloc_msg unknown type %d\n", type);
*skip = 1;
+ } else if (front_len > m->front_alloc_len) {
+ pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n",
+ front_len, m->front_alloc_len,
+ (unsigned int)con->peer_name.type,
+ le64_to_cpu(con->peer_name.num));
+ ceph_msg_put(m);
+ m = ceph_msg_new(type, front_len, GFP_NOFS, false);
}
+
return m;
}
diff --git a/net/compat.c b/net/compat.c
index f50161f..cbc1a2a 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
{
int tot_len;
- if (kern_msg->msg_namelen) {
+ if (kern_msg->msg_name && kern_msg->msg_namelen) {
if (mode == VERIFY_READ) {
int err = move_addr_to_kernel(kern_msg->msg_name,
kern_msg->msg_namelen,
@@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
if (err < 0)
return err;
}
- if (kern_msg->msg_name)
- kern_msg->msg_name = kern_address;
- } else
+ kern_msg->msg_name = kern_address;
+ } else {
kern_msg->msg_name = NULL;
+ kern_msg->msg_namelen = 0;
+ }
tot_len = iov_from_user_compat_to_kern(kern_iov,
(struct compat_iovec __user *)kern_msg->msg_iov,
diff --git a/net/core/dev.c b/net/core/dev.c
index ab4df3d..ece49db 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -175,7 +175,6 @@ static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
-static DEFINE_MUTEX(devnet_rename_mutex);
static inline void dev_base_seq_inc(struct net *net)
{
@@ -197,14 +196,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- raw_spin_lock(&sd->input_pkt_queue.raw_lock);
+ spin_lock(&sd->input_pkt_queue.lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
+ spin_unlock(&sd->input_pkt_queue.lock);
#endif
}
@@ -827,8 +826,7 @@ retry:
strcpy(name, dev->name);
rcu_read_unlock();
if (read_seqcount_retry(&devnet_rename_seq, seq)) {
- mutex_lock(&devnet_rename_mutex);
- mutex_unlock(&devnet_rename_mutex);
+ cond_resched();
goto retry;
}
@@ -1094,28 +1092,30 @@ int dev_change_name(struct net_device *dev, const char *newname)
if (dev->flags & IFF_UP)
return -EBUSY;
- mutex_lock(&devnet_rename_mutex);
- __write_seqcount_begin(&devnet_rename_seq);
+ write_seqcount_begin(&devnet_rename_seq);
- if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
- goto outunlock;
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+ write_seqcount_end(&devnet_rename_seq);
+ return 0;
+ }
memcpy(oldname, dev->name, IFNAMSIZ);
err = dev_get_valid_name(net, dev, newname);
- if (err < 0)
- goto outunlock;
+ if (err < 0) {
+ write_seqcount_end(&devnet_rename_seq);
+ return err;
+ }
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- err = ret;
- goto outunlock;
+ write_seqcount_end(&devnet_rename_seq);
+ return ret;
}
- __write_seqcount_end(&devnet_rename_seq);
- mutex_unlock(&devnet_rename_mutex);
+ write_seqcount_end(&devnet_rename_seq);
write_lock_bh(&dev_base_lock);
hlist_del_rcu(&dev->name_hlist);
@@ -1134,8 +1134,7 @@ rollback:
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
- mutex_lock(&devnet_rename_mutex);
- __write_seqcount_begin(&devnet_rename_seq);
+ write_seqcount_begin(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
goto rollback;
} else {
@@ -1145,11 +1144,6 @@ rollback:
}
return err;
-
-outunlock:
- __write_seqcount_end(&devnet_rename_seq);
- mutex_unlock(&devnet_rename_mutex);
- return err;
}
/**
@@ -1208,7 +1202,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = 0;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
@@ -1700,6 +1698,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_scrub_packet(skb, true);
skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
return netif_rx(skb);
}
@@ -2138,7 +2137,6 @@ static inline void __netif_reschedule(struct Qdisc *q)
sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
- preempt_check_resched_rt();
}
void __netif_schedule(struct Qdisc *q)
@@ -2160,7 +2158,6 @@ void dev_kfree_skb_irq(struct sk_buff *skb)
sd->completion_queue = skb;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
- preempt_check_resched_rt();
}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);
@@ -2508,20 +2505,29 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, dev, features);
+ if (!vlan_tx_tag_present(skb)) {
+ if (unlikely(protocol == htons(ETH_P_8021Q) ||
+ protocol == htons(ETH_P_8021AD))) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else {
+ return harmonize_features(skb, dev, features);
+ }
}
- features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX);
+ features = netdev_intersect_features(features,
+ dev->vlan_features |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
- features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
- NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX;
+ features = netdev_intersect_features(features,
+ NETIF_F_SG |
+ NETIF_F_HIGHDMA |
+ NETIF_F_FRAGLIST |
+ NETIF_F_GEN_CSUM |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
return harmonize_features(skb, dev, features);
}
@@ -3212,7 +3218,6 @@ enqueue:
rps_unlock(sd);
local_irq_restore(flags);
- preempt_check_resched_rt();
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
@@ -3250,7 +3255,7 @@ int netif_rx(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- migrate_disable();
+ preempt_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -3260,13 +3265,13 @@ int netif_rx(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- migrate_enable();
+ preempt_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
- put_cpu_light();
+ ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+ put_cpu();
}
return ret;
}
@@ -3276,44 +3281,16 @@ int netif_rx_ni(struct sk_buff *skb)
{
int err;
- local_bh_disable();
+ preempt_disable();
err = netif_rx(skb);
- local_bh_enable();
+ if (local_softirq_pending())
+ do_softirq();
+ preempt_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * RT runs ksoftirqd as a real time thread and the root_lock is a
- * "sleeping spinlock". If the trylock fails then we can go into an
- * infinite loop when ksoftirqd preempted the task which actually
- * holds the lock, because we requeue q and raise NET_TX softirq
- * causing ksoftirqd to loop forever.
- *
- * It's safe to use spin_lock on RT here as softirqs run in thread
- * context and cannot deadlock against the thread which is holding
- * root_lock.
- *
- * On !RT the trylock might fail, but there we bail out from the
- * softirq loop after 10 attempts which we can't do on RT. And the
- * task holding root_lock cannot be preempted, so the only downside of
- * that trylock is that we need 10 loops to decide that we should have
- * given up in the first one :)
- */
-static inline int take_root_lock(spinlock_t *lock)
-{
- spin_lock(lock);
- return 1;
-}
-#else
-static inline int take_root_lock(spinlock_t *lock)
-{
- return spin_trylock(lock);
-}
-#endif
-
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -3352,7 +3329,7 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (take_root_lock(root_lock)) {
+ if (spin_trylock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
@@ -3550,7 +3527,7 @@ another_round:
if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
- skb = vlan_untag(skb);
+ skb = skb_vlan_untag(skb);
if (unlikely(!skb))
goto unlock;
}
@@ -3743,7 +3720,7 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->input_pkt_queue);
- __skb_queue_tail(&sd->tofree_queue, skb);
+ kfree_skb(skb);
input_queue_head_incr(sd);
}
}
@@ -3752,13 +3729,10 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->process_queue);
- __skb_queue_tail(&sd->tofree_queue, skb);
+ kfree_skb(skb);
input_queue_head_incr(sd);
}
}
-
- if (!skb_queue_empty(&sd->tofree_queue))
- raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -3999,6 +3973,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
napi->skb = skb;
}
@@ -4115,7 +4090,6 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
} else
#endif
local_irq_enable();
- preempt_check_resched_rt();
}
static int process_backlog(struct napi_struct *napi, int quota)
@@ -4188,7 +4162,6 @@ void __napi_schedule(struct napi_struct *n)
local_irq_save(flags);
____napi_schedule(&__get_cpu_var(softnet_data), n);
local_irq_restore(flags);
- preempt_check_resched_rt();
}
EXPORT_SYMBOL(__napi_schedule);
@@ -4318,17 +4291,10 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
- struct sk_buff *skb;
void *have;
local_irq_disable();
- while ((skb = __skb_dequeue(&sd->tofree_queue))) {
- local_irq_enable();
- kfree_skb(skb);
- local_irq_disable();
- }
-
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
@@ -5017,6 +4983,7 @@ void __dev_set_rx_mode(struct net_device *dev)
if (ops->ndo_set_rx_mode)
ops->ndo_set_rx_mode(dev);
}
+EXPORT_SYMBOL(__dev_set_rx_mode);
void dev_set_rx_mode(struct net_device *dev)
{
@@ -5295,7 +5262,7 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
-static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
@@ -5683,13 +5650,8 @@ int register_netdevice(struct net_device *dev)
dev->features |= NETIF_F_SOFT_FEATURES;
dev->wanted_features = dev->features & dev->hw_features;
- /* Turn on no cache copy if HW is doing checksum */
if (!(dev->flags & IFF_LOOPBACK)) {
dev->hw_features |= NETIF_F_NOCACHE_COPY;
- if (dev->features & NETIF_F_ALL_CSUM) {
- dev->wanted_features |= NETIF_F_NOCACHE_COPY;
- dev->features |= NETIF_F_NOCACHE_COPY;
- }
}
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6255,6 +6217,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack wont be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -6264,6 +6229,7 @@ void unregister_netdevice_many(struct list_head *head)
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
+ list_del(head);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
@@ -6447,7 +6413,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
- preempt_check_resched_rt();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
@@ -6458,9 +6423,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netif_rx(skb);
input_queue_head_incr(oldsd);
}
- while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
- kfree_skb(skb);
- }
return NOTIFY_OK;
}
@@ -6723,7 +6685,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
- list_del(&dev_kill_list);
rtnl_unlock();
}
@@ -6772,9 +6733,8 @@ static int __init net_dev_init(void)
struct softnet_data *sd = &per_cpu(softnet_data, i);
memset(sd, 0, sizeof(*sd));
- skb_queue_head_init_raw(&sd->input_pkt_queue);
- skb_queue_head_init_raw(&sd->process_queue);
- skb_queue_head_init_raw(&sd->tofree_queue);
+ skb_queue_head_init(&sd->input_pkt_queue);
+ skb_queue_head_init(&sd->process_queue);
sd->completion_queue = NULL;
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue = NULL;
diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231e..15b6792 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -267,6 +267,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/core/filter.c b/net/core/filter.c
index ad30d62..ebce437 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -355,6 +355,8 @@ load_b:
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
if (A > skb->len - sizeof(struct nlattr))
return 0;
@@ -371,11 +373,13 @@ load_b:
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
if (A > skb->len - sizeof(struct nlattr))
return 0;
nla = (struct nlattr *)&skb->data[A];
- if (nla->nla_len > A - skb->len)
+ if (nla->nla_len > skb->len - A)
return 0;
nla = nla_find_nested(nla, X);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 7d84ea1..8254497 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
{
int size, ct, err;
- if (m->msg_namelen) {
+ if (m->msg_name && m->msg_namelen) {
if (mode == VERIFY_READ) {
void __user *namep;
namep = (void __user __force *) m->msg_name;
@@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
if (err < 0)
return err;
}
- if (m->msg_name)
- m->msg_name = address;
+ m->msg_name = address;
} else {
m->msg_name = NULL;
+ m->msg_namelen = 0;
}
size = m->msg_iovlen * sizeof(struct iovec);
@@ -107,6 +107,10 @@ EXPORT_SYMBOL(memcpy_toiovecend);
int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
int offset, int len)
{
+ /* No data? Done! */
+ if (len == 0)
+ return 0;
+
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 81d3a9a..7c8ffd9 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -24,7 +24,7 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-static DEFINE_MUTEX(net_mutex);
+DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9b40f23..9d42f3b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -785,7 +785,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
}
if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
- skb = vlan_untag(skb);
+ skb = skb_vlan_untag(skb);
if (unlikely(!skb))
goto out;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 37b492e..f322475 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
}
EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
+/* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace.
+ */
+static void rtnl_lock_unregistering_all(void)
+{
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ for_each_net(net) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
/**
* rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- rtnl_lock();
+ /* Close the race with cleanup_net() */
+ mutex_lock(&net_mutex);
+ rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
+ mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
@@ -708,13 +739,15 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
- nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+ nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_link_state)));
return size;
} else
return 0;
}
-static size_t rtnl_port_size(const struct net_device *dev)
+static size_t rtnl_port_size(const struct net_device *dev,
+ u32 ext_filter_mask)
{
size_t port_size = nla_total_size(4) /* PORT_VF */
+ nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */
@@ -730,7 +763,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+ port_size;
- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+ !(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
if (dev_num_vf(dev->dev.parent))
return port_self_size + vf_ports_size +
@@ -765,7 +799,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
- + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
@@ -827,11 +861,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
+ u32 ext_filter_mask)
{
int err;
- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+ !(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
err = rtnl_port_self_fill(skb, dev);
@@ -1016,7 +1052,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_nest_end(skb, vfinfo);
}
- if (rtnl_port_fill(skb, dev))
+ if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
if (dev->rtnl_link_ops) {
@@ -1070,6 +1106,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_head *head;
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
+ int err;
+ int hdrlen;
s_h = cb->args[0];
s_idx = cb->args[1];
@@ -1077,8 +1115,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->dev_base_seq;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ /* A hack to preserve kernel<->userspace interface.
+ * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+ * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+ * what iproute2 < v3.9.0 used.
+ * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+ * attribute, its netlink message is shorter than struct ifinfomsg.
+ */
+ hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1090,11 +1137,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
- if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NLM_F_MULTI,
- ext_filter_mask) <= 0)
+ err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NLM_F_MULTI,
+ ext_filter_mask);
+ /* If we ran out of room on the first message,
+ * we're in trouble
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+ if (err <= 0)
goto out;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1324,7 +1377,8 @@ static int do_set_master(struct net_device *dev, int ifindex)
return 0;
}
-static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+static int do_setlink(const struct sk_buff *skb,
+ struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -1336,7 +1390,8 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
err = PTR_ERR(net);
goto errout;
}
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
err = -EPERM;
goto errout;
}
@@ -1590,7 +1645,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- err = do_setlink(dev, ifm, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, tb, ifname, 0);
errout:
return err;
}
@@ -1630,7 +1685,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
- list_del(&list_kill);
return 0;
}
@@ -1708,7 +1762,8 @@ err:
}
EXPORT_SYMBOL(rtnl_create_link);
-static int rtnl_group_changelink(struct net *net, int group,
+static int rtnl_group_changelink(const struct sk_buff *skb,
+ struct net *net, int group,
struct ifinfomsg *ifm,
struct nlattr **tb)
{
@@ -1717,7 +1772,7 @@ static int rtnl_group_changelink(struct net *net, int group,
for_each_netdev(net, dev) {
if (dev->group == group) {
- err = do_setlink(dev, ifm, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, tb, NULL, 0);
if (err < 0)
return err;
}
@@ -1819,12 +1874,12 @@ replay:
modified = 1;
}
- return do_setlink(dev, ifm, tb, ifname, modified);
+ return do_setlink(skb, dev, ifm, tb, ifname, modified);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
- return rtnl_group_changelink(net,
+ return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, tb);
return -ENODEV;
@@ -1936,9 +1991,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
u16 min_ifinfo_dump_size = 0;
+ int hdrlen;
+
+ /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
+ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
@@ -2205,7 +2264,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
int err = -EINVAL;
__u8 *addr;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
@@ -2657,7 +2716,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 8d9d05e..d0afc32 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -95,31 +95,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
- u32 hash[MD5_DIGEST_WORDS];
-
- net_secret_init();
- hash[0] = (__force __u32) daddr;
- hash[1] = net_secret[13];
- hash[2] = net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
- __u32 hash[4];
-
- net_secret_init();
- memcpy(hash, daddr, 16);
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index da24627..17313d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -62,7 +62,7 @@
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <linux/prefetch.h>
-#include <linux/locallock.h>
+#include <linux/if_vlan.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -335,7 +335,6 @@ struct netdev_alloc_cache {
unsigned int pagecnt_bias;
};
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
@@ -344,7 +343,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
int order;
unsigned long flags;
- local_lock_irqsave(netdev_alloc_lock, flags);
+ local_irq_save(flags);
nc = &__get_cpu_var(netdev_alloc_cache);
if (unlikely(!nc->frag.page)) {
refill:
@@ -378,7 +377,7 @@ recycle:
nc->frag.offset += fragsz;
nc->pagecnt_bias--;
end:
- local_unlock_irqrestore(netdev_alloc_lock, flags);
+ local_irq_restore(flags);
return data;
}
@@ -2717,81 +2716,85 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
/**
* skb_segment - Perform protocol segmentation on skb.
- * @skb: buffer to segment
+ * @head_skb: buffer to segment
* @features: features for the output path (see dev->features)
*
* This function performs segmentation on the given skb. It returns
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *skb_segment(struct sk_buff *head_skb,
+ netdev_features_t features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
- struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
- skb_frag_t *skb_frag = skb_shinfo(skb)->frags;
- unsigned int mss = skb_shinfo(skb)->gso_size;
- unsigned int doffset = skb->data - skb_mac_header(skb);
+ struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
+ skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+ unsigned int mss = skb_shinfo(head_skb)->gso_size;
+ unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+ struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
- unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int headroom;
unsigned int len;
__be16 proto;
bool csum;
int sg = !!(features & NETIF_F_SG);
- int nfrags = skb_shinfo(skb)->nr_frags;
+ int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
int pos;
- proto = skb_network_protocol(skb);
+ __skb_push(head_skb, doffset);
+ proto = skb_network_protocol(head_skb);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
csum = !!can_checksum_protocol(features, proto);
- __skb_push(skb, doffset);
- headroom = skb_headroom(skb);
- pos = skb_headlen(skb);
+
+ headroom = skb_headroom(head_skb);
+ pos = skb_headlen(head_skb);
do {
struct sk_buff *nskb;
- skb_frag_t *frag;
+ skb_frag_t *nskb_frag;
int hsize;
int size;
- len = skb->len - offset;
+ len = head_skb->len - offset;
if (len > mss)
len = mss;
- hsize = skb_headlen(skb) - offset;
+ hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
hsize = 0;
if (hsize > len || !sg)
hsize = len;
- if (!hsize && i >= nfrags && skb_headlen(fskb) &&
- (skb_headlen(fskb) == len || sg)) {
- BUG_ON(skb_headlen(fskb) > len);
+ if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
+ (skb_headlen(list_skb) == len || sg)) {
+ BUG_ON(skb_headlen(list_skb) > len);
i = 0;
- nfrags = skb_shinfo(fskb)->nr_frags;
- skb_frag = skb_shinfo(fskb)->frags;
- pos += skb_headlen(fskb);
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
+ pos += skb_headlen(list_skb);
while (pos < offset + len) {
BUG_ON(i >= nfrags);
- size = skb_frag_size(skb_frag);
+ size = skb_frag_size(frag);
if (pos + size > offset + len)
break;
i++;
pos += size;
- skb_frag++;
+ frag++;
}
- nskb = skb_clone(fskb, GFP_ATOMIC);
- fskb = fskb->next;
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ list_skb = list_skb->next;
if (unlikely(!nskb))
goto err;
@@ -2812,7 +2815,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
__skb_push(nskb, doffset);
} else {
nskb = __alloc_skb(hsize + doffset + headroom,
- GFP_ATOMIC, skb_alloc_rx_flag(skb),
+ GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
NUMA_NO_NODE);
if (unlikely(!nskb))
@@ -2828,19 +2831,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
segs = nskb;
tail = nskb;
- __copy_skb_header(nskb, skb);
- nskb->mac_len = skb->mac_len;
+ __copy_skb_header(nskb, head_skb);
/* nskb and skb might have different headroom */
if (nskb->ip_summed == CHECKSUM_PARTIAL)
nskb->csum_start += skb_headroom(nskb) - headroom;
skb_reset_mac_header(nskb);
- skb_set_network_header(nskb, skb->mac_len);
+ skb_set_network_header(nskb, head_skb->mac_len);
nskb->transport_header = (nskb->network_header +
- skb_network_header_len(skb));
+ skb_network_header_len(head_skb));
+ skb_reset_mac_len(nskb);
- skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
nskb->data - tnl_hlen,
doffset + tnl_hlen);
@@ -2849,30 +2852,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
- nskb->csum = skb_copy_and_csum_bits(skb, offset,
+ nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
continue;
}
- frag = skb_shinfo(nskb)->frags;
+ nskb_frag = skb_shinfo(nskb)->frags;
- skb_copy_from_linear_data_offset(skb, offset,
+ skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+ skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(fskb));
+ BUG_ON(skb_headlen(list_skb));
i = 0;
- nfrags = skb_shinfo(fskb)->nr_frags;
- skb_frag = skb_shinfo(fskb)->frags;
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
BUG_ON(!nfrags);
- fskb = fskb->next;
+ list_skb = list_skb->next;
}
if (unlikely(skb_shinfo(nskb)->nr_frags >=
@@ -2883,27 +2888,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
goto err;
}
- *frag = *skb_frag;
- __skb_frag_ref(frag);
- size = skb_frag_size(frag);
+ if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
+ goto err;
+
+ *nskb_frag = *frag;
+ __skb_frag_ref(nskb_frag);
+ size = skb_frag_size(nskb_frag);
if (pos < offset) {
- frag->page_offset += offset - pos;
- skb_frag_size_sub(frag, offset - pos);
+ nskb_frag->page_offset += offset - pos;
+ skb_frag_size_sub(nskb_frag, offset - pos);
}
skb_shinfo(nskb)->nr_frags++;
if (pos + size <= offset + len) {
i++;
- skb_frag++;
+ frag++;
pos += size;
} else {
- skb_frag_size_sub(frag, pos + size - (offset + len));
+ skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
goto skip_fraglist;
}
- frag++;
+ nskb_frag++;
}
skip_fraglist:
@@ -2917,15 +2925,12 @@ perform_csum_check:
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
}
- } while ((offset += len) < skb->len);
+ } while ((offset += len) < head_skb->len);
return segs;
err:
- while ((skb = segs)) {
- segs = skb->next;
- kfree_skb(skb);
- }
+ kfree_skb_list(segs);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(skb_segment);
@@ -3518,6 +3523,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->local_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
+ skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
@@ -3537,12 +3543,66 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
- unsigned int hdr_len;
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len = tcp_hdrlen(skb);
- else
- hdr_len = sizeof(struct udphdr);
- return hdr_len + shinfo->gso_size;
+ return tcp_hdrlen(skb) + shinfo->gso_size;
+
+ /* UFO sets gso_size to the size of the fragmentation
+ * payload, i.e. the size of the L4 (UDP) header is already
+ * accounted for.
+ */
+ return shinfo->gso_size;
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
+{
+ if (skb_cow(skb, skb_headroom(skb)) < 0) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ skb->mac_header += VLAN_HLEN;
+ return skb;
+}
+
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
+{
+ struct vlan_hdr *vhdr;
+ u16 vlan_tci;
+
+ if (unlikely(vlan_tx_tag_present(skb))) {
+ /* vlan_tci is already set-up so leave this for another time */
+ return skb;
+ }
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ goto err_free;
+
+ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+ goto err_free;
+
+ vhdr = (struct vlan_hdr *)skb->data;
+ vlan_tci = ntohs(vhdr->h_vlan_TCI);
+ __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
+
+ skb_pull_rcsum(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vhdr);
+
+ skb = skb_reorder_vlan_header(skb);
+ if (unlikely(!skb))
+ goto err_free;
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_reset_mac_len(skb);
+
+ return skb;
+
+err_free:
+ kfree_skb(skb);
+ return NULL;
+}
+EXPORT_SYMBOL(skb_vlan_untag);
diff --git a/net/core/sock.c b/net/core/sock.c
index 410bb4c..f9ec2f5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,55 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+/**
+ * sk_ns_capable - General socket capability test
+ * @sk: Socket to use a capability on or through
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in the user
+ * namespace @user_ns.
+ */
+bool sk_ns_capable(const struct sock *sk,
+ struct user_namespace *user_ns, int cap)
+{
+ return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
+ ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(sk_ns_capable);
+
+/**
+ * sk_capable - Socket global capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The global capbility to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in all user
+ * namespaces.
+ */
+bool sk_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(sk_capable);
+
+/**
+ * sk_net_capable - Network namespace socket capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socke was created
+ * and the current process has the capability @cap over the network namespace
+ * the socket is a member of.
+ */
+bool sk_net_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(sk_net_capable);
+
+
#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
@@ -2339,11 +2388,12 @@ void lock_sock_nested(struct sock *sk, int subclass)
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock_bh(&sk->sk_lock.slock);
+ spin_unlock(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+ local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a0e9cf6..c38e7a2 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
}
EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
-int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
struct sk_buff *skb, int attrtype)
{
struct nlattr *attr;
@@ -57,7 +57,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
unsigned int len;
int err = 0;
- if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
+ if (!may_report_filterinfo) {
nla_reserve(skb, attrtype, 0);
return 0;
}
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 40d5829..1074ffb 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1670,7 +1670,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlmsghdr *reply_nlh = NULL;
const struct reply_func *fn;
- if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN))
+ if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index dd0dfb2..70f2549 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -573,7 +573,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct dn_ifaddr __rcu **ifap;
int err = -EINVAL;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
@@ -617,7 +617,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct dn_ifaddr *ifa;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 57dc159..d332aef 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *attrs[RTA_MAX+1];
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
@@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *attrs[RTA_MAX+1];
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2a7efe3..f3dc69a 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
/* Eventually we might send routing messages too */
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index c32be29..ede0e2d 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -150,7 +150,9 @@ int dns_query(const char *type, const char *name, size_t namelen,
if (!*_result)
goto put;
- memcpy(*_result, upayload->data, len + 1);
+ memcpy(*_result, upayload->data, len);
+ *_result[len] = '\0';
+
if (_expiry)
*_expiry = rkey->expiry;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 19e3637..5f3dc1d 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -86,18 +86,26 @@ out:
}
EXPORT_SYMBOL(ip4_datagram_connect);
+/* Because UDP xmit path can manipulate sk_dst_cache without holding
+ * socket lock, we need to use sk_dst_set() here,
+ * even if we own the socket lock.
+ */
void ip4_datagram_release_cb(struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ip_options_rcu *inet_opt;
__be32 daddr = inet->inet_daddr;
+ struct dst_entry *dst;
struct flowi4 fl4;
struct rtable *rt;
- if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
- return;
-
rcu_read_lock();
+
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+ rcu_read_unlock();
+ return;
+ }
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
@@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk)
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
- if (!IS_ERR(rt))
- __sk_dst_set(sk, &rt->dst);
+
+ dst = !IS_ERR(rt) ? &rt->dst : NULL;
+ sk_dst_set(sk, dst);
+
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f2e1573..8f7bd56 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
else
res->tclassid = 0;
#endif
+
+ if (err == -ESRCH)
+ err = -ENETUNREACH;
+
return err;
}
EXPORT_SYMBOL_GPL(__fib_lookup);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d5dbca5..ec12b16 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -534,7 +534,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
return 1;
attrlen = rtnh_attrlen(rtnh);
- if (attrlen < 0) {
+ if (attrlen > 0) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
@@ -819,13 +819,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (fi == NULL)
goto failure;
+ fib_info_cnt++;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
if (!fi->fib_metrics)
goto failure;
} else
fi->fib_metrics = (u32 *) dst_default_metrics;
- fib_info_cnt++;
fi->fib_net = hold_net(net);
fi->fib_protocol = cfg->fc_protocol;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1f0c7e0..ff670ca 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -69,7 +69,6 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
-#include <linux/sysrq.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -706,8 +705,6 @@ static void icmp_unreach(struct sk_buff *skb)
&iph->daddr);
} else {
info = ntohs(icmph->un.frag.mtu);
- if (!info)
- goto out;
}
break;
case ICMP_SR_FAILED:
@@ -777,30 +774,6 @@ static void icmp_redirect(struct sk_buff *skb)
}
/*
- * 32bit and 64bit have different timestamp length, so we check for
- * the cookie at offset 20 and verify it is repeated at offset 50
- */
-#define CO_POS0 20
-#define CO_POS1 50
-#define CO_SIZE sizeof(int)
-#define ICMP_SYSRQ_SIZE 57
-
-/*
- * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
- * pattern and if it matches send the next byte as a trigger to sysrq.
- */
-static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
-{
- int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
- char *p = skb->data;
-
- if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
- !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
- p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
- handle_sysrq(p[CO_POS0 + CO_SIZE]);
-}
-
-/*
* Handle ICMP_ECHO ("ping") requests.
*
* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -827,11 +800,6 @@ static void icmp_echo(struct sk_buff *skb)
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_reply(&icmp_param, skb);
-
- if (skb->len == ICMP_SYSRQ_SIZE &&
- net->ipv4.sysctl_icmp_echo_sysrq) {
- icmp_check_sysrq(net, skb);
- }
}
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7defdc9..94d40cc 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -1952,6 +1952,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_lock();
in_dev = ip_mc_find_dev(net, imr);
+ if (!in_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list;
(iml = rtnl_dereference(*imlp)) != NULL;
@@ -1969,16 +1973,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- if (in_dev)
- ip_mc_dec_group(in_dev, group);
+ ip_mc_dec_group(in_dev, group);
rtnl_unlock();
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
return 0;
}
- if (!in_dev)
- ret = -ENODEV;
+out:
rtnl_unlock();
return ret;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 33d5537..67140ef 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
* Theory of operations.
* We keep one entry for each peer IP address. The nodes contains long-living
* information about the peer which doesn't depend on routes.
- * At this moment this information consists only of ID field for the next
- * outgoing IP packet. This field is incremented with each packet as encoded
- * in inet_getid() function (include/net/inetpeer.h).
- * At the moment of writing this notes identifier of IP packets is generated
- * to be unpredictable using this code only for packets subjected
- * (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size when local fragmentation is disabled use a constant ID and do
- * not use this code (see ip_select_ident() in include/net/ip.h).
*
- * Route cache entries hold references to our nodes.
- * New cache entries get references via lookup by destination IP address in
- * the avl tree. The reference is grabbed only when it's needed i.e. only
- * when we try to output IP packet which needs an unpredictable ID (see
- * __ip_select_ident() in net/ipv4/route.c).
* Nodes are removed only when reference counter goes to 0.
* When it's happened the node may be removed when a sufficient amount of
* time has been passed since its last use. The less-recently-used entry can
@@ -62,7 +49,6 @@
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* daddr: unchangeable
- * ip_id_count: atomic value (no lock needed)
*/
static struct kmem_cache *peer_cachep __read_mostly;
@@ -504,10 +490,6 @@ relookup:
p->daddr = *daddr;
atomic_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
- atomic_set(&p->ip_id_count,
- (daddr->family == AF_INET) ?
- secure_ip_id(daddr->addr.a4) :
- secure_ipv6_id(daddr->addr.a6));
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
/* 60*HZ is arbitrary, but chosen enough high so that the first
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 98d7e53..bd1c5ba 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,12 +42,12 @@
static bool ip_may_fragment(const struct sk_buff *skb)
{
return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
- !skb->local_df;
+ skb->local_df;
}
static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
- if (skb->len <= mtu || skb->local_df)
+ if (skb->len <= mtu)
return false;
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d306360..b3becd0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -463,6 +463,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
+ dev->type = ARPHRD_IPGRE;
ip_tunnel_setup(dev, ipgre_net_id);
}
@@ -501,7 +502,6 @@ static int ipgre_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_IPGRE;
dev->flags = IFF_NOARP;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->addr_len = 4;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec72645..089ed81 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
optptr++;
continue;
}
+ if (unlikely(l < 2)) {
+ pp_ptr = optptr;
+ goto error;
+ }
optlen = optptr[1];
if (optlen<2 || optlen>l) {
pp_ptr = optptr;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8bb3b4a..c2dcee2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -79,7 +79,6 @@
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>
-#include <linux/locallock.h>
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
@@ -149,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -387,8 +386,7 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(skb, &rt->dst, sk,
- (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -1330,7 +1328,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1469,9 +1467,6 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
.uc_ttl = -1,
};
-/* serialize concurrent calls on the same CPU to ip_send_unicast_reply */
-static DEFINE_LOCAL_IRQ_LOCK(unicast_lock);
-
void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
__be32 saddr, const struct ip_reply_arg *arg,
unsigned int len)
@@ -1483,6 +1478,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
struct sk_buff *nskb;
struct sock *sk;
struct inet_sock *inet;
+ int err;
if (ip_options_echo(&replyopts.opt.opt, skb))
return;
@@ -1509,7 +1505,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
if (IS_ERR(rt))
return;
- inet = &get_locked_var(unicast_lock, unicast_sock);
+ inet = &get_cpu_var(unicast_sock);
inet->tos = arg->tos;
sk = &inet->sk;
@@ -1519,8 +1515,13 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
sock_net_set(sk, net);
__skb_queue_head_init(&sk->sk_write_queue);
sk->sk_sndbuf = sysctl_wmem_default;
- ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
- &ipc, &rt, MSG_DONTWAIT);
+ err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
+ len, 0, &ipc, &rt, MSG_DONTWAIT);
+ if (unlikely(err)) {
+ ip_flush_pending_frames(sk);
+ goto out;
+ }
+
nskb = skb_peek(&sk->sk_write_queue);
if (nskb) {
if (arg->csumoffset >= 0)
@@ -1532,8 +1533,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
-
- put_locked_var(unicast_lock, unicast_sock);
+out:
+ put_cpu_var(unicast_sock);
ip_rt_put(rt);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 3bedb26..edd5a81 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -166,6 +166,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (remote != t->parms.iph.daddr ||
+ t->parms.iph.saddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -182,10 +183,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
head = &itn->tunnels[hash];
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
+ if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+ (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+ continue;
+
+ if (!(t->dev->flags & IFF_UP))
continue;
if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -202,6 +204,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (t->parms.i_key != key ||
+ t->parms.iph.saddr != 0 ||
+ t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -433,6 +437,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ skb_reset_network_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -853,6 +859,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
}
rtnl_unlock();
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index c31e3ad..ff3f84f 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
- __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out(skb);
if (unlikely(net_xmit_eval(err)))
@@ -91,11 +91,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
skb_pull_rcsum(skb, hdr_len);
if (inner_proto == htons(ETH_P_TEB)) {
- struct ethhdr *eh = (struct ethhdr *)skb->data;
+ struct ethhdr *eh;
if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
return -ENOMEM;
+ eh = (struct ethhdr *)skb->data;
if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
skb->protocol = eh->h_proto;
else
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 26847e1..33e2bf8 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -271,6 +271,7 @@ static const struct net_device_ops vti_netdev_ops = {
static void vti_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &vti_netdev_ops;
+ dev->type = ARPHRD_TUNNEL;
ip_tunnel_setup(dev, vti_net_id);
}
@@ -282,7 +283,6 @@ static int vti_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_TUNNEL;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
dev->mtu = ETH_DATA_LEN;
dev->flags = IFF_NOARP;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7f80fb4..077f900 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, IPPROTO_IPIP, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPIP, 0);
err = 0;
goto out;
@@ -485,4 +485,5 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6fbf339..648ba5e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1661,7 +1661,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(skb, skb_dst(skb), NULL);
+ ip_select_ident(skb, NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 85a4f21..c8abe31 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1039,8 +1039,10 @@ static int __do_replace(struct net *net, const char *name,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d23118d..651c107 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1226,8 +1226,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index cbc2215..9cb993c 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net,
ub->qlen++;
pm = nlmsg_data(nlh);
+ memset(pm, 0, sizeof(*pm));
/* We might not have a timestamp, get one */
if (skb->tstamp.tv64 == 0)
@@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net,
}
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
- else
- *(pm->prefix) = '\0';
if (in && in->hard_header_len > 0 &&
skb->mac_header != skb->network_header &&
@@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net,
if (in)
strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
- else
- pm->indev_name[0] = '\0';
if (out)
strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
- else
- pm->outdev_name[0] = '\0';
/* copy_len <= skb->len, so can't fail. */
if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 7428155..4cfb3bd 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-/* Returns new sk_buff, or NULL */
static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
err = ip_defrag(skb, user);
local_bh_enable();
- if (!err)
+ if (!err) {
ip_send_check(ip_hdr(skb));
+ skb->local_df = 1;
+ }
return err;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c211607..8bd51f4 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -214,6 +214,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
&ipv6_hdr(skb)->daddr))
continue;
#endif
+ } else {
+ continue;
}
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7d3db78..6183d36 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2557b9a..f7fe946 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -465,39 +466,53 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
return neigh_create(&arp_tbl, pkey, dev);
}
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+ atomic_t id;
+ u32 stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
*/
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
{
- static DEFINE_SPINLOCK(ip_fb_id_lock);
- static u32 ip_fallback_id;
- u32 salt;
+ struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 now = (u32)jiffies;
+ u32 delta = 0;
+
+ if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) {
+ u64 x = prandom_u32();
+
+ x *= (now - old);
+ delta = (u32)(x >> 32);
+ }
- spin_lock_bh(&ip_fb_id_lock);
- salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
- iph->id = htons(salt & 0xFFFF);
- ip_fallback_id = salt;
- spin_unlock_bh(&ip_fb_id_lock);
+ return atomic_add_return(segs + delta, &bucket->id) - segs;
}
+EXPORT_SYMBOL(ip_idents_reserve);
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
{
- struct net *net = dev_net(dst->dev);
- struct inet_peer *peer;
+ static u32 ip_idents_hashrnd __read_mostly;
+ static bool hashrnd_initialized = false;
+ u32 hash, id;
- peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
- if (peer) {
- iph->id = htons(inet_getid(peer, more));
- inet_putpeer(peer);
- return;
+ if (unlikely(!hashrnd_initialized)) {
+ hashrnd_initialized = true;
+ get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
}
- ip_select_fb_ident(iph);
+ hash = jhash_3words((__force u32)iph->daddr,
+ (__force u32)iph->saddr,
+ iph->protocol,
+ ip_idents_hashrnd);
+ id = ip_idents_reserve(hash, segs);
+ iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
@@ -1032,20 +1047,21 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- struct dst_entry *dst;
+ struct dst_entry *odst = NULL;
bool new = false;
bh_lock_sock(sk);
- rt = (struct rtable *) __sk_dst_get(sk);
+ odst = sk_dst_get(sk);
- if (sock_owned_by_user(sk) || !rt) {
+ if (sock_owned_by_user(sk) || !odst) {
__ipv4_sk_update_pmtu(skb, sk, mtu);
goto out;
}
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- if (!__sk_dst_check(sk, 0)) {
+ rt = (struct rtable *)odst;
+ if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
goto out;
@@ -1055,8 +1071,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
- dst = dst_check(&rt->dst, 0);
- if (!dst) {
+ if (!dst_check(&rt->dst, 0)) {
if (new)
dst_release(&rt->dst);
@@ -1068,10 +1083,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
if (new)
- __sk_dst_set(sk, &rt->dst);
+ sk_dst_set(sk, &rt->dst);
out:
bh_unlock_sock(sk);
+ dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
@@ -1525,7 +1541,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct in_device *out_dev;
unsigned int flags = 0;
bool do_cache;
- u32 itag;
+ u32 itag = 0;
/* get a working reference to the output device */
out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -2254,9 +2270,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
return rt;
if (flp4->flowi4_proto)
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0);
return rt;
}
@@ -2358,7 +2374,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
}
} else
#endif
- if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
goto nla_put_failure;
}
@@ -2711,6 +2727,12 @@ int __init ip_rt_init(void)
{
int rc = 0;
+ ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+ if (!ip_idents)
+ panic("IP: failed to allocate ip_idents\n");
+
+ prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
#ifdef CONFIG_IP_ROUTE_CLASSID
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
if (!ip_rt_acct)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 99461ed..540279f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -812,13 +812,6 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "icmp_echo_sysrq",
- .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "icmp_ignore_bogus_error_responses",
.data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 531ab57..a880ccc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1064,7 +1064,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
- goto out;
+ goto out_nopush;
}
err = -EINVAL;
@@ -1237,6 +1237,7 @@ wait_for_memory:
out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle);
+out_nopush:
release_sock(sk);
return copied + copied_syn;
@@ -2908,61 +2909,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
-
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
-
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- }
- free_percpu(pool);
-}
+static bool tcp_md5sig_pool_populated = false;
static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
- struct tcp_md5sig_pool __percpu *pool;
-
- pool = alloc_percpu(struct tcp_md5sig_pool);
- if (!pool)
- return;
for_each_possible_cpu(cpu) {
- struct crypto_hash *hash;
-
- hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
- goto out_free;
+ if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) {
+ struct crypto_hash *hash;
- per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
+ hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR_OR_NULL(hash))
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
+ }
}
- /* before setting tcp_md5sig_pool, we must commit all writes
- * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool = pool;
- return;
-out_free:
- __tcp_free_md5sig_pool(pool);
+ tcp_md5sig_pool_populated = true;
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool)) {
+ if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool)
+ if (!tcp_md5sig_pool_populated)
__tcp_alloc_md5sig_pool();
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool != NULL;
+ return tcp_md5sig_pool_populated;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -2976,13 +2958,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *p;
-
local_bh_disable();
- p = ACCESS_ONCE(tcp_md5sig_pool);
- if (p)
- return __this_cpu_ptr(p);
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
local_bh_enable();
return NULL;
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index b6ae92a..894b7ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -408,7 +408,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ratio += cnt;
- ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
+ ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
}
/* Some calls are for duplicates without timetamps */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 068c8fb..172cd99 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1064,7 +1064,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack && tp->undo_marker && tp->undo_retrans &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1120,7 +1120,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
unsigned int new_len = (pkt_len / mss) * mss;
if (!in_sack && new_len < pkt_len) {
new_len += mss;
- if (new_len > skb->len)
+ if (new_len >= skb->len)
return 0;
}
pkt_len = new_len;
@@ -1144,7 +1144,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (tp->undo_marker && tp->undo_retrans &&
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
@@ -1845,7 +1845,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
- tp->undo_retrans = 0;
+ tp->undo_retrans = -1;
}
void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2613,7 +2613,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out;
+ tp->undo_retrans = tp->retrans_out ? : -1;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
@@ -2628,18 +2628,16 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
*/
static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
- if (flag & FLAG_ORIG_SACK_ACKED) {
- /* Step 3.b. A timeout is spurious if not all data are
- * lost, i.e., never-retransmitted data are (s)acked.
- */
- tcp_try_undo_loss(sk, true);
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
return;
- }
+
if (after(tp->snd_nxt, tp->high_seq) &&
(flag & FLAG_DATA_SACKED || is_dupack)) {
tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
@@ -2655,12 +2653,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
if (recovered) {
/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
- icsk->icsk_retransmits = 0;
tcp_try_undo_recovery(sk);
return;
}
- if (flag & FLAG_DATA_ACKED)
- icsk->icsk_retransmits = 0;
if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
@@ -3349,8 +3344,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
- if (after(ack, prior_snd_una))
+ if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
+ icsk->icsk_retransmits = 0;
+ }
prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5031f68..45f3703 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
* It can be called through tcp_release_cb() if socket was owned by user
* at the time tcp_v4_err() was called to handle ICMP message.
*/
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -299,6 +299,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
}
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
@@ -2117,6 +2118,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
@@ -2796,7 +2798,6 @@ struct proto tcp_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v4_mtu_reduced,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 826fc6f..b4435ae 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -785,7 +785,7 @@ void tcp_release_cb(struct sock *sk)
__sock_put(sk);
}
if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
- sk->sk_prot->mtu_reduced(sk);
+ inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
}
@@ -1871,7 +1871,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
break;
- if (tso_segs == 1) {
+ if (tso_segs == 1 || !sk->sk_gso_max_segs) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
(tcp_skb_is_last(sk, skb) ?
nonagle : TCP_NAGLE_PUSH))))
@@ -1908,7 +1908,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
limit = mss_now;
- if (tso_segs > 1 && !tcp_urg_mode(tp))
+ if (tso_segs > 1 && sk->sk_gso_max_segs && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
min_t(unsigned int,
cwnd_quota,
@@ -2045,9 +2045,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
- /* Probe with zero data doesn't trigger fast recovery. */
- if (skb->len > 0)
- err = __tcp_retransmit_skb(sk, skb);
+ err = __tcp_retransmit_skb(sk, skb);
/* Record snd_nxt for loss detection. */
if (likely(!err))
@@ -2437,8 +2435,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans += tcp_skb_pcount(skb);
-
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
@@ -2446,6 +2442,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
} else {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
+
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
return err;
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 80fa2bf..c042e52 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -218,7 +218,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
+ target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ac43cd7..b4d1858 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -144,7 +144,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
rtt = veno->minrtt;
- target_cwnd = (tp->snd_cwnd * veno->basertt);
+ target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index b5663c3..e3f6483 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,12 +117,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
+ ip_select_ident(skb, NULL);
return 0;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5bec666..5e30677 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1418,7 +1418,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
if (w->skip) {
w->skip--;
- continue;
+ goto skip;
}
err = w->func(w);
@@ -1428,6 +1428,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->count++;
continue;
}
+skip:
w->state = FWS_U;
case FWS_U:
if (fn == w->root)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index bf4a9a0..7d640f2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -511,11 +511,11 @@ static int ip6gre_rcv(struct sk_buff *skb)
skb->protocol = gre_proto;
/* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
+ * - Change protocol to IPv6
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/
if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
- skb->protocol = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IPV6);
if ((*(h + offset) & 0xF0) != 0x40)
offset += 4;
}
@@ -790,7 +790,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv4_get_dsfield(iph);
@@ -840,7 +840,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -965,8 +965,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
else
dev->flags &= ~IFF_POINTOPOINT;
- dev->iflink = p->link;
-
/* Precalculate GRE options length */
if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
if (t->parms.o_flags&GRE_CSUM)
@@ -1269,6 +1267,8 @@ static int ip6gre_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
+ dev->iflink = tunnel->parms.link;
+
return 0;
}
@@ -1285,7 +1285,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
dev_hold(dev);
}
-
static struct inet6_protocol ip6gre_protocol __read_mostly = {
.handler = ip6gre_rcv,
.err_handler = ip6gre_err,
@@ -1462,6 +1461,8 @@ static int ip6gre_tap_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
+ dev->iflink = tunnel->parms.link;
+
return 0;
}
@@ -1554,6 +1555,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return 0;
}
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ if (dev != ign->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6gre_get_size(const struct net_device *dev)
{
return
@@ -1631,6 +1641,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.validate = ip6gre_tunnel_validate,
.newlink = ip6gre_newlink,
.changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 516e136..602533d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -323,12 +323,16 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
- if (skb->len <= mtu || skb->local_df)
+ if (skb->len <= mtu)
return false;
+ /* ipv6 conntrack defrag sets max_frag_size + local_df */
if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
return true;
+ if (skb->local_df)
+ return false;
+
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
return false;
@@ -512,6 +516,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static u32 ip6_idents_hashrnd __read_mostly;
+ static bool hashrnd_initialized = false;
+ u32 hash, id;
+
+ if (unlikely(!hashrnd_initialized)) {
+ hashrnd_initialized = true;
+ get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+ }
+ hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+ hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
+
+ id = ip_idents_reserve(hash, 1);
+ fhdr->identification = htonl(id);
+}
+
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -973,7 +994,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (can_sleep)
fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
@@ -1009,7 +1030,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (can_sleep)
fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c1e11b5..f8a70a1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -62,6 +62,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
@@ -265,9 +266,6 @@ static int ip6_tnl_create2(struct net_device *dev)
int err;
t = netdev_priv(dev);
- err = ip6_tnl_dev_init(dev);
- if (err < 0)
- goto out;
err = register_netdevice(dev);
if (err < 0)
@@ -1447,6 +1445,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
static const struct net_device_ops ip6_tnl_netdev_ops = {
+ .ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
.ndo_start_xmit = ip6_tnl_xmit,
.ndo_do_ioctl = ip6_tnl_ioctl,
@@ -1531,16 +1530,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- int err = ip6_tnl_dev_init_gen(dev);
-
- if (err)
- return err;
t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
- ip6_tnl_link_config(t);
-
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
}
@@ -1549,7 +1542,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
u8 proto;
- if (!data)
+ if (!data || !data[IFLA_IPTUN_PROTO])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 95f3f1d..d38e6a8 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb)
.daddr = iph->daddr,
.saddr = iph->saddr,
};
+ int err;
dst = ip6_route_output(net, skb->sk, &fl6);
- if (dst->error) {
+ err = dst->error;
+ if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
- return dst->error;
+ return err;
}
/* Drop old route. */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 44400c2..89a4e4d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1236,8 +1236,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 827f795..4bd870a 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -3,38 +3,48 @@
* not configured or static. These functions are needed by GSO/GRO implementation.
*/
#include <linux/export.h>
+#include <linux/random.h>
+#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/addrconf.h>
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+/* This function exists only for tap drivers that must support broken
+ * clients requesting UFO without specifying an IPv6 fragment ID.
+ *
+ * This is similar to ipv6_select_ident() but we use an independent hash
+ * seed to limit information leakage.
+ *
+ * The network header must be set before calling this.
+ */
+void ipv6_proxy_select_ident(struct sk_buff *skb)
{
- static atomic_t ipv6_fragmentation_id;
- int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
+ static u32 ip6_proxy_idents_hashrnd __read_mostly;
+ struct in6_addr buf[2];
+ struct in6_addr *addrs;
+ static bool done = false;
+ u32 hash, id;
+
+ addrs = skb_header_pointer(skb,
+ skb_network_offset(skb) +
+ offsetof(struct ipv6hdr, saddr),
+ sizeof(buf), buf);
+ if (!addrs)
+ return;
+
+ if (!done) {
+ get_random_bytes(&ip6_proxy_idents_hashrnd,
+ sizeof(ip6_proxy_idents_hashrnd));
+ done = true;
}
-#endif
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
+
+ hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd);
+ hash = __ipv6_addr_jhash(&addrs[0], hash);
+
+ id = ip_idents_reserve(hash, 1);
+ skb_shinfo(skb)->ip6_frag_id = htonl(id);
}
-EXPORT_SYMBOL(ipv6_select_ident);
+EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 77f81be..b4bb6a2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1329,7 +1329,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = IPV6_MIN_MTU;
@@ -1339,7 +1339,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b433884..8e8fc32 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -195,10 +195,8 @@ static int ipip6_tunnel_create(struct net_device *dev)
struct sit_net *sitn = net_generic(net, sit_net_id);
int err;
- err = ipip6_tunnel_init(dev);
- if (err < 0)
- goto out;
- ipip6_tunnel_clone_6rd(dev, sitn);
+ memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
+ memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
if ((__force u16)t->parms.i_flags & SIT_ISATAP)
dev->priv_flags |= IFF_ISATAP;
@@ -207,7 +205,8 @@ static int ipip6_tunnel_create(struct net_device *dev)
if (err < 0)
goto out;
- strcpy(t->parms.name, dev->name);
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
dev->rtnl_link_ops = &sit_link_ops;
dev_hold(dev);
@@ -530,12 +529,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPV6, 0);
err = 0;
goto out;
@@ -1279,6 +1278,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
}
static const struct net_device_ops ipip6_netdev_ops = {
+ .ndo_init = ipip6_tunnel_init,
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
@@ -1313,9 +1313,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
tunnel->dev = dev;
tunnel->net = dev_net(dev);
-
- memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
- memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+ strcpy(tunnel->parms.name, dev->name);
ipip6_tunnel_bind_dev(dev);
dev->tstats = alloc_percpu(struct pcpu_tstats);
@@ -1334,7 +1332,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
tunnel->dev = dev;
tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
iph->version = 4;
iph->protocol = IPPROTO_IPV6;
@@ -1770,4 +1767,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5c71501..3058c4a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1651,6 +1651,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v6_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1682,6 +1683,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1919,7 +1921,6 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index e096025..6857ae4 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1778,6 +1778,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
struct ipxhdr *ipx = NULL;
struct sk_buff *skb;
int copied, rc;
+ bool locked = true;
lock_sock(sk);
/* put the autobinding in */
@@ -1804,6 +1805,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
+ release_sock(sk);
+ locked = false;
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &rc);
if (!skb)
@@ -1837,7 +1840,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
out_free:
skb_free_datagram(sk, skb);
out:
- release_sock(sk);
+ if (locked)
+ release_sock(sk);
return rc;
}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c4b7218..1465363 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1829,7 +1829,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
spin_lock_irqsave(&list->lock, flags);
while (list_skb != (struct sk_buff *)list) {
- if (msg->tag != IUCV_SKB_CB(list_skb)->tag) {
+ if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
this = list_skb;
break;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 44441c0..c3ae241 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -754,9 +754,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
session->deref = pppol2tp_session_sock_put;
/* If PMTU discovery was enabled, use the MTU that was discovered */
- dst = sk_dst_get(sk);
+ dst = sk_dst_get(tunnel->sock);
if (dst != NULL) {
- u32 pmtu = dst_mtu(__sk_dst_get(sk));
+ u32 pmtu = dst_mtu(dst);
+
if (pmtu != 0)
session->mtu = session->mru = pmtu -
PPPOL2TP_HEADER_OVERHEAD;
@@ -1365,7 +1366,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
int err;
if (level != SOL_PPPOL2TP)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1491,7 +1492,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (get_user(len, optlen))
return -EFAULT;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cafe614..8e41f01 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read(
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
- if (sdata->dev->reg_state == NETREG_REGISTERED)
- ret = (*format)(sdata, buf, sizeof(buf));
+ ret = (*format)(sdata, buf, sizeof(buf));
read_unlock(&dev_base_lock);
if (ret >= 0)
@@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write(
ret = -ENODEV;
rtnl_lock();
- if (sdata->dev->reg_state == NETREG_REGISTERED)
- ret = (*write)(sdata, buf, count);
+ ret = (*write)(sdata, buf, count);
rtnl_unlock();
return ret;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a12afe7..f69cac4 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1203,6 +1203,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.privacy = params->privacy;
sdata->u.ibss.control_port = params->control_port;
sdata->u.ibss.basic_rates = params->basic_rates;
+ sdata->u.ibss.last_scan_completed = jiffies;
/* fix basic_rates if channel does not support these rates */
rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 611abfc..23c1316 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -311,6 +311,7 @@ struct ieee80211_roc_work {
bool started, abort, hw_begun, notified;
bool to_be_freed;
+ bool on_channel;
unsigned long hw_start_time;
@@ -1320,6 +1321,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata);
void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata,
__le16 fc, bool acked);
+void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
/* IBSS code */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index fcecd63..31da72c 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -749,10 +749,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
u32 hw_reconf_flags = 0;
int i, flushed;
struct ps_data *ps;
+ bool cancel_scan;
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
- if (rcu_access_pointer(local->scan_sdata) == sdata)
+ cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata;
+ if (cancel_scan)
ieee80211_scan_cancel(local);
/*
@@ -959,6 +961,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_ps(local, -1);
+ if (cancel_scan)
+ flush_delayed_work(&local->scan_work);
+
if (local->open_count == 0) {
ieee80211_stop_device(local);
@@ -1766,7 +1771,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
}
mutex_unlock(&local->iflist_mtx);
unregister_netdevice_many(&unreg_list);
- list_del(&unreg_list);
list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
list_del(&sdata->list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 620677e..23dfd24 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -615,7 +615,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
int i;
mutex_lock(&local->key_mtx);
- for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
key = key_mtx_dereference(local, sta->gtk[i]);
if (!key)
continue;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e765f77..2c5f21c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -148,6 +148,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
if (!rcu_access_pointer(sdata->vif.chanctx_conf))
continue;
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ continue;
power = min(power, sdata->vif.bss_conf.txpower);
}
rcu_read_unlock();
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8d7f4ab..023bc33 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -131,13 +131,13 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
if (unlikely(!sdata->u.mgd.associated))
return;
+ ifmgd->probe_send_count = 0;
+
if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
return;
mod_timer(&sdata->u.mgd.conn_mon_timer,
round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
-
- ifmgd->probe_send_count = 0;
}
static int ecw2cw(int ecw)
@@ -1265,7 +1265,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work);
else
mod_timer(&ifmgd->chswitch_timer,
- TU_TO_EXP_TIME(count * cbss->beacon_interval));
+ TU_TO_EXP_TIME((count - 1) *
+ cbss->beacon_interval));
}
static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
@@ -2881,8 +2882,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
channel);
if (bss) {
- ieee80211_rx_bss_put(local, bss);
sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
+ ieee80211_rx_bss_put(local, bss);
}
}
@@ -3684,6 +3685,38 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
}
#ifdef CONFIG_PM
+void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
+ sdata_lock(sdata);
+
+ if (ifmgd->auth_data || ifmgd->assoc_data) {
+ const u8 *bssid = ifmgd->auth_data ?
+ ifmgd->auth_data->bss->bssid :
+ ifmgd->assoc_data->bss->bssid;
+
+ /*
+ * If we are trying to authenticate / associate while suspending,
+ * cfg80211 won't know and won't actually abort those attempts,
+ * thus we need to do that ourselves.
+ */
+ ieee80211_send_deauth_disassoc(sdata, bssid,
+ IEEE80211_STYPE_DEAUTH,
+ WLAN_REASON_DEAUTH_LEAVING,
+ false, frame_buf);
+ if (ifmgd->assoc_data)
+ ieee80211_destroy_assoc_data(sdata, false);
+ if (ifmgd->auth_data)
+ ieee80211_destroy_auth_data(sdata, false);
+ cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
+ IEEE80211_DEAUTH_FRAME_LEN);
+ }
+
+ sdata_unlock(sdata);
+}
+
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -4308,8 +4341,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (bss->wmm_used && bss->uapsd_supported &&
- (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) &&
- sdata->wmm_acm != 0xff) {
+ (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
assoc_data->uapsd = true;
ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
} else {
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 0c2a294..7a17dec 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
container_of(work, struct ieee80211_roc_work, work.work);
struct ieee80211_sub_if_data *sdata = roc->sdata;
struct ieee80211_local *local = sdata->local;
- bool started;
+ bool started, on_channel;
mutex_lock(&local->mtx);
@@ -354,13 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work)
if (!roc->started) {
struct ieee80211_roc_work *dep;
- /* start this ROC */
+ WARN_ON(local->use_chanctx);
+
+ /* If actually operating on the desired channel (with at least
+ * 20 MHz channel width) don't stop all the operations but still
+ * treat it as though the ROC operation started properly, so
+ * other ROC operations won't interfere with this one.
+ */
+ roc->on_channel = roc->chan == local->_oper_chandef.chan &&
+ local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 &&
+ local->_oper_chandef.width != NL80211_CHAN_WIDTH_10;
- /* switch channel etc */
+ /* start this ROC */
ieee80211_recalc_idle(local);
- local->tmp_channel = roc->chan;
- ieee80211_hw_config(local, 0);
+ if (!roc->on_channel) {
+ ieee80211_offchannel_stop_vifs(local);
+
+ local->tmp_channel = roc->chan;
+ ieee80211_hw_config(local, 0);
+ }
/* tell userspace or send frame */
ieee80211_handle_roc_started(roc);
@@ -379,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work)
finish:
list_del(&roc->list);
started = roc->started;
+ on_channel = roc->on_channel;
ieee80211_roc_notify_destroy(roc, !roc->abort);
- if (started) {
+ if (started && !on_channel) {
ieee80211_flush_queues(local, NULL);
local->tmp_channel = NULL;
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 3401262..efb510e 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -101,10 +101,18 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
/* remove all interfaces that were created in the driver */
list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata) ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ if (!ieee80211_sdata_running(sdata))
continue;
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_MONITOR:
+ continue;
+ case NL80211_IFTYPE_STATION:
+ ieee80211_mgd_quiesce(sdata);
+ break;
+ default:
+ break;
+ }
drv_remove_interface(local, sdata);
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index e126605..8753b77 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -454,7 +454,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif,
*/
if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) {
u32 basic_rates = vif->bss_conf.basic_rates;
- s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0;
+ s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0;
rate = &sband->bitrates[rates[0].idx];
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 62fba17..ef3bdba 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1646,11 +1646,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sc = le16_to_cpu(hdr->seq_ctrl);
frag = sc & IEEE80211_SCTL_FRAG;
- if (likely((!ieee80211_has_morefrags(fc) && frag == 0) ||
- is_multicast_ether_addr(hdr->addr1))) {
- /* not fragmented */
- goto out;
+ if (is_multicast_ether_addr(hdr->addr1)) {
+ rx->local->dot11MulticastReceivedFrameCount++;
+ goto out_no_led;
}
+
+ if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
+ goto out;
+
I802_DEBUG_INC(rx->local->rx_handlers_fragments);
if (skb_linearize(rx->skb))
@@ -1741,12 +1744,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
status->rx_flags |= IEEE80211_RX_FRAGMENTED;
out:
+ ieee80211_led_rx(rx->local);
+ out_no_led:
if (rx->sta)
rx->sta->rx_packets++;
- if (is_multicast_ether_addr(hdr->addr1))
- rx->local->dot11MulticastReceivedFrameCount++;
- else
- ieee80211_led_rx(rx->local);
return RX_CONTINUE;
}
@@ -3298,7 +3299,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
struct ieee80211_supported_band *sband;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- WARN_ON_ONCE_NONRT(softirq_count() == 0);
+ WARN_ON_ONCE(softirq_count() == 0);
if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
goto drop;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index db41c19..3702572 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -271,6 +271,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr);
+ kfree(rcu_dereference_raw(sta->sta.rates));
kfree(sta);
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d6a47e7..c2785b2 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -413,6 +413,9 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (ieee80211_has_order(hdr->frame_control))
return TX_CONTINUE;
+ if (ieee80211_is_probe_req(hdr->frame_control))
+ return TX_CONTINUE;
+
if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
info->hw_queue = tx->sdata->vif.cab_queue;
@@ -463,6 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
{
struct sta_info *sta = tx->sta;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
struct ieee80211_local *local = tx->local;
if (unlikely(!sta))
@@ -473,6 +477,15 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
!(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) {
int ac = skb_get_queue_mapping(tx->skb);
+ /* only deauth, disassoc and action are bufferable MMPDUs */
+ if (ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_is_deauth(hdr->frame_control) &&
+ !ieee80211_is_disassoc(hdr->frame_control) &&
+ !ieee80211_is_action(hdr->frame_control)) {
+ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+ return TX_CONTINUE;
+ }
+
ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n",
sta->sta.addr, sta->sta.aid, ac);
if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
@@ -530,22 +543,8 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-
if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED))
return TX_CONTINUE;
-
- /* only deauth, disassoc and action are bufferable MMPDUs */
- if (ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_deauth(hdr->frame_control) &&
- !ieee80211_is_disassoc(hdr->frame_control) &&
- !ieee80211_is_action(hdr->frame_control)) {
- if (tx->flags & IEEE80211_TX_UNICAST)
- info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
- return TX_CONTINUE;
- }
-
if (tx->flags & IEEE80211_TX_UNICAST)
return ieee80211_tx_h_unicast_ps_buf(tx);
else
@@ -2806,7 +2805,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
cpu_to_le16(IEEE80211_FCTL_MOREDATA);
}
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
if (!ieee80211_tx_prepare(sdata, &tx, skb))
break;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 6bd22aa..593b16e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,17 +21,11 @@
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include "nf_internals.h"
-#ifdef CONFIG_PREEMPT_RT_BASE
-DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
-EXPORT_PER_CPU_SYMBOL(xt_write_lock);
-#endif
-
static DEFINE_MUTEX(afinfo_mutex);
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f2e30fb..4fb68dc 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1753,6 +1753,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
if (*op < IP_SET_OP_VERSION) {
/* Check the version at the beginning of operations */
struct ip_set_req_version *req_version = data;
+
+ if (*len < sizeof(struct ip_set_req_version)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
if (req_version->version != IPSET_PROTOCOL) {
ret = -EPROTO;
goto done;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d585626..d9a7f00 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_control_del(cp);
if (cp->flags & IP_VS_CONN_F_NFCT) {
- ip_vs_conn_drop_conntrack(cp);
/* Do not access conntracks during subsys cleanup
* because nf_conntrack_find_get can not be used after
* conntrack cleanup for the net.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3581736..f7a758f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1392,15 +1392,19 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (ipip) {
__be32 info = ic->un.gateway;
+ __u8 type = ic->type;
+ __u8 code = ic->code;
/* Update the MTU */
if (ic->type == ICMP_DEST_UNREACH &&
ic->code == ICMP_FRAG_NEEDED) {
struct ip_vs_dest *dest = cp->dest;
u32 mtu = ntohs(ic->un.frag.mtu);
+ __be16 frag_off = cih->frag_off;
/* Strip outer IP and ICMP, go to IPIP header */
- __skb_pull(skb, ihl + sizeof(_icmph));
+ if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL)
+ goto ignore_ipip;
offset2 -= ihl + sizeof(_icmph);
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
@@ -1408,7 +1412,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ipv4_update_pmtu(skb, dev_net(skb->dev),
mtu, 0, 0, 0, 0);
/* Client uses PMTUD? */
- if (!(cih->frag_off & htons(IP_DF)))
+ if (!(frag_off & htons(IP_DF)))
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
@@ -1427,12 +1431,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
/* Strip outer IP, ICMP and IPIP, go to IP header of
* original request.
*/
- __skb_pull(skb, offset2);
+ if (pskb_pull(skb, offset2) == NULL)
+ goto ignore_ipip;
skb_reset_network_header(skb);
IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
- ic->type, ic->code, ntohl(info));
- icmp_send(skb, ic->type, ic->code, info);
+ type, code, ntohl(info));
+ icmp_send(skb, type, code, info);
/* ICMP can be shorter but anyways, account it */
ip_vs_out_stats(cp, skb);
@@ -1901,7 +1906,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
{
.hook = ip_vs_local_reply6,
.owner = THIS_MODULE,
- .pf = NFPROTO_IPV4,
+ .pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a3df9bd..f956865 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3765,6 +3765,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ ip_vs_stop_estimator(net, &ipvs->tot_stats);
}
#else
@@ -3825,7 +3826,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
*/
rcu_barrier();
ip_vs_trash_cleanup(net);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 77c1732..4a662f1 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -183,6 +183,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct nf_conn *ct;
struct net *net;
+ *diff = 0;
+
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
* so turn this into a no-op for IPv6 packets
@@ -191,8 +193,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
#endif
- *diff = 0;
-
/* Only useful for established sessions */
if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
@@ -321,6 +321,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct ip_vs_conn *n_cp;
struct net *net;
+ /* no diff required for incoming packets */
+ *diff = 0;
+
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
* so turn this into a no-op for IPv6 packets
@@ -329,9 +332,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
#endif
- /* no diff required for incoming packets */
- *diff = 0;
-
/* Only useful for established sessions */
if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index c8beafd..5a355a4 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -63,6 +63,7 @@
#include <net/ip_vs.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
@@ -97,6 +98,11 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
return;
+ /* Applications may adjust TCP seqs */
+ if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP &&
+ !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct))
+ return;
+
/*
* The connection is not yet in the hashtable, so we update it.
* CIP->VIP will remain the same, so leave the tuple in
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e..1692e75 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
@@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->nexthdr = IPPROTO_IPV6;
iph->payload_len = old_iph->payload_len;
be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
- iph->priority = old_iph->priority;
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+ ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
iph->daddr = cp->daddr.in6;
iph->saddr = saddr;
iph->hop_limit = old_iph->hop_limit;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6f0f4f7..13deb61 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -491,6 +491,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
+static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (nf_nat_proto_remove(ct, data))
+ return 1;
+
+ if (!nat || !nat->ct)
+ return 0;
+
+ /* This netns is being destroyed, and conntrack has nat null binding.
+ * Remove it from bysource hash, as the table will be freed soon.
+ *
+ * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
+ * will delete entry from already-freed table.
+ */
+ if (!del_timer(&ct->timeout))
+ return 1;
+
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&nat->bysource);
+ ct->status &= ~IPS_NAT_DONE_MASK;
+ nat->ct = NULL;
+ spin_unlock_bh(&nf_nat_lock);
+
+ add_timer(&ct->timeout);
+
+ /* don't delete conntrack. Although that would make things a lot
+ * simpler, we'd end up flushing all conntracks on nat rmmod.
+ */
+ return 0;
+}
+
static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
struct nf_nat_proto_clean clean = {
@@ -753,7 +786,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87d..0a03662 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,7 +147,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
const struct nfnetlink_subsystem *ss;
int type, err;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (!netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
/* All the messages must at least contain nfgenmsg */
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d92cc31..09172d7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -45,7 +45,8 @@
#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
-#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */
+/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN)
#define PRINTR(x, args...) do { if (net_ratelimit()) \
printk(x, ## args); } while (0);
@@ -255,6 +256,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
case NFULNL_COPY_PACKET:
inst->copy_mode = mode;
+ if (range == 0)
+ range = NFULNL_COPY_RANGE_MAX;
inst->copy_range = min_t(unsigned int,
range, NFULNL_COPY_RANGE_MAX);
break;
@@ -345,26 +348,25 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
return skb;
}
-static int
+static void
__nfulnl_send(struct nfulnl_instance *inst)
{
- int status = -1;
-
if (inst->qlen > 1) {
struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
NLMSG_DONE,
sizeof(struct nfgenmsg),
0);
- if (!nlh)
+ if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
+ inst->skb->len, skb_tailroom(inst->skb))) {
+ kfree_skb(inst->skb);
goto out;
+ }
}
- status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
- MSG_DONTWAIT);
-
+ nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
+ MSG_DONTWAIT);
+out:
inst->qlen = 0;
inst->skb = NULL;
-out:
- return status;
}
static void
@@ -651,7 +653,8 @@ nfulnl_log_packet(struct net *net,
+ nla_total_size(sizeof(u_int32_t)) /* gid */
+ nla_total_size(plen) /* prefix */
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
- + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
+ + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
+ + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
if (in && skb_mac_header_was_set(skb)) {
size += nla_total_size(skb->dev->hard_header_len)
@@ -680,8 +683,7 @@ nfulnl_log_packet(struct net *net,
break;
case NFULNL_COPY_PACKET:
- if (inst->copy_range == 0
- || inst->copy_range > skb->len)
+ if (inst->copy_range > skb->len)
data_len = skb->len;
else
data_len = inst->copy_range;
@@ -694,8 +696,7 @@ nfulnl_log_packet(struct net *net,
goto unlock_and_release;
}
- if (inst->skb &&
- size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
+ if (inst->skb && size > skb_tailroom(inst->skb)) {
/* either the queue len is too high or we don't have
* enough room in the skb left. flush to userspace. */
__nfulnl_flush(inst);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6135635..0059013 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -204,7 +204,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
if (nskb) {
nskb->dev = dev;
nskb->protocol = htons((u16) sk->sk_protocol);
-
+ skb_reset_network_header(nskb);
ret = dev_queue_xmit(nskb);
if (unlikely(ret > 0))
ret = net_xmit_errno(ret);
@@ -502,14 +502,14 @@ out:
return err;
}
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
{
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
struct page *p_start, *p_end;
/* First page is flushed through netlink_{get,set}_status */
p_start = pgvec_to_page(hdr + PAGE_SIZE);
- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+ p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
while (p_start <= p_end) {
flush_dcache_page(p_start);
p_start++;
@@ -527,9 +527,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
static void netlink_set_status(struct nl_mmap_hdr *hdr,
enum nl_mmap_status status)
{
+ smp_mb();
hdr->nm_status = status;
flush_dcache_page(pgvec_to_page(hdr));
- smp_wmb();
}
static struct nl_mmap_hdr *
@@ -628,7 +628,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
while (nlk->cb_running && netlink_dump_space(nlk)) {
err = netlink_dump(sk);
if (err < 0) {
- sk->sk_err = err;
+ sk->sk_err = -err;
sk->sk_error_report(sk);
break;
}
@@ -691,24 +691,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
struct nl_mmap_hdr *hdr;
struct sk_buff *skb;
unsigned int maxlen;
- bool excl = true;
int err = 0, len = 0;
- /* Netlink messages are validated by the receiver before processing.
- * In order to avoid userspace changing the contents of the message
- * after validation, the socket and the ring may only be used by a
- * single process, otherwise we fall back to copying.
- */
- if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
- atomic_read(&nlk->mapped) > 1)
- excl = false;
-
mutex_lock(&nlk->pg_vec_lock);
ring = &nlk->tx_ring;
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
do {
+ unsigned int nm_len;
+
hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
if (hdr == NULL) {
if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -716,35 +708,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
schedule();
continue;
}
- if (hdr->nm_len > maxlen) {
+
+ nm_len = ACCESS_ONCE(hdr->nm_len);
+ if (nm_len > maxlen) {
err = -EINVAL;
goto out;
}
- netlink_frame_flush_dcache(hdr);
+ netlink_frame_flush_dcache(hdr, nm_len);
- if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
- skb = alloc_skb_head(GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- sock_hold(sk);
- netlink_ring_setup_skb(skb, sk, ring, hdr);
- NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
- __skb_put(skb, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
- atomic_inc(&ring->pending);
- } else {
- skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- __skb_put(skb, hdr->nm_len);
- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+ skb = alloc_skb(nm_len, GFP_KERNEL);
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto out;
}
+ __skb_put(skb, nm_len);
+ memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
netlink_increment_head(ring);
@@ -790,7 +770,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
hdr->nm_pid = NETLINK_CB(skb).creds.pid;
hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_frame_flush_dcache(hdr);
+ netlink_frame_flush_dcache(hdr, hdr->nm_len);
netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
@@ -1352,7 +1332,74 @@ retry:
return err;
}
-static inline int netlink_capable(const struct socket *sock, unsigned int flag)
+/**
+ * __netlink_ns_capable - General netlink message capability test
+ * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in the user namespace @user_ns.
+ */
+bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
+ struct user_namespace *user_ns, int cap)
+{
+ return ((nsp->flags & NETLINK_SKB_DST) ||
+ file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
+ ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(__netlink_ns_capable);
+
+/**
+ * netlink_ns_capable - General netlink message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in the user namespace @user_ns.
+ */
+bool netlink_ns_capable(const struct sk_buff *skb,
+ struct user_namespace *user_ns, int cap)
+{
+ return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_ns_capable);
+
+/**
+ * netlink_capable - Netlink global message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in all user namespaces.
+ */
+bool netlink_capable(const struct sk_buff *skb, int cap)
+{
+ return netlink_ns_capable(skb, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_capable);
+
+/**
+ * netlink_net_capable - Netlink network namespace message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap over the network namespace of
+ * the socket we received the message from.
+ */
+bool netlink_net_capable(const struct sk_buff *skb, int cap)
+{
+ return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_net_capable);
+
+static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
return (nl_table[sock->sk->sk_protocol].flags & flag) ||
ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
@@ -1420,7 +1467,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
/* Only superuser is allowed to listen multicasts */
if (nladdr->nl_groups) {
- if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
+ if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
if (err)
@@ -1482,7 +1529,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
/* Only superuser is allowed to send multicasts */
- if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+ if (nladdr->nl_groups && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
if (!nlk->portid)
@@ -2088,7 +2135,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
- if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
+ if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
if (err)
@@ -2220,6 +2267,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
int err;
struct scm_cookie scm;
+ u32 netlink_skb_flags = 0;
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
@@ -2239,8 +2287,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
dst_group = ffs(addr->nl_groups);
err = -EPERM;
if ((dst_group || dst_portid) &&
- !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+ !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
goto out;
+ netlink_skb_flags |= NETLINK_SKB_DST;
} else {
dst_portid = nlk->dst_portid;
dst_group = nlk->dst_group;
@@ -2270,6 +2319,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
NETLINK_CB(skb).portid = nlk->portid;
NETLINK_CB(skb).dst_group = dst_group;
NETLINK_CB(skb).creds = siocb->scm->creds;
+ NETLINK_CB(skb).flags = netlink_skb_flags;
err = -EFAULT;
if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
@@ -2370,7 +2420,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = ret;
+ sk->sk_err = -ret;
sk->sk_error_report(sk);
}
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0c741ce..c7408dd 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -592,7 +592,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
return -EOPNOTSUPP;
if ((ops->flags & GENL_ADMIN_PERM) &&
- !capable(CAP_NET_ADMIN))
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 65cfaa8..07c4ae3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -42,6 +42,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
static int make_writable(struct sk_buff *skb, int write_len)
{
+ if (!pskb_may_pull(skb, write_len))
+ return -ENOMEM;
+
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
@@ -70,6 +73,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
+ if (skb_network_offset(skb) < ETH_HLEN)
+ skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);
return 0;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d1705d0..a846125 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -63,7 +63,6 @@
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
-#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -566,6 +565,7 @@ static void init_prb_bdqc(struct packet_sock *po,
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+ p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
prb_setup_retire_blk_timer(po, tx_ring);
prb_open_block(p1, pbd);
@@ -631,7 +631,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
if (BLOCK_NUM_PKTS(pbd)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_chill();
+ cpu_relax();
}
}
@@ -882,7 +882,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
if (!(status & TP_STATUS_BLK_TMO)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_chill();
+ cpu_relax();
}
}
prb_close_block(pkc, pbd, po, status);
@@ -1815,6 +1815,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if ((int)snaplen < 0)
snaplen = 0;
}
+ } else if (unlikely(macoff + snaplen >
+ GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
+ u32 nval;
+
+ nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
+ pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
+ snaplen, nval, macoff);
+ snaplen = nval;
+ if (unlikely((int)snaplen < 0)) {
+ snaplen = 0;
+ macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+ }
}
spin_lock(&sk->sk_receive_queue.lock);
h.raw = packet_current_rx_frame(po, skb,
@@ -3611,6 +3623,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out;
if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
goto out;
+ if (po->tp_version >= TPACKET_V3 &&
+ (int)(req->tp_block_size -
+ BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+ goto out;
if (unlikely(req->tp_frame_size < po->tp_hdrlen +
po->tp_reserve))
goto out;
diff --git a/net/packet/diag.c b/net/packet/diag.c
index a9584a2..674b0a6 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -127,6 +127,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
struct packet_diag_req *req,
+ bool may_report_filterinfo,
struct user_namespace *user_ns,
u32 portid, u32 seq, u32 flags, int sk_ino)
{
@@ -171,7 +172,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
goto out_nlmsg_trim;
if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
- sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER))
+ sock_diag_put_filterinfo(may_report_filterinfo, sk, skb,
+ PACKET_DIAG_FILTER))
goto out_nlmsg_trim;
return nlmsg_end(skb, nlh);
@@ -187,9 +189,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct packet_diag_req *req;
struct net *net;
struct sock *sk;
+ bool may_report_filterinfo;
net = sock_net(skb->sk);
req = nlmsg_data(cb->nlh);
+ may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
mutex_lock(&net->packet.sklist_lock);
sk_for_each(sk, &net->packet.sklist) {
@@ -199,6 +203,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (sk_diag_fill(sk, skb, req,
+ may_report_filterinfo,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 1035fa2..ca086c0 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -29,6 +29,7 @@ struct tpacket_kbdq_core {
char *pkblk_start;
char *pkblk_end;
int kblk_size;
+ unsigned int max_frame_len;
unsigned int knum_blocks;
uint64_t knxt_seq_num;
char *prev;
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index dc15f43..b64151a 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
u8 pnaddr;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
ASSERT_RTNL();
@@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
u8 dst;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
ASSERT_RTNL();
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 5a44c6e..e8fdb17 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -34,7 +34,6 @@
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/llist.h>
-#include <linux/delay.h>
#include "rds.h"
#include "ib.h"
@@ -287,7 +286,7 @@ static inline void wait_clean_list_grace(void)
for_each_online_cpu(cpu) {
flag = &per_cpu(clean_list_grace, cpu);
while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_chill();
+ cpu_relax();
}
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fd70728..15d46b9 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -989,7 +989,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
- if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8e118af..2ea40d1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -138,7 +138,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
int err;
int tp_created = 0;
- if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
replay:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2adda7f..3f5fe03 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1076,7 +1076,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1143,7 +1143,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *q, *p;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -1483,7 +1483,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1dcebc9..a74e278 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -850,7 +850,7 @@ void dev_deactivate_many(struct list_head *head)
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, unreg_list)
while (some_qdisc_is_busy(dev))
- msleep(1);
+ yield();
}
void dev_deactivate(struct net_device *dev)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index cef5099..737050f 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -375,7 +375,7 @@ void sctp_association_free(struct sctp_association *asoc)
/* Only real associations count against the endpoint, so
* don't bother for if this is a temporary association.
*/
- if (!asoc->temp) {
+ if (!list_empty(&asoc->asocs)) {
list_del(&asoc->asocs);
/* Decrement the backlog value for a TCP-style listening
@@ -1198,6 +1198,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
asoc->c = new->c;
asoc->peer.rwnd = new->peer.rwnd;
asoc->peer.sack_needed = new->peer.sack_needed;
+ asoc->peer.auth_capable = new->peer.auth_capable;
asoc->peer.i = new->peer.i;
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
asoc->peer.i.initial_tsn, GFP_ATOMIC);
@@ -1644,6 +1645,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
* ack chunk whose serial number matches that of the request.
*/
list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) {
+ if (sctp_chunk_pending(ack))
+ continue;
if (ack->subh.addip_hdr->serial == serial) {
sctp_chunk_hold(ack);
return ack;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 8c4fa5d..4b842e9 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -387,14 +387,13 @@ nomem:
*/
int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
{
- struct net *net = sock_net(asoc->base.sk);
struct sctp_auth_bytes *secret;
struct sctp_shared_key *ep_key;
/* If we don't support AUTH, or peer is not capable
* we don't need to do anything.
*/
- if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+ if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
return 0;
/* If the key_id is non-zero and we couldn't find an
@@ -441,16 +440,16 @@ struct sctp_shared_key *sctp_auth_get_shkey(
*/
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
{
- struct net *net = sock_net(ep->base.sk);
struct crypto_hash *tfm = NULL;
__u16 id;
- /* if the transforms are already allocted, we are done */
- if (!net->sctp.auth_enable) {
+ /* If AUTH extension is disabled, we are done */
+ if (!ep->auth_enable) {
ep->auth_hmacs = NULL;
return 0;
}
+ /* If the transforms are already allocated, we are done */
if (ep->auth_hmacs)
return 0;
@@ -671,12 +670,10 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
/* Check if peer requested that this chunk is authenticated */
int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- struct net *net;
if (!asoc)
return 0;
- net = sock_net(asoc->base.sk);
- if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+ if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
return 0;
return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -685,12 +682,10 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
/* Check if we requested that peer authenticate this chunk. */
int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- struct net *net;
if (!asoc)
return 0;
- net = sock_net(asoc->base.sk);
- if (!net->sctp.auth_enable)
+ if (!asoc->ep->auth_enable)
return 0;
return __sctp_auth_cid(chunk,
@@ -873,8 +868,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
list_add(&cur_key->key_list, sh_keys);
cur_key->key = key;
- sctp_auth_key_hold(key);
-
return 0;
nomem:
if (!replace)
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 09b8daa..477dd23 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -69,7 +69,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
if (!ep->digest)
return NULL;
- if (net->sctp.auth_enable) {
+ ep->auth_enable = net->sctp.auth_enable;
+ if (ep->auth_enable) {
/* Allocate space for HMACS and CHUNKS authentication
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 5856932..560cd41 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -141,18 +141,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
} else {
/* Nothing to do. Next chunk in the packet, please. */
ch = (sctp_chunkhdr_t *) chunk->chunk_end;
-
/* Force chunk->skb->data to chunk->chunk_end. */
- skb_pull(chunk->skb,
- chunk->chunk_end - chunk->skb->data);
-
- /* Verify that we have at least chunk headers
- * worth of buffer left.
- */
- if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) {
- sctp_chunk_free(chunk);
- chunk = queue->in_progress = NULL;
- }
+ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
+ /* We are guaranteed to pull a SCTP header. */
}
}
@@ -188,24 +179,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
- if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
+ if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
+ skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
- /* RFC 2960, Section 6.10 Bundling
- *
- * Partial chunks MUST NOT be placed in an SCTP packet.
- * If the receiver detects a partial chunk, it MUST drop
- * the chunk.
- *
- * Since the end of the chunk is past the end of our buffer
- * (which contains the whole packet, we can freely discard
- * the whole packet.
- */
- sctp_chunk_free(chunk);
- chunk = queue->in_progress = NULL;
-
- return NULL;
+ /* Discard inside state machine. */
+ chunk->pdiscard = 1;
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
} else {
/* We are at the end of the packet, so mark the chunk
* in case we need to send a SACK.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 3191373..69faf79 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -403,12 +403,12 @@ int sctp_packet_transmit(struct sctp_packet *packet)
sk = chunk->skb->sk;
/* Allocate the new skb. */
- nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC);
+ nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC);
if (!nskb)
goto nomem;
/* Make sure the outbound skb has enough header room reserved. */
- skb_reserve(nskb, packet->overhead + LL_MAX_HEADER);
+ skb_reserve(nskb, packet->overhead + MAX_HEADER);
/* Set the owning socket so that we know where to get the
* destination IP address.
@@ -606,7 +606,7 @@ out:
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5e17092..2b216f1 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -492,8 +492,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
continue;
if ((laddr->state == SCTP_ADDR_SRC) &&
(AF_INET == laddr->a.sa.sa_family)) {
- fl4->saddr = laddr->a.v4.sin_addr.s_addr;
fl4->fl4_sport = laddr->a.v4.sin_port;
+ flowi4_update_output(fl4,
+ asoc->base.sk->sk_bound_dev_if,
+ RT_CONN_FLAGS(asoc->base.sk),
+ daddr->v4.sin_addr.s_addr,
+ laddr->a.v4.sin_addr.s_addr);
+
rt = ip_route_output_key(sock_net(sk), fl4);
if (!IS_ERR(rt)) {
dst = &rt->dst;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 26be077..d800160 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -218,6 +218,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
gfp_t gfp, int vparam_len)
{
struct net *net = sock_net(asoc->base.sk);
+ struct sctp_endpoint *ep = asoc->ep;
sctp_inithdr_t init;
union sctp_params addrs;
size_t chunksize;
@@ -277,7 +278,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += vparam_len;
/* Account for AUTH related parameters */
- if (net->sctp.auth_enable) {
+ if (ep->auth_enable) {
/* Add random parameter length*/
chunksize += sizeof(asoc->c.auth_random);
@@ -362,7 +363,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
}
/* Add SCTP-AUTH chunks to the parameter list */
- if (net->sctp.auth_enable) {
+ if (ep->auth_enable) {
sctp_addto_chunk(retval, sizeof(asoc->c.auth_random),
asoc->c.auth_random);
if (auth_hmacs)
@@ -2023,7 +2024,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
/* if the peer reports AUTH, assume that he
* supports AUTH.
*/
- if (net->sctp.auth_enable)
+ if (asoc->ep->auth_enable)
asoc->peer.auth_capable = 1;
break;
case SCTP_CID_ASCONF:
@@ -2115,6 +2116,7 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
* SCTP_IERROR_NO_ERROR - continue with the chunk
*/
static sctp_ierror_t sctp_verify_param(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
union sctp_params param,
sctp_cid_t cid,
@@ -2165,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
goto fallthrough;
case SCTP_PARAM_RANDOM:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fallthrough;
/* SCTP-AUTH: Secion 6.1
@@ -2182,7 +2184,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_CHUNKS:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fallthrough;
/* SCTP-AUTH: Section 3.2
@@ -2198,7 +2200,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fallthrough;
hmacs = (struct sctp_hmac_algo_param *)param.p;
@@ -2233,10 +2235,9 @@ fallthrough:
}
/* Verify the INIT packet before we process it. */
-int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
- sctp_cid_t cid,
- sctp_init_chunk_t *peer_init,
- struct sctp_chunk *chunk,
+int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc, sctp_cid_t cid,
+ sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
struct sctp_chunk **errp)
{
union sctp_params param;
@@ -2277,8 +2278,8 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
/* Verify all the variable length parameters */
sctp_walk_params(param, peer_init, init_hdr.params) {
-
- result = sctp_verify_param(net, asoc, param, cid, chunk, errp);
+ result = sctp_verify_param(net, ep, asoc, param, cid,
+ chunk, errp);
switch (result) {
case SCTP_IERROR_ABORT:
case SCTP_IERROR_NOMEM:
@@ -2510,6 +2511,7 @@ static int sctp_process_param(struct sctp_association *asoc,
struct sctp_af *af;
union sctp_addr_param *addr_param;
struct sctp_transport *t;
+ struct sctp_endpoint *ep = asoc->ep;
/* We maintain all INIT parameters in network byte order all the
* time. This allows us to not worry about whether the parameters
@@ -2620,6 +2622,9 @@ do_addr_param:
addr_param = param.v + sizeof(sctp_addip_param_t);
af = sctp_get_af_specific(param_type2af(param.p->type));
+ if (af == NULL)
+ break;
+
af->from_addr_param(&addr, addr_param,
htons(asoc->peer.port), 0);
@@ -2649,7 +2654,7 @@ do_addr_param:
goto fall_through;
case SCTP_PARAM_RANDOM:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fall_through;
/* Save peer's random parameter */
@@ -2662,7 +2667,7 @@ do_addr_param:
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fall_through;
/* Save peer's HMAC list */
@@ -2678,7 +2683,7 @@ do_addr_param:
break;
case SCTP_PARAM_CHUNKS:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fall_through;
asoc->peer.peer_chunks = kmemdup(param.p,
@@ -3121,50 +3126,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
return SCTP_ERROR_NO_ERROR;
}
-/* Verify the ASCONF packet before we process it. */
-int sctp_verify_asconf(const struct sctp_association *asoc,
- struct sctp_paramhdr *param_hdr, void *chunk_end,
- struct sctp_paramhdr **errp) {
- sctp_addip_param_t *asconf_param;
+/* Verify the ASCONF packet before we process it. */
+bool sctp_verify_asconf(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, bool addr_param_needed,
+ struct sctp_paramhdr **errp)
+{
+ sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
union sctp_params param;
- int length, plen;
+ bool addr_param_seen = false;
- param.v = (sctp_paramhdr_t *) param_hdr;
- while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) {
- length = ntohs(param.p->length);
- *errp = param.p;
-
- if (param.v > chunk_end - length ||
- length < sizeof(sctp_paramhdr_t))
- return 0;
+ sctp_walk_params(param, addip, addip_hdr.params) {
+ size_t length = ntohs(param.p->length);
+ *errp = param.p;
switch (param.p->type) {
+ case SCTP_PARAM_ERR_CAUSE:
+ break;
+ case SCTP_PARAM_IPV4_ADDRESS:
+ if (length != sizeof(sctp_ipv4addr_param_t))
+ return false;
+ addr_param_seen = true;
+ break;
+ case SCTP_PARAM_IPV6_ADDRESS:
+ if (length != sizeof(sctp_ipv6addr_param_t))
+ return false;
+ addr_param_seen = true;
+ break;
case SCTP_PARAM_ADD_IP:
case SCTP_PARAM_DEL_IP:
case SCTP_PARAM_SET_PRIMARY:
- asconf_param = (sctp_addip_param_t *)param.v;
- plen = ntohs(asconf_param->param_hdr.length);
- if (plen < sizeof(sctp_addip_param_t) +
- sizeof(sctp_paramhdr_t))
- return 0;
+ /* In ASCONF chunks, these need to be first. */
+ if (addr_param_needed && !addr_param_seen)
+ return false;
+ length = ntohs(param.addip->param_hdr.length);
+ if (length < sizeof(sctp_addip_param_t) +
+ sizeof(sctp_paramhdr_t))
+ return false;
break;
case SCTP_PARAM_SUCCESS_REPORT:
case SCTP_PARAM_ADAPTATION_LAYER_IND:
if (length != sizeof(sctp_addip_param_t))
- return 0;
-
+ return false;
break;
default:
- break;
+ /* This is unkown to us, reject! */
+ return false;
}
-
- param.v += WORD_ROUND(length);
}
- if (param.v != chunk_end)
- return 0;
+ /* Remaining sanity checks. */
+ if (addr_param_needed && !addr_param_seen)
+ return false;
+ if (!addr_param_needed && addr_param_seen)
+ return false;
+ if (param.v != chunk->chunk_end)
+ return false;
- return 1;
+ return true;
}
/* Process an incoming ASCONF chunk with the next expected serial no. and
@@ -3173,16 +3191,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc,
struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
struct sctp_chunk *asconf)
{
+ sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+ bool all_param_pass = true;
+ union sctp_params param;
sctp_addiphdr_t *hdr;
union sctp_addr_param *addr_param;
sctp_addip_param_t *asconf_param;
struct sctp_chunk *asconf_ack;
-
__be16 err_code;
int length = 0;
int chunk_len;
__u32 serial;
- int all_param_pass = 1;
chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
hdr = (sctp_addiphdr_t *)asconf->skb->data;
@@ -3210,9 +3229,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
goto done;
/* Process the TLVs contained within the ASCONF chunk. */
- while (chunk_len > 0) {
+ sctp_walk_params(param, addip, addip_hdr.params) {
+ /* Skip preceeding address parameters. */
+ if (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+ param.p->type == SCTP_PARAM_IPV6_ADDRESS)
+ continue;
+
err_code = sctp_process_asconf_param(asoc, asconf,
- asconf_param);
+ param.addip);
/* ADDIP 4.1 A7)
* If an error response is received for a TLV parameter,
* all TLVs with no response before the failed TLV are
@@ -3220,28 +3244,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
* the failed response are considered unsuccessful unless
* a specific success indication is present for the parameter.
*/
- if (SCTP_ERROR_NO_ERROR != err_code)
- all_param_pass = 0;
-
+ if (err_code != SCTP_ERROR_NO_ERROR)
+ all_param_pass = false;
if (!all_param_pass)
- sctp_add_asconf_response(asconf_ack,
- asconf_param->crr_id, err_code,
- asconf_param);
+ sctp_add_asconf_response(asconf_ack, param.addip->crr_id,
+ err_code, param.addip);
/* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add
* an IP address sends an 'Out of Resource' in its response, it
* MUST also fail any subsequent add or delete requests bundled
* in the ASCONF.
*/
- if (SCTP_ERROR_RSRC_LOW == err_code)
+ if (err_code == SCTP_ERROR_RSRC_LOW)
goto done;
-
- /* Move to the next ASCONF param. */
- length = ntohs(asconf_param->param_hdr.length);
- asconf_param = (void *)asconf_param + length;
- chunk_len -= length;
}
-
done:
asoc->peer.addip_serial++;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 0a5f050..bf12098 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -171,6 +171,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
{
__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
+ /* Previously already marked? */
+ if (unlikely(chunk->pdiscard))
+ return 0;
if (unlikely(chunk_length < required_length))
return 0;
@@ -358,7 +361,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
@@ -525,7 +528,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
@@ -1431,7 +1434,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
@@ -1776,9 +1779,22 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
- SCTP_STATE(SCTP_STATE_ESTABLISHED));
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+ (sctp_sstate(asoc->base.sk, CLOSING) ||
+ sock_flag(asoc->base.sk, SOCK_DEAD))) {
+ /* if were currently in SHUTDOWN_PENDING, but the socket
+ * has been closed by user, don't transition to ESTABLISHED.
+ * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
+ SCTP_ST_CHUNK(0), NULL,
+ commands);
+ } else {
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+ SCTP_STATE(SCTP_STATE_ESTABLISHED));
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ }
return SCTP_DISPOSITION_CONSUME;
nomem_ev:
@@ -3579,9 +3595,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
struct sctp_chunk *asconf_ack = NULL;
struct sctp_paramhdr *err_param = NULL;
sctp_addiphdr_t *hdr;
- union sctp_addr_param *addr_param;
__u32 serial;
- int length;
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3606,17 +3620,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
hdr = (sctp_addiphdr_t *)chunk->skb->data;
serial = ntohl(hdr->serial);
- addr_param = (union sctp_addr_param *)hdr->params;
- length = ntohs(addr_param->p.length);
- if (length < sizeof(sctp_paramhdr_t))
- return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
- (void *)addr_param, commands);
-
/* Verify the ASCONF chunk before processing it. */
- if (!sctp_verify_asconf(asoc,
- (sctp_paramhdr_t *)((void *)addr_param + length),
- (void *)chunk->chunk_end,
- &err_param))
+ if (!sctp_verify_asconf(asoc, chunk, true, &err_param))
return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);
@@ -3734,10 +3739,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
rcvd_serial = ntohl(addip_hdr->serial);
/* Verify the ASCONF-ACK chunk before processing it. */
- if (!sctp_verify_asconf(asoc,
- (sctp_paramhdr_t *)addip_hdr->params,
- (void *)asconf_ack->chunk_end,
- &err_param))
+ if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param))
return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 14c8015..e00a041 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3296,10 +3296,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunk val;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authchunk))
@@ -3316,7 +3316,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
}
/* add this chunk id to the endpoint */
- return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk);
+ return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk);
}
/*
@@ -3329,12 +3329,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_hmacalgo *hmacs;
u32 idents;
int err;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen < sizeof(struct sctp_hmacalgo))
@@ -3351,7 +3351,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
goto out;
}
- err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs);
+ err = sctp_auth_ep_set_hmacs(ep, hmacs);
out:
kfree(hmacs);
return err;
@@ -3367,12 +3367,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkey *authkey;
struct sctp_association *asoc;
int ret;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen <= sizeof(struct sctp_authkey))
@@ -3393,7 +3393,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
goto out;
}
- ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey);
+ ret = sctp_auth_set_key(ep, asoc, authkey);
out:
kzfree(authkey);
return ret;
@@ -3409,11 +3409,11 @@ static int sctp_setsockopt_active_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3425,8 +3425,7 @@ static int sctp_setsockopt_active_key(struct sock *sk,
if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
- return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc,
- val.scact_keynumber);
+ return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
}
/*
@@ -3438,11 +3437,11 @@ static int sctp_setsockopt_del_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3454,8 +3453,7 @@ static int sctp_setsockopt_del_key(struct sock *sk,
if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
- return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc,
- val.scact_keynumber);
+ return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
}
@@ -5353,16 +5351,16 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_hmacalgo __user *p = (void __user *)optval;
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
- hmacs = sctp_sk(sk)->ep->auth_hmacs_list;
+ hmacs = ep->auth_hmacs_list;
data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
if (len < sizeof(struct sctp_hmacalgo) + data_len)
@@ -5383,11 +5381,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
static int sctp_getsockopt_active_key(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authkeyid))
@@ -5402,7 +5400,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (asoc)
val.scact_keynumber = asoc->active_key_id;
else
- val.scact_keynumber = sctp_sk(sk)->ep->active_key_id;
+ val.scact_keynumber = ep->active_key_id;
len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
@@ -5416,7 +5414,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5424,7 +5422,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5460,7 +5458,7 @@ num:
static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5468,7 +5466,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5485,7 +5483,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (asoc)
ch = (struct sctp_chunks_param*)asoc->c.auth_chunks;
else
- ch = sctp_sk(sk)->ep->auth_chunk_list;
+ ch = ep->auth_chunk_list;
if (!ch)
goto num;
@@ -6564,6 +6562,46 @@ static void __sctp_write_space(struct sctp_association *asoc)
}
}
+static void sctp_wake_up_waiters(struct sock *sk,
+ struct sctp_association *asoc)
+{
+ struct sctp_association *tmp = asoc;
+
+ /* We do accounting for the sndbuf space per association,
+ * so we only need to wake our own association.
+ */
+ if (asoc->ep->sndbuf_policy)
+ return __sctp_write_space(asoc);
+
+ /* If association goes down and is just flushing its
+ * outq, then just normally notify others.
+ */
+ if (asoc->base.dead)
+ return sctp_write_space(sk);
+
+ /* Accounting for the sndbuf space is per socket, so we
+ * need to wake up others, try to be fair and in case of
+ * other associations, let them have a go first instead
+ * of just doing a sctp_write_space() call.
+ *
+ * Note that we reach sctp_wake_up_waiters() only when
+ * associations free up queued chunks, thus we are under
+ * lock and the list of associations on a socket is
+ * guaranteed not to change.
+ */
+ for (tmp = list_next_entry(tmp, asocs); 1;
+ tmp = list_next_entry(tmp, asocs)) {
+ /* Manually skip the head element. */
+ if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs))
+ continue;
+ /* Wake up association. */
+ __sctp_write_space(tmp);
+ /* We've reached the end. */
+ if (tmp == asoc)
+ break;
+ }
+}
+
/* Do accounting for the sndbuf space.
* Decrement the used sndbuf space of the corresponding association by the
* data size which was just transmitted(freed).
@@ -6591,7 +6629,7 @@ static void sctp_wfree(struct sk_buff *skb)
sk_mem_uncharge(sk, skb->truesize);
sock_wfree(skb);
- __sctp_write_space(asoc);
+ sctp_wake_up_waiters(sk, asoc);
sctp_association_put(asoc);
}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 6b36561..968355f 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -59,8 +59,11 @@ extern int sysctl_sctp_wmem[3];
static int proc_sctp_do_hmac_alg(struct ctl_table *ctl,
int write,
void __user *buffer, size_t *lenp,
-
loff_t *ppos);
+static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
static struct ctl_table sctp_table[] = {
{
.procname = "sctp_mem",
@@ -261,7 +264,7 @@ static struct ctl_table sctp_net_table[] = {
.data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_sctp_do_auth,
},
{
.procname = "addr_scope_policy",
@@ -300,41 +303,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- char tmp[8];
struct ctl_table tbl;
- int ret;
- int changed = 0;
+ bool changed = false;
char *none = "none";
+ char tmp[8];
+ int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
if (write) {
tbl.data = tmp;
- tbl.maxlen = 8;
+ tbl.maxlen = sizeof(tmp);
} else {
tbl.data = net->sctp.sctp_hmac_alg ? : none;
tbl.maxlen = strlen(tbl.data);
}
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- if (write) {
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
#ifdef CONFIG_CRYPTO_MD5
if (!strncmp(tmp, "md5", 3)) {
net->sctp.sctp_hmac_alg = "md5";
- changed = 1;
+ changed = true;
}
#endif
#ifdef CONFIG_CRYPTO_SHA1
if (!strncmp(tmp, "sha1", 4)) {
net->sctp.sctp_hmac_alg = "sha1";
- changed = 1;
+ changed = true;
}
#endif
if (!strncmp(tmp, "none", 4)) {
net->sctp.sctp_hmac_alg = NULL;
- changed = 1;
+ changed = true;
}
-
if (!changed)
ret = -EINVAL;
}
@@ -342,6 +344,36 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl,
return ret;
}
+static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct ctl_table tbl;
+ int new_value, ret;
+
+ memset(&tbl, 0, sizeof(struct ctl_table));
+ tbl.maxlen = sizeof(unsigned int);
+
+ if (write)
+ tbl.data = &new_value;
+ else
+ tbl.data = &net->sctp.auth_enable;
+
+ ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
+ struct sock *sk = net->sctp.ctl_sock;
+
+ net->sctp.auth_enable = new_value;
+ /* Update the value in the control socket */
+ lock_sock(sk);
+ sctp_sk(sk)->ep->auth_enable = new_value;
+ release_sock(sk);
+ }
+
+ return ret;
+}
+
int sctp_sysctl_net_register(struct net *net)
{
struct ctl_table *table;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 81089ed..12c37ce 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -367,9 +367,10 @@ fail:
* specification [SCTP] and any extensions for a list of possible
* error formats.
*/
-struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
- const struct sctp_association *asoc, struct sctp_chunk *chunk,
- __u16 flags, gfp_t gfp)
+struct sctp_ulpevent *
+sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, __u16 flags,
+ gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_remote_error *sre;
@@ -388,8 +389,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
/* Copy the skb to a new skb with room for us to prepend
* notification with.
*/
- skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
- 0, gfp);
+ skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp);
/* Pull off the rest of the cause TLV from the chunk. */
skb_pull(chunk->skb, elen);
@@ -400,62 +400,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
event = sctp_skb2event(skb);
sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
- sre = (struct sctp_remote_error *)
- skb_push(skb, sizeof(struct sctp_remote_error));
+ sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre));
/* Trim the buffer to the right length. */
- skb_trim(skb, sizeof(struct sctp_remote_error) + elen);
+ skb_trim(skb, sizeof(*sre) + elen);
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_type:
- * It should be SCTP_REMOTE_ERROR.
- */
+ /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */
+ memset(sre, 0, sizeof(*sre));
sre->sre_type = SCTP_REMOTE_ERROR;
-
- /*
- * Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_flags: 16 bits (unsigned integer)
- * Currently unused.
- */
sre->sre_flags = 0;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_length: sizeof (__u32)
- *
- * This field is the total length of the notification data,
- * including the notification header.
- */
sre->sre_length = skb->len;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_error: 16 bits (unsigned integer)
- * This value represents one of the Operational Error causes defined in
- * the SCTP specification, in network byte order.
- */
sre->sre_error = cause;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association id field, holds the identifier for the association.
- * All notifications for a given association have the same association
- * identifier. For TCP style socket, this field is ignored.
- */
sctp_ulpevent_set_owner(event, asoc);
sre->sre_assoc_id = sctp_assoc2id(asoc);
return event;
-
fail:
return NULL;
}
@@ -900,7 +859,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event)
return notification->sn_header.sn_type;
}
-/* Copy out the sndrcvinfo into a msghdr. */
+/* RFC6458, Section 5.3.2. SCTP Header Information Structure
+ * (SCTP_SNDRCV, DEPRECATED)
+ */
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *msghdr)
{
@@ -909,74 +870,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
if (sctp_ulpevent_is_notification(event))
return;
- /* Sockets API Extensions for SCTP
- * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
- *
- * sinfo_stream: 16 bits (unsigned integer)
- *
- * For recvmsg() the SCTP stack places the message's stream number in
- * this value.
- */
+ memset(&sinfo, 0, sizeof(sinfo));
sinfo.sinfo_stream = event->stream;
- /* sinfo_ssn: 16 bits (unsigned integer)
- *
- * For recvmsg() this value contains the stream sequence number that
- * the remote endpoint placed in the DATA chunk. For fragmented
- * messages this is the same number for all deliveries of the message
- * (if more than one recvmsg() is needed to read the message).
- */
sinfo.sinfo_ssn = event->ssn;
- /* sinfo_ppid: 32 bits (unsigned integer)
- *
- * In recvmsg() this value is
- * the same information that was passed by the upper layer in the peer
- * application. Please note that byte order issues are NOT accounted
- * for and this information is passed opaquely by the SCTP stack from
- * one end to the other.
- */
sinfo.sinfo_ppid = event->ppid;
- /* sinfo_flags: 16 bits (unsigned integer)
- *
- * This field may contain any of the following flags and is composed of
- * a bitwise OR of these values.
- *
- * recvmsg() flags:
- *
- * SCTP_UNORDERED - This flag is present when the message was sent
- * non-ordered.
- */
sinfo.sinfo_flags = event->flags;
- /* sinfo_tsn: 32 bit (unsigned integer)
- *
- * For the receiving side, this field holds a TSN that was
- * assigned to one of the SCTP Data Chunks.
- */
sinfo.sinfo_tsn = event->tsn;
- /* sinfo_cumtsn: 32 bit (unsigned integer)
- *
- * This field will hold the current cumulative TSN as
- * known by the underlying SCTP layer. Note this field is
- * ignored when sending and only valid for a receive
- * operation when sinfo_flags are set to SCTP_UNORDERED.
- */
sinfo.sinfo_cumtsn = event->cumtsn;
- /* sinfo_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association handle field, sinfo_assoc_id, holds the identifier
- * for the association announced in the COMMUNICATION_UP notification.
- * All notifications for a given association have the same identifier.
- * Ignored for one-to-one style sockets.
- */
sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
-
- /* context value that is set via SCTP_CONTEXT socket option. */
+ /* Context value that is set via SCTP_CONTEXT socket option. */
sinfo.sinfo_context = event->asoc->default_rcv_context;
-
/* These fields are not used while receiving. */
sinfo.sinfo_timetolive = 0;
put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
- sizeof(struct sctp_sndrcvinfo), (void *)&sinfo);
+ sizeof(sinfo), &sinfo);
}
/* Do accounting for bytes received and hold a reference to the association
diff --git a/net/socket.c b/net/socket.c
index dc57dae..c8ca896 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3023,19 +3023,16 @@ static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
IFNAMSIZ))
return -EFAULT;
- if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
+ if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
return -EFAULT;
data64 = compat_ptr(data32);
u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
- /* Don't check these user accesses, just let that get trapped
- * in the ioctl handler instead.
- */
if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
IFNAMSIZ))
return -EFAULT;
- if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
+ if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
return -EFAULT;
return dev_ioctl(net, cmd, u_ifreq64);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 80a6640..b9aad47 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -730,6 +730,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt)
svc_add_new_temp_xprt(serv, newxpt);
+ else
+ module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9c9caaa..8c6e9c7 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -683,6 +683,7 @@ static struct svc_xprt_class svc_udp_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_udp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
+ .xcl_ident = XPRT_TRANSPORT_UDP,
};
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1277,6 +1278,7 @@ static struct svc_xprt_class svc_tcp_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_tcp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+ .xcl_ident = XPRT_TRANSPORT_TCP,
};
void svc_init_xprt_sock(void)
@@ -1395,6 +1397,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
return svsk;
}
+bool svc_alien_sock(struct net *net, int fd)
+{
+ int err;
+ struct socket *sock = sockfd_lookup(fd, &err);
+ bool ret = false;
+
+ if (!sock)
+ goto out;
+ if (sock_net(sock->sk) != net)
+ ret = true;
+ sockfd_put(sock);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(svc_alien_sock);
+
/**
* svc_addsock - add a listener socket to an RPC service
* @serv: pointer to RPC service to which to add a new listener
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 095363e..42ce6bf 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1290,7 +1290,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
}
}
spin_unlock(&xprt_list_lock);
- printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+ dprintk("RPC: transport (%d) not supported\n", args->ident);
return ERR_PTR(-EIO);
found:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 62e4f9b..ed36cb5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -89,6 +89,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+ .xcl_ident = XPRT_TRANSPORT_RDMA,
};
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 716de1a..6ef8925 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -531,6 +531,7 @@ receive:
buf = node->bclink.deferred_head;
node->bclink.deferred_head = buf->next;
+ buf->next = NULL;
node->bclink.deferred_size--;
goto receive;
}
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ced60e2..1e76d91 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -76,10 +76,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
u32 num_sect, unsigned int total_len, int max_size,
struct sk_buff **buf)
{
- int dsz, sz, hsz, pos, res, cnt;
+ int dsz, sz, hsz;
+ unsigned char *to;
dsz = total_len;
- pos = hsz = msg_hdr_sz(hdr);
+ hsz = msg_hdr_sz(hdr);
sz = hsz + dsz;
msg_set_size(hdr, sz);
if (unlikely(sz > max_size)) {
@@ -91,16 +92,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
if (!(*buf))
return -ENOMEM;
skb_copy_to_linear_data(*buf, hdr, hsz);
- for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
- skb_copy_to_linear_data_offset(*buf, pos,
- msg_sect[cnt].iov_base,
- msg_sect[cnt].iov_len);
- pos += msg_sect[cnt].iov_len;
+ to = (*buf)->data + hsz;
+ if (total_len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) {
+ kfree_skb(*buf);
+ *buf = NULL;
+ return -EFAULT;
}
- if (likely(res))
- return dsz;
-
- kfree_skb(*buf);
- *buf = NULL;
- return -EFAULT;
+ return dsz;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 299e45af..ec2ecbd 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -962,6 +962,7 @@ static void tipc_purge_publications(struct name_seq *seq)
list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
publ->ref, publ->key);
+ kfree(publ);
}
}
@@ -986,7 +987,6 @@ void tipc_nametbl_stop(void)
hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
tipc_purge_publications(seq);
}
- continue;
}
kfree(table.types);
table.types = NULL;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 8bcd498..1e6081f 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
u16 cmd;
- if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
+ if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN)))
cmd = TIPC_CMD_NOT_NET_ADMIN;
else
cmd = req_userhdr->cmd;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5adfd94..85d232b 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = {
.fops = &vsock_device_ops,
};
-static int __vsock_core_init(void)
+int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
{
- int err;
+ int err = mutex_lock_interruptible(&vsock_register_mutex);
+
+ if (err)
+ return err;
+
+ if (transport) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+
+ /* Transport must be the owner of the protocol so that it can't
+ * unload while there are open sockets.
+ */
+ vsock_proto.owner = owner;
+ transport = t;
vsock_init_tables();
@@ -1951,36 +1965,19 @@ static int __vsock_core_init(void)
goto err_unregister_proto;
}
+ mutex_unlock(&vsock_register_mutex);
return 0;
err_unregister_proto:
proto_unregister(&vsock_proto);
err_misc_deregister:
misc_deregister(&vsock_device);
- return err;
-}
-
-int vsock_core_init(const struct vsock_transport *t)
-{
- int retval = mutex_lock_interruptible(&vsock_register_mutex);
- if (retval)
- return retval;
-
- if (transport) {
- retval = -EBUSY;
- goto out;
- }
-
- transport = t;
- retval = __vsock_core_init();
- if (retval)
- transport = NULL;
-
-out:
+ transport = NULL;
+err_busy:
mutex_unlock(&vsock_register_mutex);
- return retval;
+ return err;
}
-EXPORT_SYMBOL_GPL(vsock_core_init);
+EXPORT_SYMBOL_GPL(__vsock_core_init);
void vsock_core_exit(void)
{
@@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.0.0-k");
+MODULE_VERSION("1.0.1.0-k");
MODULE_LICENSE("GPL v2");
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c2853bb..c3ef31a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6797,6 +6797,9 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
void *hdr = ((void **)skb->cb)[1];
struct nlattr *data = ((void **)skb->cb)[2];
+ /* clear CB data for netlink core to own from now on */
+ memset(skb->cb, 0, sizeof(skb->cb));
+
nla_nest_end(skb, data);
genlmsg_end(skb, hdr);
genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index de06d5d..8eedb15 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1432,7 +1432,7 @@ static enum reg_request_treatment
__regulatory_hint(struct wiphy *wiphy,
struct regulatory_request *pending_request)
{
- const struct ieee80211_regdomain *regd;
+ const struct ieee80211_regdomain *regd, *tmp;
bool intersect = false;
enum reg_request_treatment treatment;
struct regulatory_request *lr;
@@ -1448,7 +1448,9 @@ __regulatory_hint(struct wiphy *wiphy,
kfree(pending_request);
return PTR_ERR(regd);
}
+ tmp = get_wiphy_regdom(wiphy);
rcu_assign_pointer(wiphy->regd, regd);
+ rcu_free_regdom(tmp);
}
intersect = true;
break;
@@ -1468,7 +1470,9 @@ __regulatory_hint(struct wiphy *wiphy,
return REG_REQ_IGNORE;
}
treatment = REG_REQ_ALREADY_SET;
+ tmp = get_wiphy_regdom(wiphy);
rcu_assign_pointer(wiphy->regd, regd);
+ rcu_free_regdom(tmp);
goto new_request;
}
kfree(pending_request);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 20e86a9..2f844ee 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -242,7 +242,6 @@ void cfg80211_conn_work(struct work_struct *work)
NULL, 0, NULL, 0,
WLAN_STATUS_UNSPECIFIED_FAILURE,
false, NULL);
- cfg80211_sme_free(wdev);
}
wdev_unlock(wdev);
}
@@ -646,6 +645,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
cfg80211_unhold_bss(bss_from_pub(bss));
cfg80211_put_bss(wdev->wiphy, bss);
}
+ cfg80211_sme_free(wdev);
return;
}
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ba5f0d6..064b471 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2029,7 +2029,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure,
MAC_ASSIGN(addr, addr);
__entry->key_type = key_type;
__entry->key_id = key_id;
- memcpy(__entry->tsc, tsc, 6);
+ if (tsc)
+ memcpy(__entry->tsc, tsc, 6);
),
TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm",
NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 76e1873..6b07a59 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,6 +46,11 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);
+struct xfrm_flo {
+ struct dst_entry *dst_orig;
+ u8 flags;
+};
+
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
@@ -1875,13 +1880,14 @@ static int xdst_queue_output(struct sk_buff *skb)
}
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
- struct dst_entry *dst,
+ struct xfrm_flo *xflo,
const struct flowi *fl,
int num_xfrms,
u16 family)
{
int err;
struct net_device *dev;
+ struct dst_entry *dst;
struct dst_entry *dst1;
struct xfrm_dst *xdst;
@@ -1889,10 +1895,13 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
if (IS_ERR(xdst))
return xdst;
- if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 ||
+ if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
+ net->xfrm.sysctl_larval_drop ||
+ num_xfrms <= 0 ||
(fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP))
return xdst;
+ dst = xflo->dst_orig;
dst1 = &xdst->u.dst;
dst_hold(dst);
xdst->route = dst;
@@ -1934,7 +1943,7 @@ static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
struct flow_cache_object *oldflo, void *ctx)
{
- struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+ struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst, *new_xdst;
int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
@@ -1975,7 +1984,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
goto make_dummy_bundle;
}
- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+ xflo->dst_orig);
if (IS_ERR(new_xdst)) {
err = PTR_ERR(new_xdst);
if (err != -EAGAIN)
@@ -2009,7 +2019,7 @@ make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
* either because the policy blocks, has no transformations or
* we could not build template (no xfrm_states).*/
- xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
+ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
return ERR_CAST(xdst);
@@ -2109,13 +2119,18 @@ restart:
}
if (xdst == NULL) {
+ struct xfrm_flo xflo;
+
+ xflo.dst_orig = dst_orig;
+ xflo.flags = flags;
+
/* To accelerate a bit... */
if ((dst_orig->flags & DST_NOXFRM) ||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
flo = flow_cache_lookup(net, fl, family, dir,
- xfrm_bundle_lookup, dst_orig);
+ xfrm_bundle_lookup, &xflo);
if (flo == NULL)
goto nopol;
if (IS_ERR(flo)) {
@@ -2143,7 +2158,7 @@ restart:
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
- return make_blackhole(net, family, dst_orig);
+ return ERR_PTR(-EREMOTE);
}
if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
DECLARE_WAITQUEUE(wait, current);
@@ -2215,6 +2230,23 @@ dropdst:
}
EXPORT_SYMBOL(xfrm_lookup);
+/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
+ * Otherwise we may send out blackholed packets.
+ */
+struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
+ const struct flowi *fl,
+ struct sock *sk, int flags)
+{
+ struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
+ flags | XFRM_LOOKUP_QUEUE);
+
+ if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+ return make_blackhole(net, dst_orig->ops->family, dst_orig);
+
+ return dst;
+}
+EXPORT_SYMBOL(xfrm_lookup_route);
+
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
@@ -2480,7 +2512,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
skb_dst_force(skb);
- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
+ dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst)) {
res = 0;
dst = NULL;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f964d4c..32a2dd3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -930,6 +930,20 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
return skb;
}
+/* A wrapper for nlmsg_multicast() checking that nlsk is still available.
+ * Must be called with RCU read lock.
+ */
+static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
+ u32 pid, unsigned int group)
+{
+ struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+ if (nlsk)
+ return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
+ else
+ return -1;
+}
+
static inline size_t xfrm_spdinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
@@ -2253,7 +2267,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
}
#else
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
@@ -2363,7 +2377,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
link = &xfrm_dispatch[type];
/* All operations require privileges, even GET */
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (!netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
@@ -2440,7 +2454,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
return -EMSGSIZE;
}
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
@@ -2455,7 +2469,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
if (build_aevent(skb, x, c) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
}
static int xfrm_notify_sa_flush(const struct km_event *c)
@@ -2481,7 +2495,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
}
static inline size_t xfrm_sa_len(struct xfrm_state *x)
@@ -2568,7 +2582,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
out_free_skb:
kfree_skb(skb);
@@ -2659,7 +2673,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
if (build_acquire(skb, x, xt, xp) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
}
/* User gives us xfrm_user_policy_info followed by an array of 0
@@ -2773,7 +2787,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
if (build_polexpire(skb, xp, dir, c) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
@@ -2835,7 +2849,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
out_free_skb:
kfree_skb(skb);
@@ -2863,7 +2877,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
out_free_skb:
kfree_skb(skb);
@@ -2932,7 +2946,7 @@ static int xfrm_send_report(struct net *net, u8 proto,
if (build_report(skb, proto, sel, addr) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
}
static inline size_t xfrm_mapping_msgsize(void)
@@ -2984,7 +2998,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
if (build_mapping(skb, x, ipaddr, sport) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
}
static struct xfrm_mgr netlink_mgr = {