diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 03:01:26 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 03:01:26 (GMT) |
commit | 087afe8aaf562dc7a53f2577049830d6a3245742 (patch) | |
tree | 94fe422e62965b24030019368cb9ec4f9c90cd38 /net | |
parent | 54cf809b9512be95f53ed4a5e3b631d1ac42f0fa (diff) | |
parent | 95829b3a9c0b1d88778b23bc2afdf5a83de066ff (diff) | |
download | linux-087afe8aaf562dc7a53f2577049830d6a3245742.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes and more updates from David Miller:
1) Tunneling fixes from Tom Herbert and Alexander Duyck.
2) AF_UNIX updates some struct sock bit fields with the socket lock,
whereas setsockopt() sets overlapping ones with locking. Seperate
out the synchronized vs. the AF_UNIX unsynchronized ones to avoid
corruption. From Andrey Ryabinin.
3) Mount BPF filesystem with mount_nodev rather than mount_ns, from
Eric Biederman.
4) A couple kmemdup conversions, from Muhammad Falak R Wani.
5) BPF verifier fixes from Alexei Starovoitov.
6) Don't let tunneled UDP packets get stuck in socket queues, if
something goes wrong during the encapsulation just drop the packet
rather than signalling an error up the call stack. From Hannes
Frederic Sowa.
7) SKB ref after free in batman-adv, from Florian Westphal.
8) TCP iSCSI, ocfs2, rds, and tipc have to disable BH in it's TCP
callbacks since the TCP stack runs pre-emptibly now. From Eric
Dumazet.
9) Fix crash in fixed_phy_add, from Rabin Vincent.
10) Fix length checks in xen-netback, from Paul Durrant.
11) Fix mixup in KEY vs KEYID macsec attributes, from Sabrina Dubroca.
12) RDS connection spamming bug fixes from Sowmini Varadhan
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (152 commits)
net: suppress warnings on dev_alloc_skb
uapi glibc compat: fix compilation when !__USE_MISC in glibc
udp: prevent skbs lingering in tunnel socket queues
bpf: teach verifier to recognize imm += ptr pattern
bpf: support decreasing order in direct packet access
net: usb: ch9200: use kmemdup
ps3_gelic: use kmemdup
net:liquidio: use kmemdup
bpf: Use mount_nodev not mount_ns to mount the bpf filesystem
net: cdc_ncm: update datagram size after changing mtu
tuntap: correctly wake up process during uninit
intel: Add support for IPv6 IP-in-IP offload
ip6_gre: Do not allow segmentation offloads GRE_CSUM is enabled with FOU/GUE
RDS: TCP: Avoid rds connection churn from rogue SYNs
RDS: TCP: rds_tcp_accept_worker() must exit gracefully when terminating rds-tcp
net: sock: move ->sk_shutdown out of bitfields.
ipv6: Don't reset inner headers in ip6_tnl_xmit
ip4ip6: Support for GSO/GRO
ip6ip6: Support for GSO/GRO
ipv6: Set features for IPv6 tunnels
...
Diffstat (limited to 'net')
33 files changed, 714 insertions, 303 deletions
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7f98a9d..ce2f203 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -157,10 +157,8 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, orig_node->bat_iv.bcast_own = data_ptr; data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC); - if (!data_ptr) { - kfree(orig_node->bat_iv.bcast_own); + if (!data_ptr) goto unlock; - } memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, (max_if_num - 1) * sizeof(u8)); @@ -1182,9 +1180,10 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int tq_asym_penalty, inv_asym_penalty, if_num; + int if_num; + unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; - int tq_iface_penalty; + unsigned int tq_iface_penalty; bool ret = false; /* find corresponding one hop neighbor */ diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 3ff8bd1..0a12e5c 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -27,6 +27,7 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> +#include <linux/stddef.h> #include <linux/types.h> #include <linux/workqueue.h> @@ -39,6 +40,16 @@ static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + + if (primary_if) { + batadv_v_elp_iface_activate(primary_if, hard_iface); + batadv_hardif_put(primary_if); + } + /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can * set the interface as ACTIVE right away, without any risk of race * condition @@ -72,16 +83,34 @@ static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface) batadv_v_elp_iface_disable(hard_iface); } -static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) -{ -} - static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) { batadv_v_elp_primary_iface_set(hard_iface); batadv_v_ogm_primary_iface_set(hard_iface); } +/** + * batadv_v_iface_update_mac - react to hard-interface MAC address change + * @hard_iface: the modified interface + * + * If the modified interface is the primary one, update the originator + * address in the ELP and OGM messages to reflect the new MAC address. + */ +static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (primary_if != hard_iface) + goto out; + + batadv_v_primary_iface_set(hard_iface); +out: + if (primary_if) + batadv_hardif_put(primary_if); +} + static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { @@ -255,14 +284,23 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; + int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + if (WARN_ON(!ifinfo1)) + goto err_ifinfo1; + ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + if (WARN_ON(!ifinfo2)) + goto err_ifinfo2; - if (WARN_ON(!ifinfo1 || !ifinfo2)) - return 0; + ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; - return ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; + batadv_neigh_ifinfo_put(ifinfo2); +err_ifinfo2: + batadv_neigh_ifinfo_put(ifinfo1); +err_ifinfo1: + return ret; } static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, @@ -272,14 +310,26 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; u32 threshold; + bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + if (WARN_ON(!ifinfo1)) + goto err_ifinfo1; + ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + if (WARN_ON(!ifinfo2)) + goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; threshold = ifinfo1->bat_v.throughput - threshold; - return ifinfo2->bat_v.throughput > threshold; + ret = ifinfo2->bat_v.throughput > threshold; + + batadv_neigh_ifinfo_put(ifinfo2); +err_ifinfo2: + batadv_neigh_ifinfo_put(ifinfo1); +err_ifinfo1: + return ret; } static struct batadv_algo_ops batadv_batman_v __read_mostly = { diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 3844e7e..df42eb1 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -377,6 +377,27 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface) } /** + * batadv_v_elp_iface_activate - update the ELP buffer belonging to the given + * hard-interface + * @primary_iface: the new primary interface + * @hard_iface: interface holding the to-be-updated buffer + */ +void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, + struct batadv_hard_iface *hard_iface) +{ + struct batadv_elp_packet *elp_packet; + struct sk_buff *skb; + + if (!hard_iface->bat_v.elp_skb) + return; + + skb = hard_iface->bat_v.elp_skb; + elp_packet = (struct batadv_elp_packet *)skb->data; + ether_addr_copy(elp_packet->orig, + primary_iface->net_dev->dev_addr); +} + +/** * batadv_v_elp_primary_iface_set - change internal data to reflect the new * primary interface * @primary_iface: the new primary interface @@ -384,8 +405,6 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface) void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) { struct batadv_hard_iface *hard_iface; - struct batadv_elp_packet *elp_packet; - struct sk_buff *skb; /* update orig field of every elp iface belonging to this mesh */ rcu_read_lock(); @@ -393,13 +412,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) if (primary_iface->soft_iface != hard_iface->soft_iface) continue; - if (!hard_iface->bat_v.elp_skb) - continue; - - skb = hard_iface->bat_v.elp_skb; - elp_packet = (struct batadv_elp_packet *)skb->data; - ether_addr_copy(elp_packet->orig, - primary_iface->net_dev->dev_addr); + batadv_v_elp_iface_activate(primary_iface, hard_iface); } rcu_read_unlock(); } diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index e95f1bc..cc130b2 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -25,6 +25,8 @@ struct work_struct; int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); +void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, + struct batadv_hard_iface *hard_iface); void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 1ff4ee4..7f51bc2 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -619,6 +619,8 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node; struct batadv_hardif_neigh_node *hardif_neigh = NULL; + spin_lock_bh(&orig_node->neigh_list_lock); + neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); if (neigh_node) goto out; @@ -650,15 +652,15 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, kref_init(&neigh_node->refcount); kref_get(&neigh_node->refcount); - spin_lock_bh(&orig_node->neigh_list_lock); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); - spin_unlock_bh(&orig_node->neigh_list_lock); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); out: + spin_unlock_bh(&orig_node->neigh_list_lock); + if (hardif_neigh) batadv_hardif_neigh_put(hardif_neigh); return neigh_node; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index ae850f2..e3857ed 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -601,6 +601,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); int res, hdr_len, ret = NET_RX_DROP; + unsigned int len; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -641,6 +642,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (hdr_len > 0) batadv_skb_set_priority(skb, hdr_len); + len = skb->len; res = batadv_send_skb_to_orig(skb, orig_node, recv_if); /* translate transmit result into receive result */ @@ -648,7 +650,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, - skb->len + ETH_HLEN); + len + ETH_HLEN); ret = NET_RX_SUCCESS; } else if (res == NET_XMIT_POLICED) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index bdb4013..f403481 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -84,8 +84,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", - [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", - [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", + [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", + [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2e6e65f..377424e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1192,8 +1192,8 @@ int inet_sk_rebuild_header(struct sock *sk) } EXPORT_SYMBOL(inet_sk_rebuild_header); -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features) { bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); @@ -1205,24 +1205,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_TCPV6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_PARTIAL | - 0))) - goto out; - skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) @@ -1298,9 +1280,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, out: return segs; } +EXPORT_SYMBOL(inet_gso_segment); -static struct sk_buff **inet_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -1416,6 +1398,7 @@ out: return pp; } +EXPORT_SYMBOL(inet_gro_receive); static struct sk_buff **ipip_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -1467,7 +1450,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return -EINVAL; } -static int inet_gro_complete(struct sk_buff *skb, int nhoff) +int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); @@ -1497,11 +1480,12 @@ out_unlock: return err; } +EXPORT_SYMBOL(inet_gro_complete); static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return inet_gro_complete(skb, nhoff); } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index eeec7d6..5f9207c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -21,6 +21,7 @@ struct fou { u8 protocol; u8 flags; __be16 port; + u8 family; u16 type; struct list_head list; struct rcu_head rcu; @@ -47,14 +48,17 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static int fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) { - struct iphdr *iph = ip_hdr(skb); - /* Remove 'len' bytes from the packet (UDP header and * FOU header if present). */ - iph->tot_len = htons(ntohs(iph->tot_len) - len); + if (fou->family == AF_INET) + ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + else + ipv6_hdr(skb)->payload_len = + htons(ntohs(ipv6_hdr(skb)->payload_len) - len); + __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); @@ -68,7 +72,7 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - if (fou_recv_pull(skb, sizeof(struct udphdr))) + if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) goto drop; return -fou->protocol; @@ -141,7 +145,11 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) hdrlen = sizeof(struct guehdr) + optlen; - ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + if (fou->family == AF_INET) + ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + else + ipv6_hdr(skb)->payload_len = + htons(ntohs(ipv6_hdr(skb)->payload_len) - len); /* Pull csum through the guehdr now . This can be used if * there is a remote checksum offload. @@ -426,7 +434,8 @@ static int fou_add_to_port_list(struct net *net, struct fou *fou) mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (fou->port == fout->port) { + if (fou->port == fout->port && + fou->family == fout->family) { mutex_unlock(&fn->fou_lock); return -EALREADY; } @@ -448,31 +457,13 @@ static void fou_release(struct fou *fou) kfree_rcu(fou, rcu); } -static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -{ - udp_sk(sk)->encap_rcv = fou_udp_recv; - udp_sk(sk)->gro_receive = fou_gro_receive; - udp_sk(sk)->gro_complete = fou_gro_complete; - fou_from_sock(sk)->protocol = cfg->protocol; - - return 0; -} - -static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -{ - udp_sk(sk)->encap_rcv = gue_udp_recv; - udp_sk(sk)->gro_receive = gue_gro_receive; - udp_sk(sk)->gro_complete = gue_gro_complete; - - return 0; -} - static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { struct socket *sock = NULL; struct fou *fou = NULL; struct sock *sk; + struct udp_tunnel_sock_cfg tunnel_cfg; int err; /* Open UDP socket */ @@ -489,35 +480,36 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; - fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; + fou->family = cfg->udp_config.family; + fou->flags = cfg->flags; + fou->type = cfg->type; + fou->sock = sock; + + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); + tunnel_cfg.encap_type = 1; + tunnel_cfg.sk_user_data = fou; + tunnel_cfg.encap_destroy = NULL; /* Initial for fou type */ switch (cfg->type) { case FOU_ENCAP_DIRECT: - err = fou_encap_init(sk, fou, cfg); - if (err) - goto error; + tunnel_cfg.encap_rcv = fou_udp_recv; + tunnel_cfg.gro_receive = fou_gro_receive; + tunnel_cfg.gro_complete = fou_gro_complete; + fou->protocol = cfg->protocol; break; case FOU_ENCAP_GUE: - err = gue_encap_init(sk, fou, cfg); - if (err) - goto error; + tunnel_cfg.encap_rcv = gue_udp_recv; + tunnel_cfg.gro_receive = gue_gro_receive; + tunnel_cfg.gro_complete = gue_gro_complete; break; default: err = -EINVAL; goto error; } - fou->type = cfg->type; - - udp_sk(sk)->encap_type = 1; - udp_encap_enable(); - - sk->sk_user_data = fou; - fou->sock = sock; - - inet_inc_convert_csum(sk); + setup_udp_tunnel_sock(net, sock, &tunnel_cfg); sk->sk_allocation = GFP_ATOMIC; @@ -542,12 +534,13 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); __be16 port = cfg->udp_config.local_udp_port; + u8 family = cfg->udp_config.family; int err = -EINVAL; struct fou *fou; mutex_lock(&fn->fou_lock); list_for_each_entry(fou, &fn->fou_list, list) { - if (fou->port == port) { + if (fou->port == port && fou->family == family) { fou_release(fou); err = 0; break; @@ -585,8 +578,15 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_AF]) { u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); - if (family != AF_INET) - return -EINVAL; + switch (family) { + case AF_INET: + break; + case AF_INET6: + cfg->udp_config.ipv6_v6only = 1; + break; + default: + return -EAFNOSUPPORT; + } cfg->udp_config.family = family; } @@ -677,6 +677,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) struct fou_cfg cfg; struct fou *fout; __be16 port; + u8 family; int ret; ret = parse_nl_config(info, &cfg); @@ -686,6 +687,10 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) if (port == 0) return -EINVAL; + family = cfg.udp_config.family; + if (family != AF_INET && family != AF_INET6) + return -EINVAL; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -693,7 +698,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) ret = -ESRCH; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (port == fout->port) { + if (port == fout->port && family == fout->family) { ret = fou_dump_info(fout, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); @@ -798,6 +803,22 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, *protocol = IPPROTO_UDP; } +int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type) +{ + int err; + + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; + + *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), + skb, 0, 0, false); + + return 0; +} +EXPORT_SYMBOL(__fou_build_header); + int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { @@ -806,26 +827,21 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, __be16 sport; int err; - err = iptunnel_handle_offloads(skb, type); + err = __fou_build_header(skb, e, protocol, &sport, type); if (err) return err; - sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), - skb, 0, 0, false); fou_build_udp(skb, e, fl4, protocol, sport); return 0; } EXPORT_SYMBOL(fou_build_header); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) +int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type) { - int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : - SKB_GSO_UDP_TUNNEL; struct guehdr *guehdr; size_t hdrlen, optlen = 0; - __be16 sport; void *data; bool need_priv = false; int err; @@ -844,8 +860,8 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return err; /* Get source port (based on flow hash) before skb_push */ - sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), - skb, 0, 0, false); + *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), + skb, 0, 0, false); hdrlen = sizeof(struct guehdr) + optlen; @@ -890,6 +906,22 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } + return 0; +} +EXPORT_SYMBOL(__gue_build_header); + +int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) +{ + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; + __be16 sport; + int err; + + err = __gue_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + fou_build_udp(skb, e, fl4, protocol, sport); return 0; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e88190a..ecd1e09 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -26,20 +26,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, int gre_offset, outer_hlen; bool need_csum, ufo; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_PARTIAL))) - goto out; - if (!skb->encapsulation) goto out; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a69ed94..d8f5e0a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -443,29 +443,6 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); -static int ip_encap_hlen(struct ip_tunnel_encap *e) -{ - const struct ip_tunnel_encap_ops *ops; - int hlen = -EINVAL; - - if (e->type == TUNNEL_ENCAP_NONE) - return 0; - - if (e->type >= MAX_IPTUN_ENCAP_OPS) - return -EINVAL; - - rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[e->type]); - if (likely(ops && ops->encap_hlen)) - hlen = ops->encap_hlen(e); - rcu_read_unlock(); - - return hlen; -} - -const struct ip_tunnel_encap_ops __rcu * - iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; - int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, unsigned int num) { @@ -519,28 +496,6 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t, } EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); -int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, - u8 *protocol, struct flowi4 *fl4) -{ - const struct ip_tunnel_encap_ops *ops; - int ret = -EINVAL; - - if (t->encap.type == TUNNEL_ENCAP_NONE) - return 0; - - if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) - return -EINVAL; - - rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[t->encap.type]); - if (likely(ops && ops->build_header)) - ret = ops->build_header(skb, &t->encap, protocol, fl4); - rcu_read_unlock(); - - return ret; -} -EXPORT_SYMBOL(ip_tunnel_encap); - static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df, const struct iphdr *inner_iph) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9118b0e..afd6b59 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -37,6 +37,7 @@ #include <net/icmp.h> #include <net/protocol.h> #include <net/ip_tunnels.h> +#include <net/ip6_tunnel.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -47,6 +48,14 @@ #include <net/rtnetlink.h> #include <net/dst_metadata.h> +const struct ip_tunnel_encap_ops __rcu * + iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; +EXPORT_SYMBOL(iptun_encaps); + +const struct ip6_tnl_encap_ops __rcu * + ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; +EXPORT_SYMBOL(ip6tun_encaps); + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9282748..9783701 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 02737b6..5c59649 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -83,25 +83,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - 0) || - !(type & (SKB_GSO_TCPV4 | - SKB_GSO_TCPV6)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e3ebfe..d56c055 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1565,7 +1565,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6b7459c..81f253b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -209,16 +209,6 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || - !(type & (SKB_GSO_UDP)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 5e9d6bf..7ec3129 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o +obj-$(CONFIG_NET_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c new file mode 100644 index 0000000..c972d0b --- /dev/null +++ b/net/ipv6/fou6.c @@ -0,0 +1,140 @@ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <net/fou.h> +#include <net/ip.h> +#include <net/ip6_tunnel.h> +#include <net/ip6_checksum.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> + +static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, + struct flowi6 *fl6, u8 *protocol, __be16 sport) +{ + struct udphdr *uh; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + uh = udp_hdr(skb); + + uh->dest = e->dport; + uh->source = sport; + uh->len = htons(skb->len); + udp6_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM6), skb, + &fl6->saddr, &fl6->daddr, skb->len); + + *protocol = IPPROTO_UDP; +} + +int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) +{ + __be16 sport; + int err; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + err = __fou_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou6_build_udp(skb, e, fl6, protocol, sport); + + return 0; +} +EXPORT_SYMBOL(fou6_build_header); + +int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) +{ + __be16 sport; + int err; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + err = __gue_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou6_build_udp(skb, e, fl6, protocol, sport); + + return 0; +} +EXPORT_SYMBOL(gue6_build_header); + +#ifdef CONFIG_NET_FOU_IP_TUNNELS + +static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { + .encap_hlen = fou_encap_hlen, + .build_header = fou6_build_header, +}; + +static const struct ip6_tnl_encap_ops gue_ip6tun_ops = { + .encap_hlen = gue_encap_hlen, + .build_header = gue6_build_header, +}; + +static int ip6_tnl_encap_add_fou_ops(void) +{ + int ret; + + ret = ip6_tnl_encap_add_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + if (ret < 0) { + pr_err("can't add fou6 ops\n"); + return ret; + } + + ret = ip6_tnl_encap_add_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); + if (ret < 0) { + pr_err("can't add gue6 ops\n"); + ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + return ret; + } + + return 0; +} + +static void ip6_tnl_encap_del_fou_ops(void) +{ + ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + ip6_tnl_encap_del_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); +} + +#else + +static int ip6_tnl_encap_add_fou_ops(void) +{ + return 0; +} + +static void ip6_tnl_encap_del_fou_ops(void) +{ +} + +#endif + +static int __init fou6_init(void) +{ + int ret; + + ret = ip6_tnl_encap_add_fou_ops(); + + return ret; +} + +static void __exit fou6_fini(void) +{ + ip6_tnl_encap_del_fou_ops(); +} + +module_init(fou6_init); +module_exit(fou6_fini); +MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4541fa5..af503f5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -729,7 +729,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) t->tun_hlen = gre_calc_hlen(t->parms.o_flags); - t->hlen = t->tun_hlen; + t->hlen = t->encap_hlen + t->tun_hlen; t_hlen = t->hlen + sizeof(struct ipv6hdr); @@ -1022,9 +1022,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); - - tunnel->hlen = tunnel->tun_hlen; - + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); dev->hard_header_len = LL_MAX_HEADER + t_hlen; @@ -1290,15 +1288,57 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; } +static bool ip6gre_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_GRE_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); + } + + if (data[IFLA_GRE_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); + } + + if (data[IFLA_GRE_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); + } + + if (data[IFLA_GRE_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); + } + + return ret; +} + static int ip6gre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip6_tnl *nt; struct net *net = dev_net(dev); struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip_tunnel_encap ipencap; int err; nt = netdev_priv(dev); + + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6gre_netlink_parms(data, &nt->parms); if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) @@ -1315,11 +1355,15 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, dev->hw_features |= GRE6_FEATURES; if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP segmentation offload is not supported when we - * generate output sequences. + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. */ - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; + if (!(nt->parms.o_flags & TUNNEL_CSUM) || + (nt->encap.type == TUNNEL_ENCAP_NONE)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } /* Can use a lockless transmit, unless we generate * output sequences @@ -1345,10 +1389,18 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; + struct ip_tunnel_encap ipencap; if (dev == ign->fb_tunnel_dev) return -EINVAL; + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); @@ -1400,6 +1452,14 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_GRE_FLAGS */ nla_total_size(4) + + /* IFLA_GRE_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1422,6 +1482,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, + t->encap.type) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || + nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, + t->encap.flags)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1440,6 +1511,10 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, + [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f185cbc..94611e4 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -223,6 +223,7 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk unsigned int nhoff; int nexthdr; bool raw; + bool have_final = false; /* * Parse extension headers @@ -236,14 +237,27 @@ resubmit: nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; +resubmit_final: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { int ret; - if (ipprot->flags & INET6_PROTO_FINAL) { + if (have_final) { + if (!(ipprot->flags & INET6_PROTO_FINAL)) { + /* Once we've seen a final protocol don't + * allow encapsulation on any non-final + * ones. This allows foo in UDP encapsulation + * to work. + */ + goto discard; + } + } else if (ipprot->flags & INET6_PROTO_FINAL) { const struct ipv6hdr *hdr; + /* Only do this once for first final protocol */ + have_final = true; + /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ @@ -263,10 +277,21 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret > 0) - goto resubmit; - else if (ret == 0) + if (ret > 0) { + if (ipprot->flags & INET6_PROTO_FINAL) { + /* Not an extension header, most likely UDP + * encapsulation. Use return value as nexthdr + * protocol not nhoff (which presumably is + * not set by handler). + */ + nexthdr = ret; + goto resubmit_final; + } else { + goto resubmit; + } + } else if (ret == 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); + } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f5eb184..22e90e5 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -16,6 +16,7 @@ #include <net/protocol.h> #include <net/ipv6.h> +#include <net/inet_common.h> #include "ip6_offload.h" @@ -69,24 +70,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, bool encap, udpfrag; int nhoff; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_PARTIAL | - 0))) - goto out; - skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) @@ -104,7 +87,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) udpfrag = proto == IPPROTO_UDP && encap; else udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; @@ -271,9 +254,11 @@ out: return pp; } -static struct sk_buff **sit_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) { + /* Common GRO receive for SIT and IP6IP6 */ + if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; @@ -284,6 +269,21 @@ static struct sk_buff **sit_gro_receive(struct sk_buff **head, return ipv6_gro_receive(head, skb); } +static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + /* Common GRO receive for SIT and IP6IP6 */ + + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -312,10 +312,24 @@ out_unlock: static int sit_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_SIT; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return ipv6_gro_complete(skb, nhoff); } +static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + return ipv6_gro_complete(skb, nhoff); +} + +static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + return inet_gro_complete(skb, nhoff); +} + static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { @@ -328,11 +342,26 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = sit_gro_receive, + .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, }; +static const struct net_offload ip4ip6_offload = { + .callbacks = { + .gso_segment = inet_gso_segment, + .gro_receive = ip4ip6_gro_receive, + .gro_complete = ip4ip6_gro_complete, + }, +}; + +static const struct net_offload ip6ip6_offload = { + .callbacks = { + .gso_segment = ipv6_gso_segment, + .gro_receive = sit_ip6ip6_gro_receive, + .gro_complete = ip6ip6_gro_complete, + }, +}; static int __init ipv6_offload_init(void) { @@ -344,6 +373,8 @@ static int __init ipv6_offload_init(void) dev_add_offload(&ipv6_packet_offload); inet_add_offload(&sit_offload, IPPROTO_IPV6); + inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6); + inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP); return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e79330f..7b0481e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1010,7 +1010,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; - unsigned int max_headroom = sizeof(struct ipv6hdr); + unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; + unsigned int max_headroom = psh_hlen; int err = -1; /* NBMA tunnel */ @@ -1063,7 +1064,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - sizeof(*ipv6h); + mtu = dst_mtu(dst) - psh_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1119,16 +1120,18 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - + /* Calculate max headroom for all the headers and adjust + * needed_headroom if necessary. + */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) - + dst->header_len; + + dst->header_len + t->hlen; if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; + err = ip6_tnl_encap(skb, t, &proto, fl6); + if (err) + return err; + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1180,6 +1183,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, IPPROTO_IPIP); if (err != 0) { @@ -1234,6 +1242,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, IPPROTO_IPV6); if (err != 0) { @@ -1280,6 +1293,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; + int t_hlen; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1303,6 +1317,10 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) else dev->flags &= ~IFF_POINTOPOINT; + t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); @@ -1316,9 +1334,9 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + - sizeof(struct ipv6hdr); + t_hlen; - dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; @@ -1564,6 +1582,59 @@ int ip6_tnl_get_iflink(const struct net_device *dev) } EXPORT_SYMBOL(ip6_tnl_get_iflink); +int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + return !cmpxchg((const struct ip6_tnl_encap_ops **) + &ip6tun_encaps[num], + NULL, ops) ? 0 : -1; +} +EXPORT_SYMBOL(ip6_tnl_encap_add_ops); + +int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + int ret; + + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + ret = (cmpxchg((const struct ip6_tnl_encap_ops **) + &ip6tun_encaps[num], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(ip6_tnl_encap_del_ops); + +int ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap) +{ + int hlen; + + memset(&t->encap, 0, sizeof(t->encap)); + + hlen = ip6_encap_hlen(ipencap); + if (hlen < 0) + return hlen; + + t->encap.type = ipencap->type; + t->encap.sport = ipencap->sport; + t->encap.dport = ipencap->dport; + t->encap.flags = ipencap->flags; + + t->encap_hlen = hlen; + t->hlen = t->encap_hlen + t->tun_hlen; + + return 0; +} +EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup); + static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, @@ -1574,6 +1645,11 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +#define IPXIPX_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) /** * ip6_tnl_dev_setup - setup virtual tunnel device @@ -1585,20 +1661,18 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr); - t = netdev_priv(dev); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); + + dev->features |= IPXIPX_FEATURES; + dev->hw_features |= IPXIPX_FEATURES; + /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); @@ -1615,6 +1689,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int ret; + int t_hlen; t->dev = dev; t->net = dev_net(dev); @@ -1630,8 +1705,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev) if (ret) goto destroy_dst; - t->hlen = 0; t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; return 0; @@ -1729,13 +1811,55 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); } +static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} + static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip6_tnl *nt, *t; + struct ip_tunnel_encap ipencap; nt = netdev_priv(dev); + + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6_tnl_netlink_parms(data, &nt->parms); t = ip6_tnl_locate(net, &nt->parms, 0); @@ -1752,10 +1876,17 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct __ip6_tnl_parm p; struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip_tunnel_encap ipencap; if (dev == ip6n->fb_tnl_dev) return -EINVAL; + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(t, &ipencap); + + if (err < 0) + return err; + } ip6_tnl_netlink_parms(data, &p); t = ip6_tnl_locate(net, &p, 0); @@ -1796,6 +1927,14 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1813,6 +1952,17 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, + tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, + tunnel->encap.flags)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1836,6 +1986,10 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, + [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a13d8c1..0a5a255 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -913,7 +913,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) { + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) { ip_rt_put(rt); goto tx_error; } @@ -1000,7 +1000,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2ba6a77..2da1896 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -617,7 +617,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 5429f6b..ac858c4 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -36,19 +36,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT) || - !(type & (SKB_GSO_UDP)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 40662d73..0b68ba7 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1483,7 +1483,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, long timeo; struct kcm_rx_msg *rxm; int err = 0; - size_t copied; + ssize_t copied; struct sk_buff *skb; /* Only support splice for SOCKSEQPACKET */ diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index bbcf604..2055e57 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -26,15 +26,6 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t mpls_features; __be16 mpls_protocol; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCP_ECN))) - goto out; - /* Setup inner SKB. */ mpls_protocol = skb->protocol; skb->protocol = skb->inner_protocol; @@ -57,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * skb_mac_gso_segment(), an indirect caller of this function. */ __skb_pull(skb, skb->data - skb_mac_header(skb)); -out: + return segs; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6d19d2e..01d3d89 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -932,17 +932,14 @@ error: static inline int __tun_gso_type_mask(int encaps_af, int orig_af) { - if (encaps_af == AF_INET) { - if (orig_af == AF_INET) - return SKB_GSO_IPIP; - - return SKB_GSO_SIT; + switch (encaps_af) { + case AF_INET: + return SKB_GSO_IPXIP4; + case AF_INET6: + return SKB_GSO_IPXIP6; + default: + return 0; } - - /* GSO: we need to provide proper SKB_GSO_ value for IPv6: - * SKB_GSO_SIT/IPV6 - */ - return 0; } /* diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 49a3fcf..fb82e0a 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { state_change = sk->sk_state_change; @@ -69,7 +69,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index be263cd..4bf4bef 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -80,6 +80,9 @@ int rds_tcp_accept_one(struct socket *sock) int conn_state; struct sock *nsk; + if (!sock) /* module unload or netns delete in progress */ + return -ENETUNREACH; + ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, &new_sock); @@ -129,11 +132,13 @@ int rds_tcp_accept_one(struct socket *sock) * so we must quiesce any send threads before resetting * c_transport_data. */ - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); - if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) || + !conn->c_outgoing) { goto rst_nsk; - } else if (rs_tcp->t_sock) { + } else { + atomic_set(&conn->c_state, RDS_CONN_CONNECTING); + wait_event(conn->c_waitq, + !test_bit(RDS_IN_XMIT, &conn->c_flags)); rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); conn->c_outgoing = 0; } @@ -166,7 +171,7 @@ void rds_tcp_listen_data_ready(struct sock *sk) rdsdebug("listen data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (!ready) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -183,7 +188,7 @@ void rds_tcp_listen_data_ready(struct sock *sk) rds_tcp_accept_work(sk); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index d75d8b5..c3196f9 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -301,7 +301,7 @@ void rds_tcp_data_ready(struct sock *sk) rdsdebug("data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -315,7 +315,7 @@ void rds_tcp_data_ready(struct sock *sk) if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 2894e60..22d0f20 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -180,7 +180,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { write_space = sk->sk_write_space; @@ -200,7 +200,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/tipc/server.c b/net/tipc/server.c index 7a0af2d..272d20a 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -138,28 +138,28 @@ static void sock_data_ready(struct sock *sk) { struct tipc_conn *con; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && test_bit(CF_CONNECTED, &con->flags)) { conn_get(con); if (!queue_work(con->server->rcv_wq, &con->rwork)) conn_put(con); } - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void sock_write_space(struct sock *sk) { struct tipc_conn *con; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && test_bit(CF_CONNECTED, &con->flags)) { conn_get(con); if (!queue_work(con->server->send_wq, &con->swork)) conn_put(con); } - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) |