diff options
author | David S. Miller <davem@davemloft.net> | 2016-05-20 22:03:26 (GMT) |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-20 22:03:26 (GMT) |
commit | 911f6b1f57c48c9f379e6658e2e5198d84acdab3 (patch) | |
tree | 2676cf613cb92be36942e63bb7880b78432d6a59 /net | |
parent | 948350140ef060a42621de0de5c396807f7ec1fc (diff) | |
parent | 3ee93eaf2bbfbe0083f71a18a265d48adbd5bb27 (diff) | |
download | linux-911f6b1f57c48c9f379e6658e2e5198d84acdab3.tar.xz |
Merge branch 'GREoIPV6'
Tom Herbert says:
====================
ipv6: Enable GUEoIPv6 and more fixes for v6 tunneling
This patch set:
- Fixes GRE6 to process translate flags correctly from configuration
- Adds support for GSO and GRO for ip6ip6 and ip4ip6
- Add support for FOU and GUE in IPv6
- Support GRE, ip6ip6 and ip4ip6 over FOU/GUE
- Fixes ip6_input to deal with UDP encapsulations
- Some other minor fixes
v2:
- Removed a check of GSO types in MPLS
- Define GSO type SKB_GSO_IPXIP6 and SKB_GSO_IPXIP4 (based on input
from Alexander)
- Don't define GSO types specifically for IP6IP6 and IP4IP6, above
fix makes that unnecessary
- Don't bother clearing encapsulation flag in UDP tunnel segment
(another item suggested by Alexander).
v3:
- Address some minor comments from Alexander
v4:
- Rebase on changes to fix IP TX tunnels
- Fix MTU issues in ip4ip6, ip6ip6
- Add test data for above
v5:
- Address feedback from Shmulik Ladkani regarding extension header
code that does not return next header but in instead relies
on returning value via nhoff. Solution here is to fix EH
processing to return nexthdr value.
- Refactored IPv4 encaps so that we won't need to create
a ip6_tunnel_core.c when adding encap support IPv6.
v6:
- Fix build issues with regard to new GSO constants
- FIx MTU calculation issues ip6_tunnel.c pointed out byt ALex
- Add encap_hlen into headroom for GREv6 to work with FOU/GUE
v7:
- Added skb_set_inner_ipproto to ip4ip6 and ip6ip6
- Clarified max_headroom in ip6_tnl_xmit
- Set features for IPv6 tunnels
- Other cleanup suggested by Alexander
- Above fixes throughput performance issues in ip4ip6 and ip6ip6,
updated test results to reflect that
Tested: Various cases of IP tunnels with netperf TCP_STREAM and TCP_RR.
- IPv4/GRE/GUE/IPv6 with RCO
1 TCP_STREAM
6616 Mbps
200 TCP_RR
1244043 tps
141/243/446 90/95/99% latencies
86.61% CPU utilization
- IPv6/GRE/GUE/IPv6 with RCO
1 TCP_STREAM
6940 Mbps
200 TCP_RR
1270903 tps
138/236/440 90/95/99% latencies
87.51% CPU utilization
- IP6IP6
1 TCP_STREAM
5307 Mbps
200 TCP_RR
498981 tps
388/498/631 90/95/99% latencies
19.75% CPU utilization (1 CPU saturated)
- IP6IP6/GUE with RCO
1 TCP_STREAM
5575 Mbps
200 TCP_RR
1233818 tps
143/244/451 90/95/99% latencies
87.57 CPU utilization
- IP4IP6
1 TCP_STREAM
5235 Mbps
200 TCP_RR
763774 tps
250/318/466 90/95/99% latencies
35.25% CPU utilization (1 CPU saturated)
- IP4IP6/GUE with RCO
1 TCP_STREAM
5337 Mbps
200 TCP_RR
1196385 tps
148/251/460 90/95/99% latencies
87.56 CPU utilization
- GRE with keyid
200 TCP_RR
744173 tps
258/332/461 90/95/99% latencies
34.59% CPU utilization (1 CPU saturated)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/ethtool.c | 4 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 32 | ||||
-rw-r--r-- | net/ipv4/fou.c | 144 | ||||
-rw-r--r-- | net/ipv4/gre_offload.c | 14 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 45 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 9 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_offload.c | 19 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 10 | ||||
-rw-r--r-- | net/ipv6/Makefile | 1 | ||||
-rw-r--r-- | net/ipv6/fou6.c | 140 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 79 | ||||
-rw-r--r-- | net/ipv6/ip6_input.c | 33 | ||||
-rw-r--r-- | net/ipv6/ip6_offload.c | 77 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 190 | ||||
-rw-r--r-- | net/ipv6/sit.c | 4 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 13 | ||||
-rw-r--r-- | net/mpls/mpls_gso.c | 11 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 17 |
19 files changed, 589 insertions, 255 deletions
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index bdb4013..f403481 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -84,8 +84,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", - [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", - [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", + [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", + [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2e6e65f..377424e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1192,8 +1192,8 @@ int inet_sk_rebuild_header(struct sock *sk) } EXPORT_SYMBOL(inet_sk_rebuild_header); -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features) { bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); @@ -1205,24 +1205,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_TCPV6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_PARTIAL | - 0))) - goto out; - skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) @@ -1298,9 +1280,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, out: return segs; } +EXPORT_SYMBOL(inet_gso_segment); -static struct sk_buff **inet_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -1416,6 +1398,7 @@ out: return pp; } +EXPORT_SYMBOL(inet_gro_receive); static struct sk_buff **ipip_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -1467,7 +1450,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return -EINVAL; } -static int inet_gro_complete(struct sk_buff *skb, int nhoff) +int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); @@ -1497,11 +1480,12 @@ out_unlock: return err; } +EXPORT_SYMBOL(inet_gro_complete); static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return inet_gro_complete(skb, nhoff); } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index eeec7d6..5f9207c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -21,6 +21,7 @@ struct fou { u8 protocol; u8 flags; __be16 port; + u8 family; u16 type; struct list_head list; struct rcu_head rcu; @@ -47,14 +48,17 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static int fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) { - struct iphdr *iph = ip_hdr(skb); - /* Remove 'len' bytes from the packet (UDP header and * FOU header if present). */ - iph->tot_len = htons(ntohs(iph->tot_len) - len); + if (fou->family == AF_INET) + ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + else + ipv6_hdr(skb)->payload_len = + htons(ntohs(ipv6_hdr(skb)->payload_len) - len); + __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); @@ -68,7 +72,7 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - if (fou_recv_pull(skb, sizeof(struct udphdr))) + if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) goto drop; return -fou->protocol; @@ -141,7 +145,11 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) hdrlen = sizeof(struct guehdr) + optlen; - ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + if (fou->family == AF_INET) + ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + else + ipv6_hdr(skb)->payload_len = + htons(ntohs(ipv6_hdr(skb)->payload_len) - len); /* Pull csum through the guehdr now . This can be used if * there is a remote checksum offload. @@ -426,7 +434,8 @@ static int fou_add_to_port_list(struct net *net, struct fou *fou) mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (fou->port == fout->port) { + if (fou->port == fout->port && + fou->family == fout->family) { mutex_unlock(&fn->fou_lock); return -EALREADY; } @@ -448,31 +457,13 @@ static void fou_release(struct fou *fou) kfree_rcu(fou, rcu); } -static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -{ - udp_sk(sk)->encap_rcv = fou_udp_recv; - udp_sk(sk)->gro_receive = fou_gro_receive; - udp_sk(sk)->gro_complete = fou_gro_complete; - fou_from_sock(sk)->protocol = cfg->protocol; - - return 0; -} - -static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -{ - udp_sk(sk)->encap_rcv = gue_udp_recv; - udp_sk(sk)->gro_receive = gue_gro_receive; - udp_sk(sk)->gro_complete = gue_gro_complete; - - return 0; -} - static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { struct socket *sock = NULL; struct fou *fou = NULL; struct sock *sk; + struct udp_tunnel_sock_cfg tunnel_cfg; int err; /* Open UDP socket */ @@ -489,35 +480,36 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; - fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; + fou->family = cfg->udp_config.family; + fou->flags = cfg->flags; + fou->type = cfg->type; + fou->sock = sock; + + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); + tunnel_cfg.encap_type = 1; + tunnel_cfg.sk_user_data = fou; + tunnel_cfg.encap_destroy = NULL; /* Initial for fou type */ switch (cfg->type) { case FOU_ENCAP_DIRECT: - err = fou_encap_init(sk, fou, cfg); - if (err) - goto error; + tunnel_cfg.encap_rcv = fou_udp_recv; + tunnel_cfg.gro_receive = fou_gro_receive; + tunnel_cfg.gro_complete = fou_gro_complete; + fou->protocol = cfg->protocol; break; case FOU_ENCAP_GUE: - err = gue_encap_init(sk, fou, cfg); - if (err) - goto error; + tunnel_cfg.encap_rcv = gue_udp_recv; + tunnel_cfg.gro_receive = gue_gro_receive; + tunnel_cfg.gro_complete = gue_gro_complete; break; default: err = -EINVAL; goto error; } - fou->type = cfg->type; - - udp_sk(sk)->encap_type = 1; - udp_encap_enable(); - - sk->sk_user_data = fou; - fou->sock = sock; - - inet_inc_convert_csum(sk); + setup_udp_tunnel_sock(net, sock, &tunnel_cfg); sk->sk_allocation = GFP_ATOMIC; @@ -542,12 +534,13 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); __be16 port = cfg->udp_config.local_udp_port; + u8 family = cfg->udp_config.family; int err = -EINVAL; struct fou *fou; mutex_lock(&fn->fou_lock); list_for_each_entry(fou, &fn->fou_list, list) { - if (fou->port == port) { + if (fou->port == port && fou->family == family) { fou_release(fou); err = 0; break; @@ -585,8 +578,15 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_AF]) { u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); - if (family != AF_INET) - return -EINVAL; + switch (family) { + case AF_INET: + break; + case AF_INET6: + cfg->udp_config.ipv6_v6only = 1; + break; + default: + return -EAFNOSUPPORT; + } cfg->udp_config.family = family; } @@ -677,6 +677,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) struct fou_cfg cfg; struct fou *fout; __be16 port; + u8 family; int ret; ret = parse_nl_config(info, &cfg); @@ -686,6 +687,10 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) if (port == 0) return -EINVAL; + family = cfg.udp_config.family; + if (family != AF_INET && family != AF_INET6) + return -EINVAL; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -693,7 +698,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) ret = -ESRCH; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (port == fout->port) { + if (port == fout->port && family == fout->family) { ret = fou_dump_info(fout, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); @@ -798,6 +803,22 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, *protocol = IPPROTO_UDP; } +int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type) +{ + int err; + + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; + + *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), + skb, 0, 0, false); + + return 0; +} +EXPORT_SYMBOL(__fou_build_header); + int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { @@ -806,26 +827,21 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, __be16 sport; int err; - err = iptunnel_handle_offloads(skb, type); + err = __fou_build_header(skb, e, protocol, &sport, type); if (err) return err; - sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), - skb, 0, 0, false); fou_build_udp(skb, e, fl4, protocol, sport); return 0; } EXPORT_SYMBOL(fou_build_header); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) +int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type) { - int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : - SKB_GSO_UDP_TUNNEL; struct guehdr *guehdr; size_t hdrlen, optlen = 0; - __be16 sport; void *data; bool need_priv = false; int err; @@ -844,8 +860,8 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return err; /* Get source port (based on flow hash) before skb_push */ - sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), - skb, 0, 0, false); + *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), + skb, 0, 0, false); hdrlen = sizeof(struct guehdr) + optlen; @@ -890,6 +906,22 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } + return 0; +} +EXPORT_SYMBOL(__gue_build_header); + +int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) +{ + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; + __be16 sport; + int err; + + err = __gue_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + fou_build_udp(skb, e, fl4, protocol, sport); return 0; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e88190a..ecd1e09 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -26,20 +26,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, int gre_offset, outer_hlen; bool need_csum, ufo; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_PARTIAL))) - goto out; - if (!skb->encapsulation) goto out; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a69ed94..d8f5e0a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -443,29 +443,6 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); -static int ip_encap_hlen(struct ip_tunnel_encap *e) -{ - const struct ip_tunnel_encap_ops *ops; - int hlen = -EINVAL; - - if (e->type == TUNNEL_ENCAP_NONE) - return 0; - - if (e->type >= MAX_IPTUN_ENCAP_OPS) - return -EINVAL; - - rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[e->type]); - if (likely(ops && ops->encap_hlen)) - hlen = ops->encap_hlen(e); - rcu_read_unlock(); - - return hlen; -} - -const struct ip_tunnel_encap_ops __rcu * - iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; - int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, unsigned int num) { @@ -519,28 +496,6 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t, } EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); -int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, - u8 *protocol, struct flowi4 *fl4) -{ - const struct ip_tunnel_encap_ops *ops; - int ret = -EINVAL; - - if (t->encap.type == TUNNEL_ENCAP_NONE) - return 0; - - if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) - return -EINVAL; - - rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[t->encap.type]); - if (likely(ops && ops->build_header)) - ret = ops->build_header(skb, &t->encap, protocol, fl4); - rcu_read_unlock(); - - return ret; -} -EXPORT_SYMBOL(ip_tunnel_encap); - static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df, const struct iphdr *inner_iph) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9118b0e..afd6b59 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -37,6 +37,7 @@ #include <net/icmp.h> #include <net/protocol.h> #include <net/ip_tunnels.h> +#include <net/ip6_tunnel.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -47,6 +48,14 @@ #include <net/rtnetlink.h> #include <net/dst_metadata.h> +const struct ip_tunnel_encap_ops __rcu * + iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; +EXPORT_SYMBOL(iptun_encaps); + +const struct ip6_tnl_encap_ops __rcu * + ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; +EXPORT_SYMBOL(ip6tun_encaps); + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9282748..9783701 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 02737b6..5c59649 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -83,25 +83,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - 0) || - !(type & (SKB_GSO_TCPV4 | - SKB_GSO_TCPV6)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6b7459c..81f253b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -209,16 +209,6 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || - !(type & (SKB_GSO_UDP)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 5e9d6bf..7ec3129 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o +obj-$(CONFIG_NET_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c new file mode 100644 index 0000000..c972d0b --- /dev/null +++ b/net/ipv6/fou6.c @@ -0,0 +1,140 @@ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <net/fou.h> +#include <net/ip.h> +#include <net/ip6_tunnel.h> +#include <net/ip6_checksum.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> + +static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, + struct flowi6 *fl6, u8 *protocol, __be16 sport) +{ + struct udphdr *uh; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + uh = udp_hdr(skb); + + uh->dest = e->dport; + uh->source = sport; + uh->len = htons(skb->len); + udp6_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM6), skb, + &fl6->saddr, &fl6->daddr, skb->len); + + *protocol = IPPROTO_UDP; +} + +int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) +{ + __be16 sport; + int err; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + err = __fou_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou6_build_udp(skb, e, fl6, protocol, sport); + + return 0; +} +EXPORT_SYMBOL(fou6_build_header); + +int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) +{ + __be16 sport; + int err; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + err = __gue_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou6_build_udp(skb, e, fl6, protocol, sport); + + return 0; +} +EXPORT_SYMBOL(gue6_build_header); + +#ifdef CONFIG_NET_FOU_IP_TUNNELS + +static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { + .encap_hlen = fou_encap_hlen, + .build_header = fou6_build_header, +}; + +static const struct ip6_tnl_encap_ops gue_ip6tun_ops = { + .encap_hlen = gue_encap_hlen, + .build_header = gue6_build_header, +}; + +static int ip6_tnl_encap_add_fou_ops(void) +{ + int ret; + + ret = ip6_tnl_encap_add_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + if (ret < 0) { + pr_err("can't add fou6 ops\n"); + return ret; + } + + ret = ip6_tnl_encap_add_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); + if (ret < 0) { + pr_err("can't add gue6 ops\n"); + ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + return ret; + } + + return 0; +} + +static void ip6_tnl_encap_del_fou_ops(void) +{ + ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + ip6_tnl_encap_del_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); +} + +#else + +static int ip6_tnl_encap_add_fou_ops(void) +{ + return 0; +} + +static void ip6_tnl_encap_del_fou_ops(void) +{ +} + +#endif + +static int __init fou6_init(void) +{ + int ret; + + ret = ip6_tnl_encap_add_fou_ops(); + + return ret; +} + +static void __exit fou6_fini(void) +{ + ip6_tnl_encap_del_fou_ops(); +} + +module_init(fou6_init); +module_exit(fou6_fini); +MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4541fa5..6fb1b89 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -729,7 +729,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) t->tun_hlen = gre_calc_hlen(t->parms.o_flags); - t->hlen = t->tun_hlen; + t->hlen = t->encap_hlen + t->tun_hlen; t_hlen = t->hlen + sizeof(struct ipv6hdr); @@ -1022,9 +1022,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); - - tunnel->hlen = tunnel->tun_hlen; - + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); dev->hard_header_len = LL_MAX_HEADER + t_hlen; @@ -1290,15 +1288,57 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; } +static bool ip6gre_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_GRE_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); + } + + if (data[IFLA_GRE_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); + } + + if (data[IFLA_GRE_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); + } + + if (data[IFLA_GRE_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); + } + + return ret; +} + static int ip6gre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip6_tnl *nt; struct net *net = dev_net(dev); struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip_tunnel_encap ipencap; int err; nt = netdev_priv(dev); + + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6gre_netlink_parms(data, &nt->parms); if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) @@ -1345,10 +1385,18 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; + struct ip_tunnel_encap ipencap; if (dev == ign->fb_tunnel_dev) return -EINVAL; + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); @@ -1400,6 +1448,14 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_GRE_FLAGS */ nla_total_size(4) + + /* IFLA_GRE_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1422,6 +1478,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, + t->encap.type) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || + nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, + t->encap.flags)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1440,6 +1507,10 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, + [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f185cbc..94611e4 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -223,6 +223,7 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk unsigned int nhoff; int nexthdr; bool raw; + bool have_final = false; /* * Parse extension headers @@ -236,14 +237,27 @@ resubmit: nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; +resubmit_final: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { int ret; - if (ipprot->flags & INET6_PROTO_FINAL) { + if (have_final) { + if (!(ipprot->flags & INET6_PROTO_FINAL)) { + /* Once we've seen a final protocol don't + * allow encapsulation on any non-final + * ones. This allows foo in UDP encapsulation + * to work. + */ + goto discard; + } + } else if (ipprot->flags & INET6_PROTO_FINAL) { const struct ipv6hdr *hdr; + /* Only do this once for first final protocol */ + have_final = true; + /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ @@ -263,10 +277,21 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret > 0) - goto resubmit; - else if (ret == 0) + if (ret > 0) { + if (ipprot->flags & INET6_PROTO_FINAL) { + /* Not an extension header, most likely UDP + * encapsulation. Use return value as nexthdr + * protocol not nhoff (which presumably is + * not set by handler). + */ + nexthdr = ret; + goto resubmit_final; + } else { + goto resubmit; + } + } else if (ret == 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); + } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f5eb184..22e90e5 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -16,6 +16,7 @@ #include <net/protocol.h> #include <net/ipv6.h> +#include <net/inet_common.h> #include "ip6_offload.h" @@ -69,24 +70,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, bool encap, udpfrag; int nhoff; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_PARTIAL | - 0))) - goto out; - skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) @@ -104,7 +87,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) udpfrag = proto == IPPROTO_UDP && encap; else udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; @@ -271,9 +254,11 @@ out: return pp; } -static struct sk_buff **sit_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) { + /* Common GRO receive for SIT and IP6IP6 */ + if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; @@ -284,6 +269,21 @@ static struct sk_buff **sit_gro_receive(struct sk_buff **head, return ipv6_gro_receive(head, skb); } +static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + /* Common GRO receive for SIT and IP6IP6 */ + + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -312,10 +312,24 @@ out_unlock: static int sit_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_SIT; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return ipv6_gro_complete(skb, nhoff); } +static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + return ipv6_gro_complete(skb, nhoff); +} + +static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + return inet_gro_complete(skb, nhoff); +} + static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { @@ -328,11 +342,26 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = sit_gro_receive, + .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, }; +static const struct net_offload ip4ip6_offload = { + .callbacks = { + .gso_segment = inet_gso_segment, + .gro_receive = ip4ip6_gro_receive, + .gro_complete = ip4ip6_gro_complete, + }, +}; + +static const struct net_offload ip6ip6_offload = { + .callbacks = { + .gso_segment = ipv6_gso_segment, + .gro_receive = sit_ip6ip6_gro_receive, + .gro_complete = ip6ip6_gro_complete, + }, +}; static int __init ipv6_offload_init(void) { @@ -344,6 +373,8 @@ static int __init ipv6_offload_init(void) dev_add_offload(&ipv6_packet_offload); inet_add_offload(&sit_offload, IPPROTO_IPV6); + inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6); + inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP); return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e79330f..7b0481e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1010,7 +1010,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; - unsigned int max_headroom = sizeof(struct ipv6hdr); + unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; + unsigned int max_headroom = psh_hlen; int err = -1; /* NBMA tunnel */ @@ -1063,7 +1064,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - sizeof(*ipv6h); + mtu = dst_mtu(dst) - psh_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1119,16 +1120,18 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - + /* Calculate max headroom for all the headers and adjust + * needed_headroom if necessary. + */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) - + dst->header_len; + + dst->header_len + t->hlen; if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; + err = ip6_tnl_encap(skb, t, &proto, fl6); + if (err) + return err; + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1180,6 +1183,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, IPPROTO_IPIP); if (err != 0) { @@ -1234,6 +1242,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, IPPROTO_IPV6); if (err != 0) { @@ -1280,6 +1293,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; + int t_hlen; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1303,6 +1317,10 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) else dev->flags &= ~IFF_POINTOPOINT; + t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); @@ -1316,9 +1334,9 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + - sizeof(struct ipv6hdr); + t_hlen; - dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; @@ -1564,6 +1582,59 @@ int ip6_tnl_get_iflink(const struct net_device *dev) } EXPORT_SYMBOL(ip6_tnl_get_iflink); +int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + return !cmpxchg((const struct ip6_tnl_encap_ops **) + &ip6tun_encaps[num], + NULL, ops) ? 0 : -1; +} +EXPORT_SYMBOL(ip6_tnl_encap_add_ops); + +int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + int ret; + + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + ret = (cmpxchg((const struct ip6_tnl_encap_ops **) + &ip6tun_encaps[num], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(ip6_tnl_encap_del_ops); + +int ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap) +{ + int hlen; + + memset(&t->encap, 0, sizeof(t->encap)); + + hlen = ip6_encap_hlen(ipencap); + if (hlen < 0) + return hlen; + + t->encap.type = ipencap->type; + t->encap.sport = ipencap->sport; + t->encap.dport = ipencap->dport; + t->encap.flags = ipencap->flags; + + t->encap_hlen = hlen; + t->hlen = t->encap_hlen + t->tun_hlen; + + return 0; +} +EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup); + static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, @@ -1574,6 +1645,11 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +#define IPXIPX_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) /** * ip6_tnl_dev_setup - setup virtual tunnel device @@ -1585,20 +1661,18 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr); - t = netdev_priv(dev); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); + + dev->features |= IPXIPX_FEATURES; + dev->hw_features |= IPXIPX_FEATURES; + /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); @@ -1615,6 +1689,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int ret; + int t_hlen; t->dev = dev; t->net = dev_net(dev); @@ -1630,8 +1705,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev) if (ret) goto destroy_dst; - t->hlen = 0; t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; return 0; @@ -1729,13 +1811,55 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); } +static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} + static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip6_tnl *nt, *t; + struct ip_tunnel_encap ipencap; nt = netdev_priv(dev); + + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6_tnl_netlink_parms(data, &nt->parms); t = ip6_tnl_locate(net, &nt->parms, 0); @@ -1752,10 +1876,17 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct __ip6_tnl_parm p; struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip_tunnel_encap ipencap; if (dev == ip6n->fb_tnl_dev) return -EINVAL; + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(t, &ipencap); + + if (err < 0) + return err; + } ip6_tnl_netlink_parms(data, &p); t = ip6_tnl_locate(net, &p, 0); @@ -1796,6 +1927,14 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1813,6 +1952,17 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, + tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, + tunnel->encap.flags)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1836,6 +1986,10 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, + [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a13d8c1..0a5a255 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -913,7 +913,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) { + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) { ip_rt_put(rt); goto tx_error; } @@ -1000,7 +1000,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 5429f6b..ac858c4 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -36,19 +36,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT) || - !(type & (SKB_GSO_UDP)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index bbcf604..2055e57 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -26,15 +26,6 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t mpls_features; __be16 mpls_protocol; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCP_ECN))) - goto out; - /* Setup inner SKB. */ mpls_protocol = skb->protocol; skb->protocol = skb->inner_protocol; @@ -57,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * skb_mac_gso_segment(), an indirect caller of this function. */ __skb_pull(skb, skb->data - skb_mac_header(skb)); -out: + return segs; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6d19d2e..01d3d89 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -932,17 +932,14 @@ error: static inline int __tun_gso_type_mask(int encaps_af, int orig_af) { - if (encaps_af == AF_INET) { - if (orig_af == AF_INET) - return SKB_GSO_IPIP; - - return SKB_GSO_SIT; + switch (encaps_af) { + case AF_INET: + return SKB_GSO_IPXIP4; + case AF_INET6: + return SKB_GSO_IPXIP6; + default: + return 0; } - - /* GSO: we need to provide proper SKB_GSO_ value for IPv6: - * SKB_GSO_SIT/IPV6 - */ - return 0; } /* |