summaryrefslogtreecommitdiff
path: root/net/openvswitch/vport-vxlan.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-03 15:08:17 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-03 15:08:17 (GMT)
commitdd5cdb48edfd34401799056a9acf61078d773f90 (patch)
tree8e251fb4a4c196540fe9b6a6d8b13275f93a057c /net/openvswitch/vport-vxlan.c
parent1e1a4e8f439113b7820bc7150569f685e1cc2b43 (diff)
parent62da98656b62a5ca57f22263705175af8ded5aa1 (diff)
downloadlinux-dd5cdb48edfd34401799056a9acf61078d773f90.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Another merge window, another set of networking changes. I've heard rumblings that the lightweight tunnels infrastructure has been voted networking change of the year. But what do I know? 1) Add conntrack support to openvswitch, from Joe Stringer. 2) Initial support for VRF (Virtual Routing and Forwarding), which allows the segmentation of routing paths without using multiple devices. There are some semantic kinks to work out still, but this is a reasonably strong foundation. From David Ahern. 3) Remove spinlock fro act_bpf fast path, from Alexei Starovoitov. 4) Ignore route nexthops with a link down state in ipv6, just like ipv4. From Andy Gospodarek. 5) Remove spinlock from fast path of act_gact and act_mirred, from Eric Dumazet. 6) Document the DSA layer, from Florian Fainelli. 7) Add netconsole support to bcmgenet, systemport, and DSA. Also from Florian Fainelli. 8) Add Mellanox Switch Driver and core infrastructure, from Jiri Pirko. 9) Add support for "light weight tunnels", which allow for encapsulation and decapsulation without bearing the overhead of a full blown netdevice. From Thomas Graf, Jiri Benc, and a cast of others. 10) Add Identifier Locator Addressing support for ipv6, from Tom Herbert. 11) Support fragmented SKBs in iwlwifi, from Johannes Berg. 12) Allow perf PMUs to be accessed from eBPF programs, from Kaixu Xia. 13) Add BQL support to 3c59x driver, from Loganaden Velvindron. 14) Stop using a zero TX queue length to mean that a device shouldn't have a qdisc attached, use an explicit flag instead. From Phil Sutter. 15) Use generic geneve netdevice infrastructure in openvswitch, from Pravin B Shelar. 16) Add infrastructure to avoid re-forwarding a packet in software that was already forwarded by a hardware switch. From Scott Feldman. 17) Allow AF_PACKET fanout function to be implemented in a bpf program, from Willem de Bruijn" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1458 commits) netfilter: nf_conntrack: make nf_ct_zone_dflt built-in netfilter: nf_dup{4, 6}: fix build error when nf_conntrack disabled net: fec: clear receive interrupts before processing a packet ipv6: fix exthdrs offload registration in out_rt path xen-netback: add support for multicast control bgmac: Update fixed_phy_register() sock, diag: fix panic in sock_diag_put_filterinfo flow_dissector: Use 'const' where possible. flow_dissector: Fix function argument ordering dependency ixgbe: Resolve "initialized field overwritten" warnings ixgbe: Remove bimodal SR-IOV disabling ixgbe: Add support for reporting 2.5G link speed ixgbe: fix bounds checking in ixgbe_setup_tc for 82598 ixgbe: support for ethtool set_rxfh ixgbe: Avoid needless PHY access on copper phys ixgbe: cleanup to use cached mask value ixgbe: Remove second instance of lan_id variable ixgbe: use kzalloc for allocating one thing flow: Move __get_hash_from_flowi{4,6} into flow_dissector.c ixgbe: Remove unused PCI bus types ...
Diffstat (limited to 'net/openvswitch/vport-vxlan.c')
-rw-r--r--net/openvswitch/vport-vxlan.c229
1 files changed, 49 insertions, 180 deletions
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 6d39766..c11413d 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -17,94 +17,37 @@
* 02110-1301, USA
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/rculist.h>
-#include <linux/udp.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/openvswitch.h>
#include <linux/module.h>
-
-#include <net/icmp.h>
-#include <net/ip.h>
#include <net/udp.h>
#include <net/ip_tunnels.h>
#include <net/rtnetlink.h>
-#include <net/route.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <net/vxlan.h>
#include "datapath.h"
#include "vport.h"
-#include "vport-vxlan.h"
-
-/**
- * struct vxlan_port - Keeps track of open UDP ports
- * @vs: vxlan_sock created for the port.
- * @name: vport name.
- */
-struct vxlan_port {
- struct vxlan_sock *vs;
- char name[IFNAMSIZ];
- u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
-};
-
-static struct vport_ops ovs_vxlan_vport_ops;
+#include "vport-netdev.h"
-static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
- struct vxlan_metadata *md)
-{
- struct ovs_tunnel_info tun_info;
- struct vxlan_port *vxlan_port;
- struct vport *vport = vs->data;
- struct iphdr *iph;
- struct ovs_vxlan_opts opts = {
- .gbp = md->gbp,
- };
- __be64 key;
- __be16 flags;
-
- flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0);
- vxlan_port = vxlan_vport(vport);
- if (vxlan_port->exts & VXLAN_F_GBP && md->gbp)
- flags |= TUNNEL_VXLAN_OPT;
-
- /* Save outer tunnel values */
- iph = ip_hdr(skb);
- key = cpu_to_be64(ntohl(md->vni) >> 8);
- ovs_flow_tun_info_init(&tun_info, iph,
- udp_hdr(skb)->source, udp_hdr(skb)->dest,
- key, flags, &opts, sizeof(opts));
-
- ovs_vport_receive(vport, skb, &tun_info);
-}
+static struct vport_ops ovs_vxlan_netdev_vport_ops;
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ struct vxlan_dev *vxlan = netdev_priv(vport->dev);
+ __be16 dst_port = vxlan->cfg.dst_port;
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
- if (vxlan_port->exts) {
+ if (vxlan->flags & VXLAN_F_GBP) {
struct nlattr *exts;
exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
if (!exts)
return -EMSGSIZE;
- if (vxlan_port->exts & VXLAN_F_GBP &&
+ if (vxlan->flags & VXLAN_F_GBP &&
nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
return -EMSGSIZE;
@@ -114,23 +57,14 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
return 0;
}
-static void vxlan_tnl_destroy(struct vport *vport)
-{
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
-
- vxlan_sock_release(vxlan_port->vs);
-
- ovs_vport_deferred_free(vport);
-}
-
-static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
};
-static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
+ struct vxlan_config *conf)
{
- struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
- struct vxlan_port *vxlan_port;
+ struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1];
int err;
if (nla_len(attr) < sizeof(struct nlattr))
@@ -140,10 +74,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
if (err < 0)
return err;
- vxlan_port = vxlan_vport(vport);
-
if (exts[OVS_VXLAN_EXT_GBP])
- vxlan_port->exts |= VXLAN_F_GBP;
+ conf->flags |= VXLAN_F_GBP;
return 0;
}
@@ -152,128 +84,74 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
struct nlattr *options = parms->options;
- struct vxlan_port *vxlan_port;
- struct vxlan_sock *vs;
+ struct net_device *dev;
struct vport *vport;
struct nlattr *a;
- u16 dst_port;
int err;
+ struct vxlan_config conf = {
+ .no_share = true,
+ .flags = VXLAN_F_COLLECT_METADATA,
+ };
if (!options) {
err = -EINVAL;
goto error;
}
+
a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
if (a && nla_len(a) == sizeof(u16)) {
- dst_port = nla_get_u16(a);
+ conf.dst_port = htons(nla_get_u16(a));
} else {
/* Require destination port from userspace. */
err = -EINVAL;
goto error;
}
- vport = ovs_vport_alloc(sizeof(struct vxlan_port),
- &ovs_vxlan_vport_ops, parms);
+ vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
if (IS_ERR(vport))
return vport;
- vxlan_port = vxlan_vport(vport);
- strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
-
a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
if (a) {
- err = vxlan_configure_exts(vport, a);
+ err = vxlan_configure_exts(vport, a, &conf);
if (err) {
ovs_vport_free(vport);
goto error;
}
}
- vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
- vxlan_port->exts);
- if (IS_ERR(vs)) {
+ rtnl_lock();
+ dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
ovs_vport_free(vport);
- return (void *)vs;
+ return ERR_CAST(dev);
}
- vxlan_port->vs = vs;
+ dev_change_flags(dev, dev->flags | IFF_UP);
+ rtnl_unlock();
return vport;
-
error:
return ERR_PTR(err);
}
-static int vxlan_ext_gbp(struct sk_buff *skb)
+static struct vport *vxlan_create(const struct vport_parms *parms)
{
- const struct ovs_tunnel_info *tun_info;
- const struct ovs_vxlan_opts *opts;
-
- tun_info = OVS_CB(skb)->egress_tun_info;
- opts = tun_info->options;
-
- if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
- tun_info->options_len >= sizeof(*opts))
- return opts->gbp;
- else
- return 0;
-}
-
-static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
- struct net *net = ovs_dp_get_net(vport->dp);
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- struct sock *sk = vxlan_port->vs->sock->sk;
- __be16 dst_port = inet_sk(sk)->inet_sport;
- const struct ovs_key_ipv4_tunnel *tun_key;
- struct vxlan_metadata md = {0};
- struct rtable *rt;
- struct flowi4 fl;
- __be16 src_port;
- __be16 df;
- int err;
- u32 vxflags;
-
- if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
- err = -EINVAL;
- goto error;
- }
-
- tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto error;
- }
-
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
- htons(IP_DF) : 0;
+ struct vport *vport;
- skb->ignore_df = 1;
+ vport = vxlan_tnl_create(parms);
+ if (IS_ERR(vport))
+ return vport;
- src_port = udp_flow_src_port(net, skb, 0, 0, true);
- md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
- md.gbp = vxlan_ext_gbp(skb);
- vxflags = vxlan_port->exts |
- (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0);
-
- err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst,
- tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
- src_port, dst_port,
- &md, false, vxflags);
- if (err < 0)
- ip_rt_put(rt);
- return err;
-error:
- kfree_skb(skb);
- return err;
+ return ovs_netdev_link(vport, parms->name);
}
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *egress_tun_info)
+ struct dp_upcall_info *upcall)
{
+ struct vxlan_dev *vxlan = netdev_priv(vport->dev);
struct net *net = ovs_dp_get_net(vport->dp);
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 dst_port = vxlan_dev_dst_port(vxlan);
__be16 src_port;
int port_min;
int port_max;
@@ -281,37 +159,28 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
inet_get_local_port_range(net, &port_min, &port_max);
src_port = udp_flow_src_port(net, skb, 0, 0, true);
- return ovs_tunnel_get_egress_info(egress_tun_info, net,
- OVS_CB(skb)->egress_tun_info,
- IPPROTO_UDP, skb->mark,
+ return ovs_tunnel_get_egress_info(upcall, net,
+ skb, IPPROTO_UDP,
src_port, dst_port);
}
-static const char *vxlan_get_name(const struct vport *vport)
-{
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- return vxlan_port->name;
-}
-
-static struct vport_ops ovs_vxlan_vport_ops = {
- .type = OVS_VPORT_TYPE_VXLAN,
- .create = vxlan_tnl_create,
- .destroy = vxlan_tnl_destroy,
- .get_name = vxlan_get_name,
- .get_options = vxlan_get_options,
- .send = vxlan_tnl_send,
+static struct vport_ops ovs_vxlan_netdev_vport_ops = {
+ .type = OVS_VPORT_TYPE_VXLAN,
+ .create = vxlan_create,
+ .destroy = ovs_netdev_tunnel_destroy,
+ .get_options = vxlan_get_options,
+ .send = ovs_netdev_send,
.get_egress_tun_info = vxlan_get_egress_tun_info,
- .owner = THIS_MODULE,
};
static int __init ovs_vxlan_tnl_init(void)
{
- return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
+ return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
}
static void __exit ovs_vxlan_tnl_exit(void)
{
- ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
+ ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
}
module_init(ovs_vxlan_tnl_init);