summaryrefslogtreecommitdiff
path: root/net/mpls/af_mpls.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-03 15:08:17 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-03 15:08:17 (GMT)
commitdd5cdb48edfd34401799056a9acf61078d773f90 (patch)
tree8e251fb4a4c196540fe9b6a6d8b13275f93a057c /net/mpls/af_mpls.c
parent1e1a4e8f439113b7820bc7150569f685e1cc2b43 (diff)
parent62da98656b62a5ca57f22263705175af8ded5aa1 (diff)
downloadlinux-dd5cdb48edfd34401799056a9acf61078d773f90.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Another merge window, another set of networking changes. I've heard rumblings that the lightweight tunnels infrastructure has been voted networking change of the year. But what do I know? 1) Add conntrack support to openvswitch, from Joe Stringer. 2) Initial support for VRF (Virtual Routing and Forwarding), which allows the segmentation of routing paths without using multiple devices. There are some semantic kinks to work out still, but this is a reasonably strong foundation. From David Ahern. 3) Remove spinlock fro act_bpf fast path, from Alexei Starovoitov. 4) Ignore route nexthops with a link down state in ipv6, just like ipv4. From Andy Gospodarek. 5) Remove spinlock from fast path of act_gact and act_mirred, from Eric Dumazet. 6) Document the DSA layer, from Florian Fainelli. 7) Add netconsole support to bcmgenet, systemport, and DSA. Also from Florian Fainelli. 8) Add Mellanox Switch Driver and core infrastructure, from Jiri Pirko. 9) Add support for "light weight tunnels", which allow for encapsulation and decapsulation without bearing the overhead of a full blown netdevice. From Thomas Graf, Jiri Benc, and a cast of others. 10) Add Identifier Locator Addressing support for ipv6, from Tom Herbert. 11) Support fragmented SKBs in iwlwifi, from Johannes Berg. 12) Allow perf PMUs to be accessed from eBPF programs, from Kaixu Xia. 13) Add BQL support to 3c59x driver, from Loganaden Velvindron. 14) Stop using a zero TX queue length to mean that a device shouldn't have a qdisc attached, use an explicit flag instead. From Phil Sutter. 15) Use generic geneve netdevice infrastructure in openvswitch, from Pravin B Shelar. 16) Add infrastructure to avoid re-forwarding a packet in software that was already forwarded by a hardware switch. From Scott Feldman. 17) Allow AF_PACKET fanout function to be implemented in a bpf program, from Willem de Bruijn" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1458 commits) netfilter: nf_conntrack: make nf_ct_zone_dflt built-in netfilter: nf_dup{4, 6}: fix build error when nf_conntrack disabled net: fec: clear receive interrupts before processing a packet ipv6: fix exthdrs offload registration in out_rt path xen-netback: add support for multicast control bgmac: Update fixed_phy_register() sock, diag: fix panic in sock_diag_put_filterinfo flow_dissector: Use 'const' where possible. flow_dissector: Fix function argument ordering dependency ixgbe: Resolve "initialized field overwritten" warnings ixgbe: Remove bimodal SR-IOV disabling ixgbe: Add support for reporting 2.5G link speed ixgbe: fix bounds checking in ixgbe_setup_tc for 82598 ixgbe: support for ethtool set_rxfh ixgbe: Avoid needless PHY access on copper phys ixgbe: cleanup to use cached mask value ixgbe: Remove second instance of lan_id variable ixgbe: use kzalloc for allocating one thing flow: Move __get_hash_from_flowi{4,6} into flow_dissector.c ixgbe: Remove unused PCI bus types ...
Diffstat (limited to 'net/mpls/af_mpls.c')
-rw-r--r--net/mpls/af_mpls.c201
1 files changed, 157 insertions, 44 deletions
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1f93a59..bb185a2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -15,6 +15,10 @@
#include <net/ip_fib.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
#include "internal.h"
#define LABEL_NOT_SPECIFIED (1<<20)
@@ -23,11 +27,23 @@
/* This maximum ha length copied from the definition of struct neighbour */
#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
+enum mpls_payload_type {
+ MPT_UNSPEC, /* IPv4 or IPv6 */
+ MPT_IPV4 = 4,
+ MPT_IPV6 = 6,
+
+ /* Other types not implemented:
+ * - Pseudo-wire with or without control word (RFC4385)
+ * - GAL (RFC5586)
+ */
+};
+
struct mpls_route { /* next hop label forwarding entry */
struct net_device __rcu *rt_dev;
struct rcu_head rt_rcu;
u32 rt_label[MAX_NEW_LABELS];
u8 rt_protocol; /* routing protocol that set this entry */
+ u8 rt_payload_type;
u8 rt_labels;
u8 rt_via_alen;
u8 rt_via_table;
@@ -58,10 +74,11 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
return rcu_dereference_rtnl(dev->mpls_ptr);
}
-static bool mpls_output_possible(const struct net_device *dev)
+bool mpls_output_possible(const struct net_device *dev)
{
return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
}
+EXPORT_SYMBOL_GPL(mpls_output_possible);
static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
{
@@ -69,13 +86,14 @@ static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
return rt->rt_labels * sizeof(struct mpls_shim_hdr);
}
-static unsigned int mpls_dev_mtu(const struct net_device *dev)
+unsigned int mpls_dev_mtu(const struct net_device *dev)
{
/* The amount of data the layer 2 frame can hold */
return dev->mtu;
}
+EXPORT_SYMBOL_GPL(mpls_dev_mtu);
-static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
@@ -85,20 +103,13 @@ static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
return true;
}
+EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
struct mpls_entry_decoded dec)
{
- /* RFC4385 and RFC5586 encode other packets in mpls such that
- * they don't conflict with the ip version number, making
- * decoding by examining the ip version correct in everything
- * except for the strangest cases.
- *
- * The strange cases if we choose to support them will require
- * manual configuration.
- */
- struct iphdr *hdr4;
- bool success = true;
+ enum mpls_payload_type payload_type;
+ bool success = false;
/* The IPv4 code below accesses through the IPv4 header
* checksum, which is 12 bytes into the packet.
@@ -113,23 +124,32 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
if (!pskb_may_pull(skb, 12))
return false;
- /* Use ip_hdr to find the ip protocol version */
- hdr4 = ip_hdr(skb);
- if (hdr4->version == 4) {
+ payload_type = rt->rt_payload_type;
+ if (payload_type == MPT_UNSPEC)
+ payload_type = ip_hdr(skb)->version;
+
+ switch (payload_type) {
+ case MPT_IPV4: {
+ struct iphdr *hdr4 = ip_hdr(skb);
skb->protocol = htons(ETH_P_IP);
csum_replace2(&hdr4->check,
htons(hdr4->ttl << 8),
htons(dec.ttl << 8));
hdr4->ttl = dec.ttl;
+ success = true;
+ break;
}
- else if (hdr4->version == 6) {
+ case MPT_IPV6: {
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
skb->protocol = htons(ETH_P_IPV6);
hdr6->hop_limit = dec.ttl;
+ success = true;
+ break;
+ }
+ case MPT_UNSPEC:
+ break;
}
- else
- /* version 0 and version 1 are used by pseudo wires */
- success = false;
+
return success;
}
@@ -248,16 +268,17 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
};
struct mpls_route_config {
- u32 rc_protocol;
- u32 rc_ifindex;
- u16 rc_via_table;
- u16 rc_via_alen;
- u8 rc_via[MAX_VIA_ALEN];
- u32 rc_label;
- u32 rc_output_labels;
- u32 rc_output_label[MAX_NEW_LABELS];
- u32 rc_nlflags;
- struct nl_info rc_nlinfo;
+ u32 rc_protocol;
+ u32 rc_ifindex;
+ u16 rc_via_table;
+ u16 rc_via_alen;
+ u8 rc_via[MAX_VIA_ALEN];
+ u32 rc_label;
+ u32 rc_output_labels;
+ u32 rc_output_label[MAX_NEW_LABELS];
+ u32 rc_nlflags;
+ enum mpls_payload_type rc_payload_type;
+ struct nl_info rc_nlinfo;
};
static struct mpls_route *mpls_rt_alloc(size_t alen)
@@ -286,7 +307,7 @@ static void mpls_notify_route(struct net *net, unsigned index,
struct mpls_route *rt = new ? new : old;
unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
/* Ignore reserved labels for now */
- if (rt && (index >= 16))
+ if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}
@@ -320,13 +341,96 @@ static unsigned find_free_label(struct net *net)
platform_label = rtnl_dereference(net->mpls.platform_label);
platform_labels = net->mpls.platform_labels;
- for (index = 16; index < platform_labels; index++) {
+ for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels;
+ index++) {
if (!rtnl_dereference(platform_label[index]))
return index;
}
return LABEL_NOT_SPECIFIED;
}
+#if IS_ENABLED(CONFIG_INET)
+static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
+{
+ struct net_device *dev;
+ struct rtable *rt;
+ struct in_addr daddr;
+
+ memcpy(&daddr, addr, sizeof(struct in_addr));
+ rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
+ if (IS_ERR(rt))
+ return ERR_CAST(rt);
+
+ dev = rt->dst.dev;
+ dev_hold(dev);
+
+ ip_rt_put(rt);
+
+ return dev;
+}
+#else
+static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
+{
+ return ERR_PTR(-EAFNOSUPPORT);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
+{
+ struct net_device *dev;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ int err;
+
+ if (!ipv6_stub)
+ return ERR_PTR(-EAFNOSUPPORT);
+
+ memset(&fl6, 0, sizeof(fl6));
+ memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
+ err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
+ if (err)
+ return ERR_PTR(err);
+
+ dev = dst->dev;
+ dev_hold(dev);
+ dst_release(dst);
+
+ return dev;
+}
+#else
+static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
+{
+ return ERR_PTR(-EAFNOSUPPORT);
+}
+#endif
+
+static struct net_device *find_outdev(struct net *net,
+ struct mpls_route_config *cfg)
+{
+ struct net_device *dev = NULL;
+
+ if (!cfg->rc_ifindex) {
+ switch (cfg->rc_via_table) {
+ case NEIGH_ARP_TABLE:
+ dev = inet_fib_lookup_dev(net, cfg->rc_via);
+ break;
+ case NEIGH_ND_TABLE:
+ dev = inet6_fib_lookup_dev(net, cfg->rc_via);
+ break;
+ case NEIGH_LINK_TABLE:
+ break;
+ }
+ } else {
+ dev = dev_get_by_index(net, cfg->rc_ifindex);
+ }
+
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ return dev;
+}
+
static int mpls_route_add(struct mpls_route_config *cfg)
{
struct mpls_route __rcu **platform_label;
@@ -345,8 +449,8 @@ static int mpls_route_add(struct mpls_route_config *cfg)
index = find_free_label(net);
}
- /* The first 16 labels are reserved, and may not be set */
- if (index < 16)
+ /* Reserved labels may not be set */
+ if (index < MPLS_LABEL_FIRST_UNRESERVED)
goto errout;
/* The full 20 bit range may not be supported. */
@@ -357,10 +461,12 @@ static int mpls_route_add(struct mpls_route_config *cfg)
if (cfg->rc_output_labels > MAX_NEW_LABELS)
goto errout;
- err = -ENODEV;
- dev = dev_get_by_index(net, cfg->rc_ifindex);
- if (!dev)
+ dev = find_outdev(net, cfg);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ dev = NULL;
goto errout;
+ }
/* Ensure this is a supported device */
err = -EINVAL;
@@ -401,6 +507,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
rt->rt_label[i] = cfg->rc_output_label[i];
rt->rt_protocol = cfg->rc_protocol;
RCU_INIT_POINTER(rt->rt_dev, dev);
+ rt->rt_payload_type = cfg->rc_payload_type;
rt->rt_via_table = cfg->rc_via_table;
memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
@@ -423,8 +530,8 @@ static int mpls_route_del(struct mpls_route_config *cfg)
index = cfg->rc_label;
- /* The first 16 labels are reserved, and may not be removed */
- if (index < 16)
+ /* Reserved labels may not be removed */
+ if (index < MPLS_LABEL_FIRST_UNRESERVED)
goto errout;
/* The full 20 bit range may not be supported */
@@ -626,6 +733,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
return 0;
}
+EXPORT_SYMBOL_GPL(nla_put_labels);
int nla_get_labels(const struct nlattr *nla,
u32 max_labels, u32 *labels, u32 label[])
@@ -671,6 +779,7 @@ int nla_get_labels(const struct nlattr *nla,
*labels = nla_labels;
return 0;
}
+EXPORT_SYMBOL_GPL(nla_get_labels);
static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct mpls_route_config *cfg)
@@ -740,8 +849,8 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
&cfg->rc_label))
goto errout;
- /* The first 16 labels are reserved, and may not be set */
- if (cfg->rc_label < 16)
+ /* Reserved labels may not be set */
+ if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
goto errout;
break;
@@ -866,8 +975,8 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
ASSERT_RTNL();
index = cb->args[0];
- if (index < 16)
- index = 16;
+ if (index < MPLS_LABEL_FIRST_UNRESERVED)
+ index = MPLS_LABEL_FIRST_UNRESERVED;
platform_label = rtnl_dereference(net->mpls.platform_label);
platform_labels = net->mpls.platform_labels;
@@ -953,6 +1062,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
goto nort0;
RCU_INIT_POINTER(rt0->rt_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
+ rt0->rt_payload_type = MPT_IPV4;
rt0->rt_via_table = NEIGH_LINK_TABLE;
memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
}
@@ -963,6 +1073,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
goto nort2;
RCU_INIT_POINTER(rt2->rt_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
+ rt2->rt_payload_type = MPT_IPV6;
rt2->rt_via_table = NEIGH_LINK_TABLE;
memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
}
@@ -1066,8 +1177,10 @@ static int mpls_net_init(struct net *net)
table[0].data = net;
net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
- if (net->mpls.ctl == NULL)
+ if (net->mpls.ctl == NULL) {
+ kfree(table);
return -ENOMEM;
+ }
return 0;
}