From ef493bd930ae482c608c5999b40d79dda3f2a674 Mon Sep 17 00:00:00 2001 From: Roman Kubiak Date: Fri, 12 Jun 2015 12:32:57 +0200 Subject: netfilter: nfnetlink_queue: add security context information This patch adds an additional attribute when sending packet information via netlink in netfilter_queue module. It will send additional security context data, so that userspace applications can verify this context against their own security databases. Signed-off-by: Roman Kubiak Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 8dd819e..b67a853 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -49,6 +49,7 @@ enum nfqnl_attr_type { NFQA_EXP, /* nf_conntrack_netlink.h */ NFQA_UID, /* __u32 sk uid */ NFQA_GID, /* __u32 sk gid */ + NFQA_SECCTX, /* security context string */ __NFQA_MAX }; @@ -102,7 +103,8 @@ enum nfqnl_attr_config { #define NFQA_CFG_F_CONNTRACK (1 << 1) #define NFQA_CFG_F_GSO (1 << 2) #define NFQA_CFG_F_UID_GID (1 << 3) -#define NFQA_CFG_F_MAX (1 << 4) +#define NFQA_CFG_F_SECCTX (1 << 4) +#define NFQA_CFG_F_MAX (1 << 5) /* flags for NFQA_SKB_INFO */ /* packet appears to have wrong checksums, but they are ok */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 22a5ac7..6eccf0f 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -278,6 +278,23 @@ nla_put_failure: return -1; } +static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) +{ + u32 seclen = 0; +#if IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (!skb || !sk_fullsock(skb->sk)) + return 0; + + read_lock_bh(&skb->sk->sk_callback_lock); + + if (skb->secmark) + security_secid_to_secctx(skb->secmark, secdata, &seclen); + + read_unlock_bh(&skb->sk->sk_callback_lock); +#endif + return seclen; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -297,6 +314,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); bool csum_verify; + char *secdata = NULL; + u32 seclen = 0; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -352,6 +371,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t))); /* gid */ } + if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) { + seclen = nfqnl_get_sk_secctx(entskb, &secdata); + if (seclen) + size += nla_total_size(seclen); + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) { @@ -479,6 +504,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) goto nla_put_failure; + if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) + goto nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; @@ -1142,7 +1170,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EOPNOTSUPP; goto err_out_unlock; } - +#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (flags & mask & NFQA_CFG_F_SECCTX) { + ret = -EOPNOTSUPP; + goto err_out_unlock; + } +#endif spin_lock_bh(&queue->lock); queue->flags &= ~mask; queue->flags |= flags & mask; -- cgit v0.10.2 From 01555e74bde51444c6898ef1800fb2bc697d479e Mon Sep 17 00:00:00 2001 From: Harout Hedeshian Date: Mon, 15 Jun 2015 18:40:43 -0600 Subject: netfilter: xt_socket: add XT_SOCKET_RESTORESKMARK flag xt_socket is useful for matching sockets with IP_TRANSPARENT and taking some action on the matching packets. However, it lacks the ability to match only a small subset of transparent sockets. Suppose there are 2 applications, each with its own set of transparent sockets. The first application wants all matching packets dropped, while the second application wants them forwarded somewhere else. Add the ability to retore the skb->mark from the sk_mark. The mark is only restored if a matching socket is found and the transparent / nowildcard conditions are satisfied. Now the 2 hypothetical applications can differentiate their sockets based on a mark value set with SO_MARK. iptables -t mangle -I PREROUTING -m socket --transparent \ --restore-skmark -j action iptables -t mangle -A action -m mark --mark 10 -j action2 iptables -t mangle -A action -m mark --mark 11 -j action3 Signed-off-by: Harout Hedeshian Signed-off-by: Pablo Neira Ayuso diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 6315e2a..87644f8 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -6,6 +6,7 @@ enum { XT_SOCKET_TRANSPARENT = 1 << 0, XT_SOCKET_NOWILDCARD = 1 << 1, + XT_SOCKET_RESTORESKMARK = 1 << 2, }; struct xt_socket_mtinfo1 { @@ -18,4 +19,11 @@ struct xt_socket_mtinfo2 { }; #define XT_SOCKET_FLAGS_V2 (XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD) +struct xt_socket_mtinfo3 { + __u8 flags; +}; +#define XT_SOCKET_FLAGS_V3 (XT_SOCKET_TRANSPARENT \ + | XT_SOCKET_NOWILDCARD \ + | XT_SOCKET_RESTORESKMARK) + #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e092cb0..43e26c8 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -205,6 +205,7 @@ static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) { + struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (!sk) @@ -226,6 +227,10 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, if (info->flags & XT_SOCKET_TRANSPARENT) transparent = xt_socket_sk_is_transparent(sk); + if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && + transparent) + pskb->mark = sk->sk_mark; + if (sk != skb->sk) sock_gen_put(sk); @@ -247,7 +252,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -371,9 +376,10 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, } static bool -socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (!sk) @@ -395,6 +401,10 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) if (info->flags & XT_SOCKET_TRANSPARENT) transparent = xt_socket_sk_is_transparent(sk); + if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && + transparent) + pskb->mark = sk->sk_mark; + if (sk != skb->sk) sock_gen_put(sk); @@ -428,6 +438,19 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par) return 0; } +static int socket_mt_v3_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo3 *info = + (struct xt_socket_mtinfo3 *)par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V3) { + pr_info("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V3); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -442,7 +465,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1_v2, + .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -454,7 +477,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1_v2, + .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -466,7 +489,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 2, .family = NFPROTO_IPV4, - .match = socket_mt4_v1_v2, + .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -478,7 +501,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 2, .family = NFPROTO_IPV6, - .match = socket_mt6_v1_v2, + .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -486,6 +509,30 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .me = THIS_MODULE, }, #endif + { + .name = "socket", + .revision = 3, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2_v3, + .checkentry = socket_mt_v3_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 3, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2_v3, + .checkentry = socket_mt_v3_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif }; static int __init socket_mt_init(void) -- cgit v0.10.2 From a1a56aaa0735c7821e85c37268a7c12e132415cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 18:10:13 -0700 Subject: netfilter: x_tables: align per cpu xt_counter Let's force a 16 bytes alignment on xt_counter percpu allocations, so that bytes and packets sit in same cache line. xt_counter being exported to user space, we cannot add __align(16) on the structure itself. Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 95693c4..1c97a22 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -356,7 +356,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, * so nothing needs to be done there. * * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. + * counter, or 0 on !SMP. We force an alignment of 16 bytes + * so that bytes/packets share a common cache line. * * Hence caller must use IS_ERR_VALUE to check for error, this * allows us to return 0 for single core systems without forcing @@ -365,7 +366,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, static inline u64 xt_percpu_counter_alloc(void) { if (nr_cpu_ids > 1) { - void __percpu *res = alloc_percpu(struct xt_counters); + void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), + sizeof(struct xt_counters)); if (res == NULL) return (u64) -ENOMEM; -- cgit v0.10.2 From c4e70a87d975d1f561a00abfe2d3cefa2a486c95 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Jun 2015 13:38:26 +0200 Subject: netfilter: bridge: rename br_netfilter.c to br_netfilter_hooks.c To prepare separation of the IPv6 code into different file. Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/Makefile b/net/bridge/Makefile index fd7ee03..c52577a 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -12,6 +12,7 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o +br_netfilter-y := br_netfilter_hooks.o obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c deleted file mode 100644 index e4e5f2f..0000000 --- a/net/bridge/br_netfilter.c +++ /dev/null @@ -1,1282 +0,0 @@ -/* - * Handle firewalling - * Linux ethernet bridge - * - * Authors: - * Lennert Buytenhek - * Bart De Schuymer - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Lennert dedicates this file to Kerstin Wurdinger. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include "br_private.h" -#ifdef CONFIG_SYSCTL -#include -#endif - -#ifdef CONFIG_SYSCTL -static struct ctl_table_header *brnf_sysctl_header; -static int brnf_call_iptables __read_mostly = 1; -static int brnf_call_ip6tables __read_mostly = 1; -static int brnf_call_arptables __read_mostly = 1; -static int brnf_filter_vlan_tagged __read_mostly = 0; -static int brnf_filter_pppoe_tagged __read_mostly = 0; -static int brnf_pass_vlan_indev __read_mostly = 0; -#else -#define brnf_call_iptables 1 -#define brnf_call_ip6tables 1 -#define brnf_call_arptables 1 -#define brnf_filter_vlan_tagged 0 -#define brnf_filter_pppoe_tagged 0 -#define brnf_pass_vlan_indev 0 -#endif - -#define IS_IP(skb) \ - (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) - -#define IS_IPV6(skb) \ - (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) - -#define IS_ARP(skb) \ - (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) - -static inline __be16 vlan_proto(const struct sk_buff *skb) -{ - if (skb_vlan_tag_present(skb)) - return skb->protocol; - else if (skb->protocol == htons(ETH_P_8021Q)) - return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; - else - return 0; -} - -#define IS_VLAN_IP(skb) \ - (vlan_proto(skb) == htons(ETH_P_IP) && \ - brnf_filter_vlan_tagged) - -#define IS_VLAN_IPV6(skb) \ - (vlan_proto(skb) == htons(ETH_P_IPV6) && \ - brnf_filter_vlan_tagged) - -#define IS_VLAN_ARP(skb) \ - (vlan_proto(skb) == htons(ETH_P_ARP) && \ - brnf_filter_vlan_tagged) - -static inline __be16 pppoe_proto(const struct sk_buff *skb) -{ - return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + - sizeof(struct pppoe_hdr))); -} - -#define IS_PPPOE_IP(skb) \ - (skb->protocol == htons(ETH_P_PPP_SES) && \ - pppoe_proto(skb) == htons(PPP_IP) && \ - brnf_filter_pppoe_tagged) - -#define IS_PPPOE_IPV6(skb) \ - (skb->protocol == htons(ETH_P_PPP_SES) && \ - pppoe_proto(skb) == htons(PPP_IPV6) && \ - brnf_filter_pppoe_tagged) - -/* largest possible L2 header, see br_nf_dev_queue_xmit() */ -#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) - -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -struct brnf_frag_data { - char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; - u8 encap_size; - u8 size; - u16 vlan_tci; - __be16 vlan_proto; -}; - -static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); -#endif - -static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) -{ - return skb->nf_bridge; -} - -static void nf_bridge_info_free(struct sk_buff *skb) -{ - if (skb->nf_bridge) { - nf_bridge_put(skb->nf_bridge); - skb->nf_bridge = NULL; - } -} - -static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) -{ - struct net_bridge_port *port; - - port = br_port_get_rcu(dev); - return port ? &port->br->fake_rtable : NULL; -} - -static inline struct net_device *bridge_parent(const struct net_device *dev) -{ - struct net_bridge_port *port; - - port = br_port_get_rcu(dev); - return port ? port->br->dev : NULL; -} - -static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) -{ - skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); - if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); - - return skb->nf_bridge; -} - -static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - if (atomic_read(&nf_bridge->use) > 1) { - struct nf_bridge_info *tmp = nf_bridge_alloc(skb); - - if (tmp) { - memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); - atomic_set(&tmp->use, 1); - } - nf_bridge_put(nf_bridge); - nf_bridge = tmp; - } - return nf_bridge; -} - -static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) -{ - switch (skb->protocol) { - case __cpu_to_be16(ETH_P_8021Q): - return VLAN_HLEN; - case __cpu_to_be16(ETH_P_PPP_SES): - return PPPOE_SES_HLEN; - default: - return 0; - } -} - -static inline void nf_bridge_push_encap_header(struct sk_buff *skb) -{ - unsigned int len = nf_bridge_encap_header_len(skb); - - skb_push(skb, len); - skb->network_header -= len; -} - -static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) -{ - unsigned int len = nf_bridge_encap_header_len(skb); - - skb_pull(skb, len); - skb->network_header += len; -} - -static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) -{ - unsigned int len = nf_bridge_encap_header_len(skb); - - skb_pull_rcsum(skb, len); - skb->network_header += len; -} - -/* When handing a packet over to the IP layer - * check whether we have a skb that is in the - * expected format - */ - -static int br_validate_ipv4(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct net_device *dev = skb->dev; - u32 len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto inhdr_error; - - iph = ip_hdr(skb); - - /* Basic sanity checks */ - if (iph->ihl < 5 || iph->version != 4) - goto inhdr_error; - - if (!pskb_may_pull(skb, iph->ihl*4)) - goto inhdr_error; - - iph = ip_hdr(skb); - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - goto inhdr_error; - - len = ntohs(iph->tot_len); - if (skb->len < len) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } else if (len < (iph->ihl*4)) - goto inhdr_error; - - if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); - goto drop; - } - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - /* We should really parse IP options here but until - * somebody who actually uses IP options complains to - * us we'll just silently ignore the options because - * we're lazy! - */ - return 0; - -inhdr_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); -drop: - return -1; -} - -/* We only check the length. A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int check_hbh_len(struct sk_buff *skb) -{ - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); - u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - - if ((raw + len) - skb->data > skb_headlen(skb)) - goto bad; - - off += 2; - len -= 2; - - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - - case IPV6_TLV_PADN: - break; - - case IPV6_TLV_JUMBO: - if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - goto bad; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - goto bad; - nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; - } - off += optlen; - len -= optlen; - } - if (len == 0) - return 0; -bad: - return -1; -} - -/* Equivalent to br_validate_ipv4 for IPv6 */ -static int br_validate_ipv6(struct sk_buff *skb) -{ - const struct ipv6hdr *hdr; - struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); - u32 pkt_len; - u8 ip6h_len = sizeof(struct ipv6hdr); - - if (!pskb_may_pull(skb, ip6h_len)) - goto inhdr_error; - - if (skb->len < ip6h_len) - goto drop; - - hdr = ipv6_hdr(skb); - - if (hdr->version != 6) - goto inhdr_error; - - pkt_len = ntohs(hdr->payload_len); - - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(dev_net(dev), idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(dev_net(dev), idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - } - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) - goto drop; - - memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - /* No IP options in IPv6 header; however it should be - * checked if some next headers need special treatment - */ - return 0; - -inhdr_error: - IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); -drop: - return -1; -} - -static void nf_bridge_update_protocol(struct sk_buff *skb) -{ - switch (skb->nf_bridge->orig_proto) { - case BRNF_PROTO_8021Q: - skb->protocol = htons(ETH_P_8021Q); - break; - case BRNF_PROTO_PPPOE: - skb->protocol = htons(ETH_P_PPP_SES); - break; - case BRNF_PROTO_UNCHANGED: - break; - } -} - -/* Obtain the correct destination MAC address, while preserving the original - * source MAC address. If we already know this address, we just copy it. If we - * don't, we use the neighbour framework to find out. In both cases, we make - * sure that br_handle_frame_finish() is called afterwards. - */ -static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) -{ - struct neighbour *neigh; - struct dst_entry *dst; - - skb->dev = bridge_parent(skb->dev); - if (!skb->dev) - goto free_skb; - dst = skb_dst(skb); - neigh = dst_neigh_lookup_skb(dst, skb); - if (neigh) { - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - int ret; - - if (neigh->hh.hh_len) { - neigh_hh_bridge(&neigh->hh, skb); - skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(sk, skb); - } else { - /* the neighbour function below overwrites the complete - * MAC header, so we save the Ethernet source address and - * protocol number. - */ - skb_copy_from_linear_data_offset(skb, - -(ETH_HLEN-ETH_ALEN), - nf_bridge->neigh_header, - ETH_HLEN-ETH_ALEN); - /* tell br_dev_xmit to continue with forwarding */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; - /* FIXME Need to refragment */ - ret = neigh->output(neigh, skb); - } - neigh_release(neigh); - return ret; - } -free_skb: - kfree_skb(skb); - return 0; -} - -static bool daddr_was_changed(const struct sk_buff *skb, - const struct nf_bridge_info *nf_bridge) -{ - switch (skb->protocol) { - case htons(ETH_P_IP): - return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; - case htons(ETH_P_IPV6): - return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, - sizeof(ipv6_hdr(skb)->daddr)) != 0; - default: - return false; - } -} - -/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables - * PREROUTING and continue the bridge PRE_ROUTING hook. See comment - * for br_nf_pre_routing_finish(), same logic is used here but - * equivalent IPv6 function ip6_route_input() called indirectly. - */ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct rtable *rt; - struct net_device *dev = skb->dev; - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); - - nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; - - if (nf_bridge->pkt_otherhost) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->pkt_otherhost = false; - } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - if (daddr_was_changed(skb, nf_bridge)) { - skb_dst_drop(skb); - v6ops->route_input(skb); - - if (skb_dst(skb)->error) { - kfree_skb(skb); - return 0; - } - - if (skb_dst(skb)->dev == dev) { - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); - return 0; - } - ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); - skb->pkt_type = PACKET_HOST; - } else { - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; - } - skb_dst_set_noref(skb, &rt->dst); - } - - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - - return 0; -} - -/* This requires some explaining. If DNAT has taken place, - * we will need to fix up the destination Ethernet address. - * This is also true when SNAT takes place (for the reply direction). - * - * There are two cases to consider: - * 1. The packet was DNAT'ed to a device in the same bridge - * port group as it was received on. We can still bridge - * the packet. - * 2. The packet was DNAT'ed to a different device, either - * a non-bridged device or another bridge port group. - * The packet will need to be routed. - * - * The correct way of distinguishing between these two cases is to - * call ip_route_input() and to look at skb->dst->dev, which is - * changed to the destination device if ip_route_input() succeeds. - * - * Let's first consider the case that ip_route_input() succeeds: - * - * If the output device equals the logical bridge device the packet - * came in on, we can consider this bridging. The corresponding MAC - * address will be obtained in br_nf_pre_routing_finish_bridge. - * Otherwise, the packet is considered to be routed and we just - * change the destination MAC address so that the packet will - * later be passed up to the IP stack to be routed. For a redirected - * packet, ip_route_input() will give back the localhost as output device, - * which differs from the bridge device. - * - * Let's now consider the case that ip_route_input() fails: - * - * This can be because the destination address is martian, in which case - * the packet will be dropped. - * If IP forwarding is disabled, ip_route_input() will fail, while - * ip_route_output_key() can return success. The source - * address for ip_route_output_key() is set to zero, so ip_route_output_key() - * thinks we're handling a locally generated packet and won't care - * if IP forwarding is enabled. If the output device equals the logical bridge - * device, we proceed as if ip_route_input() succeeded. If it differs from the - * logical bridge port or if ip_route_output_key() fails we drop the packet. - */ -static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct iphdr *iph = ip_hdr(skb); - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct rtable *rt; - int err; - - nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; - - if (nf_bridge->pkt_otherhost) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->pkt_otherhost = false; - } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - if (daddr_was_changed(skb, nf_bridge)) { - if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - - /* If err equals -EHOSTUNREACH the error is due to a - * martian destination or due to the fact that - * forwarding is disabled. For most martian packets, - * ip_route_output_key() will fail. It won't fail for 2 types of - * martian destinations: loopback destinations and destination - * 0.0.0.0. In both cases the packet will be dropped because the - * destination is the loopback device and not the bridge. */ - if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) - goto free_skb; - - rt = ip_route_output(dev_net(dev), iph->daddr, 0, - RT_TOS(iph->tos), 0); - if (!IS_ERR(rt)) { - /* - Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. */ - if (rt->dst.dev == dev) { - skb_dst_set(skb, &rt->dst); - goto bridged_dnat; - } - ip_rt_put(rt); - } -free_skb: - kfree_skb(skb); - return 0; - } else { - if (skb_dst(skb)->dev == dev) { -bridged_dnat: - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, - NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); - return 0; - } - ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); - skb->pkt_type = PACKET_HOST; - } - } else { - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; - } - skb_dst_set_noref(skb, &rt->dst); - } - - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - - return 0; -} - -static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) -{ - struct net_device *vlan, *br; - - br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) - return br; - - vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, - skb_vlan_tag_get(skb) & VLAN_VID_MASK); - - return vlan ? vlan : br; -} - -/* Some common code for IPv4/IPv6 */ -static struct net_device *setup_pre_routing(struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - - if (skb->pkt_type == PACKET_OTHERHOST) { - skb->pkt_type = PACKET_HOST; - nf_bridge->pkt_otherhost = true; - } - - nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; - nf_bridge->physindev = skb->dev; - skb->dev = brnf_get_logical_dev(skb, skb->dev); - - if (skb->protocol == htons(ETH_P_8021Q)) - nf_bridge->orig_proto = BRNF_PROTO_8021Q; - else if (skb->protocol == htons(ETH_P_PPP_SES)) - nf_bridge->orig_proto = BRNF_PROTO_PPPOE; - - /* Must drop socket now because of tproxy. */ - skb_orphan(skb); - return skb->dev; -} - -/* Replicate the checks that IPv6 does on packet reception and pass the packet - * to ip6tables. - */ -static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_bridge_info *nf_bridge; - - if (br_validate_ipv6(skb)) - return NF_DROP; - - nf_bridge_put(skb->nf_bridge); - if (!nf_bridge_alloc(skb)) - return NF_DROP; - if (!setup_pre_routing(skb)) - return NF_DROP; - - nf_bridge = nf_bridge_info_get(skb); - nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; - - skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, - skb->dev, NULL, - br_nf_pre_routing_finish_ipv6); - - return NF_STOLEN; -} - -/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. - * Replicate the checks that IPv4 does on packet reception. - * Set skb->dev to the bridge device (i.e. parent of the - * receiving device) to make netfilter happy, the REDIRECT - * target in particular. Save the original destination IP - * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_bridge_info *nf_bridge; - struct net_bridge_port *p; - struct net_bridge *br; - __u32 len = nf_bridge_encap_header_len(skb); - - if (unlikely(!pskb_may_pull(skb, len))) - return NF_DROP; - - p = br_port_get_rcu(state->in); - if (p == NULL) - return NF_DROP; - br = p->br; - - if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { - if (!brnf_call_ip6tables && !br->nf_call_ip6tables) - return NF_ACCEPT; - - nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, state); - } - - if (!brnf_call_iptables && !br->nf_call_iptables) - return NF_ACCEPT; - - if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) - return NF_ACCEPT; - - nf_bridge_pull_encap_header_rcsum(skb); - - if (br_validate_ipv4(skb)) - return NF_DROP; - - nf_bridge_put(skb->nf_bridge); - if (!nf_bridge_alloc(skb)) - return NF_DROP; - if (!setup_pre_routing(skb)) - return NF_DROP; - - nf_bridge = nf_bridge_info_get(skb); - nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; - - skb->protocol = htons(ETH_P_IP); - - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, - skb->dev, NULL, - br_nf_pre_routing_finish); - - return NF_STOLEN; -} - - -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. */ -static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - -/* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct net_device *in; - - if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - - if (skb->protocol == htons(ETH_P_IP)) - nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; - - if (skb->protocol == htons(ETH_P_IPV6)) - nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; - - in = nf_bridge->physindev; - if (nf_bridge->pkt_otherhost) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->pkt_otherhost = false; - } - nf_bridge_update_protocol(skb); - } else { - in = *((struct net_device **)(skb->cb)); - } - nf_bridge_push_encap_header(skb); - - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, - in, skb->dev, br_forward_finish, 1); - return 0; -} - - -/* This is the 'purely bridged' case. For IP, we pass the packet to - * netfilter with indev and outdev set to the bridge device, - * but we are still able to filter on the 'real' indev/outdev - * because of the physdev module. For ARP, indev and outdev are the - * bridge ports. */ -static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_bridge_info *nf_bridge; - struct net_device *parent; - u_int8_t pf; - - if (!skb->nf_bridge) - return NF_ACCEPT; - - /* Need exclusive nf_bridge_info since we might have multiple - * different physoutdevs. */ - if (!nf_bridge_unshare(skb)) - return NF_DROP; - - nf_bridge = nf_bridge_info_get(skb); - if (!nf_bridge) - return NF_DROP; - - parent = bridge_parent(state->out); - if (!parent) - return NF_DROP; - - if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) - pf = NFPROTO_IPV4; - else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) - pf = NFPROTO_IPV6; - else - return NF_ACCEPT; - - nf_bridge_pull_encap_header(skb); - - if (skb->pkt_type == PACKET_OTHERHOST) { - skb->pkt_type = PACKET_HOST; - nf_bridge->pkt_otherhost = true; - } - - if (pf == NFPROTO_IPV4) { - if (br_validate_ipv4(skb)) - return NF_DROP; - IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - } - - if (pf == NFPROTO_IPV6) { - if (br_validate_ipv6(skb)) - return NF_DROP; - IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; - } - - nf_bridge->physoutdev = skb->dev; - if (pf == NFPROTO_IPV4) - skb->protocol = htons(ETH_P_IP); - else - skb->protocol = htons(ETH_P_IPV6); - - NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, - brnf_get_logical_dev(skb, state->in), - parent, br_nf_forward_finish); - - return NF_STOLEN; -} - -static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct net_bridge_port *p; - struct net_bridge *br; - struct net_device **d = (struct net_device **)(skb->cb); - - p = br_port_get_rcu(state->out); - if (p == NULL) - return NF_ACCEPT; - br = p->br; - - if (!brnf_call_arptables && !br->nf_call_arptables) - return NF_ACCEPT; - - if (!IS_ARP(skb)) { - if (!IS_VLAN_ARP(skb)) - return NF_ACCEPT; - nf_bridge_pull_encap_header(skb); - } - - if (arp_hdr(skb)->ar_pln != 4) { - if (IS_VLAN_ARP(skb)) - nf_bridge_push_encap_header(skb); - return NF_ACCEPT; - } - *d = state->in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, - state->in, state->out, br_nf_forward_finish); - - return NF_STOLEN; -} - -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) -{ - struct brnf_frag_data *data; - int err; - - data = this_cpu_ptr(&brnf_frag_data_storage); - err = skb_cow_head(skb, data->size); - - if (err) { - kfree_skb(skb); - return 0; - } - - if (data->vlan_tci) { - skb->vlan_tci = data->vlan_tci; - skb->vlan_proto = data->vlan_proto; - } - - skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); - __skb_push(skb, data->encap_size); - - nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); -} -#endif - -static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) -{ - unsigned int mtu = ip_skb_dst_mtu(skb); - struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; - - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || - (IPCB(skb)->frag_max_size && - IPCB(skb)->frag_max_size > mtu))) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); - return -EMSGSIZE; - } - - return ip_do_fragment(sk, skb, output); -} - -static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) -{ - if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) - return PPPOE_SES_HLEN; - return 0; -} - -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; - - mtu_reserved = nf_bridge_mtu_reduction(skb); - - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { - nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); - } - - nf_bridge = nf_bridge_info_get(skb); - -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) - /* This is wrong! We should preserve the original fragment - * boundaries by preserving frag_list rather than refragmenting. - */ - if (skb->protocol == htons(ETH_P_IP)) { - struct brnf_frag_data *data; - - if (br_validate_ipv4(skb)) - return NF_DROP; - - IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - - nf_bridge_update_protocol(skb); - - data = this_cpu_ptr(&brnf_frag_data_storage); - - data->vlan_tci = skb->vlan_tci; - data->vlan_proto = skb->vlan_proto; - data->encap_size = nf_bridge_encap_header_len(skb); - data->size = ETH_HLEN + data->encap_size; - - skb_copy_from_linear_data_offset(skb, -data->size, data->mac, - data->size); - - return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); - } -#endif -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - if (skb->protocol == htons(ETH_P_IPV6)) { - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); - struct brnf_frag_data *data; - - if (br_validate_ipv6(skb)) - return NF_DROP; - - IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; - - nf_bridge_update_protocol(skb); - - data = this_cpu_ptr(&brnf_frag_data_storage); - data->encap_size = nf_bridge_encap_header_len(skb); - data->size = ETH_HLEN + data->encap_size; - - skb_copy_from_linear_data_offset(skb, -data->size, data->mac, - data->size); - - if (v6ops) - return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); - else - return -EMSGSIZE; - } -#endif - nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); -} - -/* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct net_device *realoutdev = bridge_parent(skb->dev); - u_int8_t pf; - - /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in - * on a bridge, but was delivered locally and is now being routed: - * - * POST_ROUTING was already invoked from the ip stack. - */ - if (!nf_bridge || !nf_bridge->physoutdev) - return NF_ACCEPT; - - if (!realoutdev) - return NF_DROP; - - if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) - pf = NFPROTO_IPV4; - else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) - pf = NFPROTO_IPV6; - else - return NF_ACCEPT; - - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care - * about the value of skb->pkt_type. */ - if (skb->pkt_type == PACKET_OTHERHOST) { - skb->pkt_type = PACKET_HOST; - nf_bridge->pkt_otherhost = true; - } - - nf_bridge_pull_encap_header(skb); - if (pf == NFPROTO_IPV4) - skb->protocol = htons(ETH_P_IP); - else - skb->protocol = htons(ETH_P_IPV6); - - NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, - NULL, realoutdev, - br_nf_dev_queue_xmit); - - return NF_STOLEN; -} - -/* IP/SABOTAGE *****************************************************/ -/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING - * for the second time. */ -static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (skb->nf_bridge && - !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { - return NF_STOP; - } - - return NF_ACCEPT; -} - -/* This is called when br_netfilter has called into iptables/netfilter, - * and DNAT has taken place on a bridge-forwarded packet. - * - * neigh->output has created a new MAC header, with local br0 MAC - * as saddr. - * - * This restores the original MAC saddr of the bridged packet - * before invoking bridge forward logic to transmit the packet. - */ -static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - - skb_pull(skb, ETH_HLEN); - nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; - - BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); - - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), - nf_bridge->neigh_header, - ETH_HLEN - ETH_ALEN); - skb->dev = nf_bridge->physindev; - - nf_bridge->physoutdev = NULL; - br_handle_frame_finish(NULL, skb); -} - -static int br_nf_dev_xmit(struct sk_buff *skb) -{ - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { - br_nf_pre_routing_finish_bridge_slow(skb); - return 1; - } - return 0; -} - -static const struct nf_br_ops br_ops = { - .br_dev_xmit_hook = br_nf_dev_xmit, -}; - -void br_netfilter_enable(void) -{ -} -EXPORT_SYMBOL_GPL(br_netfilter_enable); - -/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because - * br_dev_queue_push_xmit is called afterwards */ -static struct nf_hook_ops br_nf_ops[] __read_mostly = { - { - .hook = br_nf_pre_routing, - .owner = THIS_MODULE, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_PRE_ROUTING, - .priority = NF_BR_PRI_BRNF, - }, - { - .hook = br_nf_local_in, - .owner = THIS_MODULE, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, - { - .hook = br_nf_forward_ip, - .owner = THIS_MODULE, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_FORWARD, - .priority = NF_BR_PRI_BRNF - 1, - }, - { - .hook = br_nf_forward_arp, - .owner = THIS_MODULE, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_FORWARD, - .priority = NF_BR_PRI_BRNF, - }, - { - .hook = br_nf_post_routing, - .owner = THIS_MODULE, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_POST_ROUTING, - .priority = NF_BR_PRI_LAST, - }, - { - .hook = ip_sabotage_in, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_FIRST, - }, - { - .hook = ip_sabotage_in, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_FIRST, - }, -}; - -#ifdef CONFIG_SYSCTL -static -int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int ret; - - ret = proc_dointvec(ctl, write, buffer, lenp, ppos); - - if (write && *(int *)(ctl->data)) - *(int *)(ctl->data) = 1; - return ret; -} - -static struct ctl_table brnf_table[] = { - { - .procname = "bridge-nf-call-arptables", - .data = &brnf_call_arptables, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = brnf_sysctl_call_tables, - }, - { - .procname = "bridge-nf-call-iptables", - .data = &brnf_call_iptables, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = brnf_sysctl_call_tables, - }, - { - .procname = "bridge-nf-call-ip6tables", - .data = &brnf_call_ip6tables, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = brnf_sysctl_call_tables, - }, - { - .procname = "bridge-nf-filter-vlan-tagged", - .data = &brnf_filter_vlan_tagged, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = brnf_sysctl_call_tables, - }, - { - .procname = "bridge-nf-filter-pppoe-tagged", - .data = &brnf_filter_pppoe_tagged, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = brnf_sysctl_call_tables, - }, - { - .procname = "bridge-nf-pass-vlan-input-dev", - .data = &brnf_pass_vlan_indev, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = brnf_sysctl_call_tables, - }, - { } -}; -#endif - -static int __init br_netfilter_init(void) -{ - int ret; - - ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); - if (ret < 0) - return ret; - -#ifdef CONFIG_SYSCTL - brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); - if (brnf_sysctl_header == NULL) { - printk(KERN_WARNING - "br_netfilter: can't register to sysctl.\n"); - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); - return -ENOMEM; - } -#endif - RCU_INIT_POINTER(nf_br_ops, &br_ops); - printk(KERN_NOTICE "Bridge firewalling registered\n"); - return 0; -} - -static void __exit br_netfilter_fini(void) -{ - RCU_INIT_POINTER(nf_br_ops, NULL); - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(brnf_sysctl_header); -#endif -} - -module_init(br_netfilter_init); -module_exit(br_netfilter_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Lennert Buytenhek "); -MODULE_AUTHOR("Bart De Schuymer "); -MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c new file mode 100644 index 0000000..e4e5f2f --- /dev/null +++ b/net/bridge/br_netfilter_hooks.c @@ -0,0 +1,1282 @@ +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek + * Bart De Schuymer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include +#endif + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *brnf_sysctl_header; +static int brnf_call_iptables __read_mostly = 1; +static int brnf_call_ip6tables __read_mostly = 1; +static int brnf_call_arptables __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 0; +static int brnf_filter_pppoe_tagged __read_mostly = 0; +static int brnf_pass_vlan_indev __read_mostly = 0; +#else +#define brnf_call_iptables 1 +#define brnf_call_ip6tables 1 +#define brnf_call_arptables 1 +#define brnf_filter_vlan_tagged 0 +#define brnf_filter_pppoe_tagged 0 +#define brnf_pass_vlan_indev 0 +#endif + +#define IS_IP(skb) \ + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) + +#define IS_IPV6(skb) \ + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + +#define IS_ARP(skb) \ + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + +static inline __be16 vlan_proto(const struct sk_buff *skb) +{ + if (skb_vlan_tag_present(skb)) + return skb->protocol; + else if (skb->protocol == htons(ETH_P_8021Q)) + return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; + else + return 0; +} + +#define IS_VLAN_IP(skb) \ + (vlan_proto(skb) == htons(ETH_P_IP) && \ + brnf_filter_vlan_tagged) + +#define IS_VLAN_IPV6(skb) \ + (vlan_proto(skb) == htons(ETH_P_IPV6) && \ + brnf_filter_vlan_tagged) + +#define IS_VLAN_ARP(skb) \ + (vlan_proto(skb) == htons(ETH_P_ARP) && \ + brnf_filter_vlan_tagged) + +static inline __be16 pppoe_proto(const struct sk_buff *skb) +{ + return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); +} + +#define IS_PPPOE_IP(skb) \ + (skb->protocol == htons(ETH_P_PPP_SES) && \ + pppoe_proto(skb) == htons(PPP_IP) && \ + brnf_filter_pppoe_tagged) + +#define IS_PPPOE_IPV6(skb) \ + (skb->protocol == htons(ETH_P_PPP_SES) && \ + pppoe_proto(skb) == htons(PPP_IPV6) && \ + brnf_filter_pppoe_tagged) + +/* largest possible L2 header, see br_nf_dev_queue_xmit() */ +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +struct brnf_frag_data { + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; + u8 encap_size; + u8 size; + u16 vlan_tci; + __be16 vlan_proto; +}; + +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); +#endif + +static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + +static void nf_bridge_info_free(struct sk_buff *skb) +{ + if (skb->nf_bridge) { + nf_bridge_put(skb->nf_bridge); + skb->nf_bridge = NULL; + } +} + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; +} + +static inline struct net_device *bridge_parent(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? port->br->dev : NULL; +} + +static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) +{ + skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + if (likely(skb->nf_bridge)) + atomic_set(&(skb->nf_bridge->use), 1); + + return skb->nf_bridge; +} + +static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (atomic_read(&nf_bridge->use) > 1) { + struct nf_bridge_info *tmp = nf_bridge_alloc(skb); + + if (tmp) { + memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); + atomic_set(&tmp->use, 1); + } + nf_bridge_put(nf_bridge); + nf_bridge = tmp; + } + return nf_bridge; +} + +static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __cpu_to_be16(ETH_P_8021Q): + return VLAN_HLEN; + case __cpu_to_be16(ETH_P_PPP_SES): + return PPPOE_SES_HLEN; + default: + return 0; + } +} + +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull(skb, len); + skb->network_header += len; +} + +static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull_rcsum(skb, len); + skb->network_header += len; +} + +/* When handing a packet over to the IP layer + * check whether we have a skb that is in the + * expected format + */ + +static int br_validate_ipv4(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct net_device *dev = skb->dev; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto inhdr_error; + + iph = ip_hdr(skb); + + /* Basic sanity checks */ + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + if (!pskb_may_pull(skb, iph->ihl*4)) + goto inhdr_error; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto inhdr_error; + + len = ntohs(iph->tot_len); + if (skb->len < len) { + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } else if (len < (iph->ihl*4)) + goto inhdr_error; + + if (pskb_trim_rcsum(skb, len)) { + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + goto drop; + } + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ + return 0; + +inhdr_error: + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +/* We only check the length. A bridge shouldn't do any hop-by-hop stuff + * anyway + */ +static int check_hbh_len(struct sk_buff *skb) +{ + unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + u32 pkt_len; + const unsigned char *nh = skb_network_header(skb); + int off = raw - nh; + int len = (raw[1] + 1) << 3; + + if ((raw + len) - skb->data > skb_headlen(skb)) + goto bad; + + off += 2; + len -= 2; + + while (len > 0) { + int optlen = nh[off + 1] + 2; + + switch (nh[off]) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + case IPV6_TLV_JUMBO: + if (nh[off + 1] != 4 || (off & 3) != 2) + goto bad; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + goto bad; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + goto bad; + if (pskb_trim_rcsum(skb, + pkt_len + sizeof(struct ipv6hdr))) + goto bad; + nh = skb_network_header(skb); + break; + default: + if (optlen > len) + goto bad; + break; + } + off += optlen; + len -= optlen; + } + if (len == 0) + return 0; +bad: + return -1; +} + +/* Equivalent to br_validate_ipv4 for IPv6 */ +static int br_validate_ipv6(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct net_device *dev = skb->dev; + struct inet6_dev *idev = in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +static void nf_bridge_update_protocol(struct sk_buff *skb) +{ + switch (skb->nf_bridge->orig_proto) { + case BRNF_PROTO_8021Q: + skb->protocol = htons(ETH_P_8021Q); + break; + case BRNF_PROTO_PPPOE: + skb->protocol = htons(ETH_P_PPP_SES); + break; + case BRNF_PROTO_UNCHANGED: + break; + } +} + +/* Obtain the correct destination MAC address, while preserving the original + * source MAC address. If we already know this address, we just copy it. If we + * don't, we use the neighbour framework to find out. In both cases, we make + * sure that br_handle_frame_finish() is called afterwards. + */ +static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) +{ + struct neighbour *neigh; + struct dst_entry *dst; + + skb->dev = bridge_parent(skb->dev); + if (!skb->dev) + goto free_skb; + dst = skb_dst(skb); + neigh = dst_neigh_lookup_skb(dst, skb); + if (neigh) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + int ret; + + if (neigh->hh.hh_len) { + neigh_hh_bridge(&neigh->hh, skb); + skb->dev = nf_bridge->physindev; + ret = br_handle_frame_finish(sk, skb); + } else { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. + */ + skb_copy_from_linear_data_offset(skb, + -(ETH_HLEN-ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + /* FIXME Need to refragment */ + ret = neigh->output(neigh, skb); + } + neigh_release(neigh); + return ret; + } +free_skb: + kfree_skb(skb); + return 0; +} + +static bool daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + case htons(ETH_P_IPV6): + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; + default: + return false; + } +} + +/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. + */ +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); + + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); + } + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +/* This requires some explaining. If DNAT has taken place, + * we will need to fix up the destination Ethernet address. + * This is also true when SNAT takes place (for the reply direction). + * + * There are two cases to consider: + * 1. The packet was DNAT'ed to a device in the same bridge + * port group as it was received on. We can still bridge + * the packet. + * 2. The packet was DNAT'ed to a different device, either + * a non-bridged device or another bridge port group. + * The packet will need to be routed. + * + * The correct way of distinguishing between these two cases is to + * call ip_route_input() and to look at skb->dst->dev, which is + * changed to the destination device if ip_route_input() succeeds. + * + * Let's first consider the case that ip_route_input() succeeds: + * + * If the output device equals the logical bridge device the packet + * came in on, we can consider this bridging. The corresponding MAC + * address will be obtained in br_nf_pre_routing_finish_bridge. + * Otherwise, the packet is considered to be routed and we just + * change the destination MAC address so that the packet will + * later be passed up to the IP stack to be routed. For a redirected + * packet, ip_route_input() will give back the localhost as output device, + * which differs from the bridge device. + * + * Let's now consider the case that ip_route_input() fails: + * + * This can be because the destination address is martian, in which case + * the packet will be dropped. + * If IP forwarding is disabled, ip_route_input() will fail, while + * ip_route_output_key() can return success. The source + * address for ip_route_output_key() is set to zero, so ip_route_output_key() + * thinks we're handling a locally generated packet and won't care + * if IP forwarding is enabled. If the output device equals the logical bridge + * device, we proceed as if ip_route_input() succeeded. If it differs from the + * logical bridge port or if ip_route_output_key() fails we drop the packet. + */ +static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct iphdr *iph = ip_hdr(skb); + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + int err; + + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (daddr_was_changed(skb, nf_bridge)) { + if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + /* If err equals -EHOSTUNREACH the error is due to a + * martian destination or due to the fact that + * forwarding is disabled. For most martian packets, + * ip_route_output_key() will fail. It won't fail for 2 types of + * martian destinations: loopback destinations and destination + * 0.0.0.0. In both cases the packet will be dropped because the + * destination is the loopback device and not the bridge. */ + if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) + goto free_skb; + + rt = ip_route_output(dev_net(dev), iph->daddr, 0, + RT_TOS(iph->tos), 0); + if (!IS_ERR(rt)) { + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. */ + if (rt->dst.dev == dev) { + skb_dst_set(skb, &rt->dst); + goto bridged_dnat; + } + ip_rt_put(rt); + } +free_skb: + kfree_skb(skb); + return 0; + } else { + if (skb_dst(skb)->dev == dev) { +bridged_dnat: + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, + NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); + } + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) +{ + struct net_device *vlan, *br; + + br = bridge_parent(dev); + if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) + return br; + + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, + skb_vlan_tag_get(skb) & VLAN_VID_MASK); + + return vlan ? vlan : br; +} + +/* Some common code for IPv4/IPv6 */ +static struct net_device *setup_pre_routing(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->pkt_otherhost = true; + } + + nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->physindev = skb->dev; + skb->dev = brnf_get_logical_dev(skb, skb->dev); + + if (skb->protocol == htons(ETH_P_8021Q)) + nf_bridge->orig_proto = BRNF_PROTO_8021Q; + else if (skb->protocol == htons(ETH_P_PPP_SES)) + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; + + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return skb->dev; +} + +/* Replicate the checks that IPv6 does on packet reception and pass the packet + * to ip6tables. + */ +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + + if (br_validate_ipv6(skb)) + return NF_DROP; + + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + + skb->protocol = htons(ETH_P_IPV6); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, + br_nf_pre_routing_finish_ipv6); + + return NF_STOLEN; +} + +/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. + * Replicate the checks that IPv4 does on packet reception. + * Set skb->dev to the bridge device (i.e. parent of the + * receiving device) to make netfilter happy, the REDIRECT + * target in particular. Save the original destination IP + * address to be able to detect DNAT afterwards. */ +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + struct net_bridge_port *p; + struct net_bridge *br; + __u32 len = nf_bridge_encap_header_len(skb); + + if (unlikely(!pskb_may_pull(skb, len))) + return NF_DROP; + + p = br_port_get_rcu(state->in); + if (p == NULL) + return NF_DROP; + br = p->br; + + if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { + if (!brnf_call_ip6tables && !br->nf_call_ip6tables) + return NF_ACCEPT; + + nf_bridge_pull_encap_header_rcsum(skb); + return br_nf_pre_routing_ipv6(ops, skb, state); + } + + if (!brnf_call_iptables && !br->nf_call_iptables) + return NF_ACCEPT; + + if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) + return NF_ACCEPT; + + nf_bridge_pull_encap_header_rcsum(skb); + + if (br_validate_ipv4(skb)) + return NF_DROP; + + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; + + skb->protocol = htons(ETH_P_IP); + + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, + br_nf_pre_routing_finish); + + return NF_STOLEN; +} + + +/* PF_BRIDGE/LOCAL_IN ************************************************/ +/* The packet is locally destined, which requires a real + * dst_entry, so detach the fake one. On the way up, the + * packet would pass through PRE_ROUTING again (which already + * took place when the packet entered the bridge), but we + * register an IPv4 PRE_ROUTING 'sabotage' hook that will + * prevent this from happening. */ +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + br_drop_fake_rtable(skb); + return NF_ACCEPT; +} + +/* PF_BRIDGE/FORWARD *************************************************/ +static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *in; + + if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { + + if (skb->protocol == htons(ETH_P_IP)) + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; + + if (skb->protocol == htons(ETH_P_IPV6)) + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + + in = nf_bridge->physindev; + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge_update_protocol(skb); + } else { + in = *((struct net_device **)(skb->cb)); + } + nf_bridge_push_encap_header(skb); + + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, + in, skb->dev, br_forward_finish, 1); + return 0; +} + + +/* This is the 'purely bridged' case. For IP, we pass the packet to + * netfilter with indev and outdev set to the bridge device, + * but we are still able to filter on the 'real' indev/outdev + * because of the physdev module. For ARP, indev and outdev are the + * bridge ports. */ +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + struct net_device *parent; + u_int8_t pf; + + if (!skb->nf_bridge) + return NF_ACCEPT; + + /* Need exclusive nf_bridge_info since we might have multiple + * different physoutdevs. */ + if (!nf_bridge_unshare(skb)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_DROP; + + parent = bridge_parent(state->out); + if (!parent) + return NF_DROP; + + if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) + pf = NFPROTO_IPV4; + else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) + pf = NFPROTO_IPV6; + else + return NF_ACCEPT; + + nf_bridge_pull_encap_header(skb); + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->pkt_otherhost = true; + } + + if (pf == NFPROTO_IPV4) { + if (br_validate_ipv4(skb)) + return NF_DROP; + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + + if (pf == NFPROTO_IPV6) { + if (br_validate_ipv6(skb)) + return NF_DROP; + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + + nf_bridge->physoutdev = skb->dev; + if (pf == NFPROTO_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + brnf_get_logical_dev(skb, state->in), + parent, br_nf_forward_finish); + + return NF_STOLEN; +} + +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct net_bridge_port *p; + struct net_bridge *br; + struct net_device **d = (struct net_device **)(skb->cb); + + p = br_port_get_rcu(state->out); + if (p == NULL) + return NF_ACCEPT; + br = p->br; + + if (!brnf_call_arptables && !br->nf_call_arptables) + return NF_ACCEPT; + + if (!IS_ARP(skb)) { + if (!IS_VLAN_ARP(skb)) + return NF_ACCEPT; + nf_bridge_pull_encap_header(skb); + } + + if (arp_hdr(skb)->ar_pln != 4) { + if (IS_VLAN_ARP(skb)) + nf_bridge_push_encap_header(skb); + return NF_ACCEPT; + } + *d = state->in; + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + state->in, state->out, br_nf_forward_finish); + + return NF_STOLEN; +} + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) +static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) +{ + struct brnf_frag_data *data; + int err; + + data = this_cpu_ptr(&brnf_frag_data_storage); + err = skb_cow_head(skb, data->size); + + if (err) { + kfree_skb(skb); + return 0; + } + + if (data->vlan_tci) { + skb->vlan_tci = data->vlan_tci; + skb->vlan_proto = data->vlan_proto; + } + + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); + + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(sk, skb); +} +#endif + +static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) +{ + unsigned int mtu = ip_skb_dst_mtu(skb); + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + struct net_device *dev = rt->dst.dev; + + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > mtu))) { + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + + return ip_do_fragment(sk, skb, output); +} + +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) + return PPPOE_SES_HLEN; + return 0; +} + +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge; + unsigned int mtu_reserved; + + mtu_reserved = nf_bridge_mtu_reduction(skb); + + if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(sk, skb); + } + + nf_bridge = nf_bridge_info_get(skb); + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + /* This is wrong! We should preserve the original fragment + * boundaries by preserving frag_list rather than refragmenting. + */ + if (skb->protocol == htons(ETH_P_IP)) { + struct brnf_frag_data *data; + + if (br_validate_ipv4(skb)) + return NF_DROP; + + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + + data->vlan_tci = skb->vlan_tci; + data->vlan_proto = skb->vlan_proto; + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); + } +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + struct brnf_frag_data *data; + + if (br_validate_ipv6(skb)) + return NF_DROP; + + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + if (v6ops) + return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); + else + return -EMSGSIZE; + } +#endif + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(sk, skb); +} + +/* PF_BRIDGE/POST_ROUTING ********************************************/ +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *realoutdev = bridge_parent(skb->dev); + u_int8_t pf; + + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in + * on a bridge, but was delivered locally and is now being routed: + * + * POST_ROUTING was already invoked from the ip stack. + */ + if (!nf_bridge || !nf_bridge->physoutdev) + return NF_ACCEPT; + + if (!realoutdev) + return NF_DROP; + + if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) + pf = NFPROTO_IPV4; + else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) + pf = NFPROTO_IPV6; + else + return NF_ACCEPT; + + /* We assume any code from br_dev_queue_push_xmit onwards doesn't care + * about the value of skb->pkt_type. */ + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->pkt_otherhost = true; + } + + nf_bridge_pull_encap_header(skb); + if (pf == NFPROTO_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NULL, realoutdev, + br_nf_dev_queue_xmit); + + return NF_STOLEN; +} + +/* IP/SABOTAGE *****************************************************/ +/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING + * for the second time. */ +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + if (skb->nf_bridge && + !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + return NF_STOP; + } + + return NF_ACCEPT; +} + +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN - ETH_ALEN); + skb->dev = nf_bridge->physindev; + + nf_bridge->physoutdev = NULL; + br_handle_frame_finish(NULL, skb); +} + +static int br_nf_dev_xmit(struct sk_buff *skb) +{ + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} + +static const struct nf_br_ops br_ops = { + .br_dev_xmit_hook = br_nf_dev_xmit, +}; + +void br_netfilter_enable(void) +{ +} +EXPORT_SYMBOL_GPL(br_netfilter_enable); + +/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * br_dev_queue_push_xmit is called afterwards */ +static struct nf_hook_ops br_nf_ops[] __read_mostly = { + { + .hook = br_nf_pre_routing, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_local_in, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_forward_ip, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF - 1, + }, + { + .hook = br_nf_forward_arp, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_post_routing, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_LAST, + }, + { + .hook = ip_sabotage_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_FIRST, + }, + { + .hook = ip_sabotage_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_FIRST, + }, +}; + +#ifdef CONFIG_SYSCTL +static +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write && *(int *)(ctl->data)) + *(int *)(ctl->data) = 1; + return ret; +} + +static struct ctl_table brnf_table[] = { + { + .procname = "bridge-nf-call-arptables", + .data = &brnf_call_arptables, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-call-iptables", + .data = &brnf_call_iptables, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-call-ip6tables", + .data = &brnf_call_ip6tables, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-filter-vlan-tagged", + .data = &brnf_filter_vlan_tagged, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-filter-pppoe-tagged", + .data = &brnf_filter_pppoe_tagged, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-pass-vlan-input-dev", + .data = &brnf_pass_vlan_indev, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { } +}; +#endif + +static int __init br_netfilter_init(void) +{ + int ret; + + ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret < 0) + return ret; + +#ifdef CONFIG_SYSCTL + brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); + if (brnf_sysctl_header == NULL) { + printk(KERN_WARNING + "br_netfilter: can't register to sysctl.\n"); + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + return -ENOMEM; + } +#endif + RCU_INIT_POINTER(nf_br_ops, &br_ops); + printk(KERN_NOTICE "Bridge firewalling registered\n"); + return 0; +} + +static void __exit br_netfilter_fini(void) +{ + RCU_INIT_POINTER(nf_br_ops, NULL); + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(brnf_sysctl_header); +#endif +} + +module_init(br_netfilter_init); +module_exit(br_netfilter_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Lennert Buytenhek "); +MODULE_AUTHOR("Bart De Schuymer "); +MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); -- cgit v0.10.2 From 230ac490f7fba2aea52914c69d14b15dd515e49c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Jun 2015 14:07:03 +0200 Subject: netfilter: bridge: split ipv6 code into separated file Resolve compilation breakage when CONFIG_IPV6 is not set by moving the IPv6 code into a separated br_netfilter_ipv6.c file. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 2aa6048..bab824b 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -1,6 +1,66 @@ #ifndef _BR_NETFILTER_H_ #define _BR_NETFILTER_H_ +#include "../../../net/bridge/br_private.h" + +static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) +{ + skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + + if (likely(skb->nf_bridge)) + atomic_set(&(skb->nf_bridge->use), 1); + + return skb->nf_bridge; +} + +void nf_bridge_update_protocol(struct sk_buff *skb); + +static inline struct nf_bridge_info * +nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + +unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb); + +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb); + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; +} + +struct net_device *setup_pre_routing(struct sk_buff *skb); void br_netfilter_enable(void); +#if IS_ENABLED(CONFIG_IPV6) +int br_validate_ipv6(struct sk_buff *skb); +unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state); +#else +static inline int br_validate_ipv6(struct sk_buff *skb) +{ + return -1; +} + +static inline unsigned int +br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return NF_DROP; +} +#endif + #endif /* _BR_NETFILTER_H_ */ diff --git a/net/bridge/Makefile b/net/bridge/Makefile index c52577a..a1cda5d 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -13,6 +13,7 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o br_netfilter-y := br_netfilter_hooks.o +br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index e4e5f2f..d89f4fa 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -123,11 +123,6 @@ struct brnf_frag_data { static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); #endif -static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) -{ - return skb->nf_bridge; -} - static void nf_bridge_info_free(struct sk_buff *skb) { if (skb->nf_bridge) { @@ -136,14 +131,6 @@ static void nf_bridge_info_free(struct sk_buff *skb) } } -static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) -{ - struct net_bridge_port *port; - - port = br_port_get_rcu(dev); - return port ? &port->br->fake_rtable : NULL; -} - static inline struct net_device *bridge_parent(const struct net_device *dev) { struct net_bridge_port *port; @@ -152,15 +139,6 @@ static inline struct net_device *bridge_parent(const struct net_device *dev) return port ? port->br->dev : NULL; } -static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) -{ - skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); - if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); - - return skb->nf_bridge; -} - static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; @@ -178,7 +156,7 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) return nf_bridge; } -static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) { switch (skb->protocol) { case __cpu_to_be16(ETH_P_8021Q): @@ -190,14 +168,6 @@ static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } -static inline void nf_bridge_push_encap_header(struct sk_buff *skb) -{ - unsigned int len = nf_bridge_encap_header_len(skb); - - skb_push(skb, len); - skb->network_header -= len; -} - static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); @@ -267,112 +237,7 @@ drop: return -1; } -/* We only check the length. A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int check_hbh_len(struct sk_buff *skb) -{ - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); - u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - - if ((raw + len) - skb->data > skb_headlen(skb)) - goto bad; - - off += 2; - len -= 2; - - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - - case IPV6_TLV_PADN: - break; - - case IPV6_TLV_JUMBO: - if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - goto bad; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - goto bad; - nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; - } - off += optlen; - len -= optlen; - } - if (len == 0) - return 0; -bad: - return -1; -} - -/* Equivalent to br_validate_ipv4 for IPv6 */ -static int br_validate_ipv6(struct sk_buff *skb) -{ - const struct ipv6hdr *hdr; - struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); - u32 pkt_len; - u8 ip6h_len = sizeof(struct ipv6hdr); - - if (!pskb_may_pull(skb, ip6h_len)) - goto inhdr_error; - - if (skb->len < ip6h_len) - goto drop; - - hdr = ipv6_hdr(skb); - - if (hdr->version != 6) - goto inhdr_error; - - pkt_len = ntohs(hdr->payload_len); - - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(dev_net(dev), idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(dev_net(dev), idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - } - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) - goto drop; - - memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - /* No IP options in IPv6 header; however it should be - * checked if some next headers need special treatment - */ - return 0; - -inhdr_error: - IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); -drop: - return -1; -} - -static void nf_bridge_update_protocol(struct sk_buff *skb) +void nf_bridge_update_protocol(struct sk_buff *skb) { switch (skb->nf_bridge->orig_proto) { case BRNF_PROTO_8021Q: @@ -391,7 +256,7 @@ static void nf_bridge_update_protocol(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) +int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; @@ -431,77 +296,11 @@ free_skb: return 0; } -static bool daddr_was_changed(const struct sk_buff *skb, - const struct nf_bridge_info *nf_bridge) +static inline bool +br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) { - switch (skb->protocol) { - case htons(ETH_P_IP): - return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; - case htons(ETH_P_IPV6): - return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, - sizeof(ipv6_hdr(skb)->daddr)) != 0; - default: - return false; - } -} - -/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables - * PREROUTING and continue the bridge PRE_ROUTING hook. See comment - * for br_nf_pre_routing_finish(), same logic is used here but - * equivalent IPv6 function ip6_route_input() called indirectly. - */ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct rtable *rt; - struct net_device *dev = skb->dev; - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); - - nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; - - if (nf_bridge->pkt_otherhost) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->pkt_otherhost = false; - } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - if (daddr_was_changed(skb, nf_bridge)) { - skb_dst_drop(skb); - v6ops->route_input(skb); - - if (skb_dst(skb)->error) { - kfree_skb(skb); - return 0; - } - - if (skb_dst(skb)->dev == dev) { - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); - return 0; - } - ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); - skb->pkt_type = PACKET_HOST; - } else { - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; - } - skb_dst_set_noref(skb, &rt->dst); - } - - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - - return 0; + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, @@ -558,7 +357,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) nf_bridge->pkt_otherhost = false; } nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - if (daddr_was_changed(skb, nf_bridge)) { + if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -636,7 +435,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct } /* Some common code for IPv4/IPv6 */ -static struct net_device *setup_pre_routing(struct sk_buff *skb) +struct net_device *setup_pre_routing(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); @@ -659,35 +458,6 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) return skb->dev; } -/* Replicate the checks that IPv6 does on packet reception and pass the packet - * to ip6tables. - */ -static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_bridge_info *nf_bridge; - - if (br_validate_ipv6(skb)) - return NF_DROP; - - nf_bridge_put(skb->nf_bridge); - if (!nf_bridge_alloc(skb)) - return NF_DROP; - if (!setup_pre_routing(skb)) - return NF_DROP; - - nf_bridge = nf_bridge_info_get(skb); - nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; - - skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, - skb->dev, NULL, - br_nf_pre_routing_finish_ipv6); - - return NF_STOLEN; -} - /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. * Replicate the checks that IPv4 does on packet reception. * Set skb->dev to the bridge device (i.e. parent of the diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c new file mode 100644 index 0000000..6d12d26 --- /dev/null +++ b/net/bridge/br_netfilter_ipv6.c @@ -0,0 +1,245 @@ +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek + * Bart De Schuymer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include +#endif + +/* We only check the length. A bridge shouldn't do any hop-by-hop stuff + * anyway + */ +static int br_nf_check_hbh_len(struct sk_buff *skb) +{ + unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + u32 pkt_len; + const unsigned char *nh = skb_network_header(skb); + int off = raw - nh; + int len = (raw[1] + 1) << 3; + + if ((raw + len) - skb->data > skb_headlen(skb)) + goto bad; + + off += 2; + len -= 2; + + while (len > 0) { + int optlen = nh[off + 1] + 2; + + switch (nh[off]) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + case IPV6_TLV_JUMBO: + if (nh[off + 1] != 4 || (off & 3) != 2) + goto bad; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + goto bad; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + goto bad; + if (pskb_trim_rcsum(skb, + pkt_len + sizeof(struct ipv6hdr))) + goto bad; + nh = skb_network_header(skb); + break; + default: + if (optlen > len) + goto bad; + break; + } + off += optlen; + len -= optlen; + } + if (len == 0) + return 0; +bad: + return -1; +} + +int br_validate_ipv6(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct net_device *dev = skb->dev; + struct inet6_dev *idev = in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +static inline bool +br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) +{ + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; +} + +/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. + */ +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); + + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); + } + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +/* Replicate the checks that IPv6 does on packet reception and pass the packet + * to ip6tables. + */ +unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + + if (br_validate_ipv6(skb)) + return NF_DROP; + + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + + skb->protocol = htons(ETH_P_IPV6); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, + br_nf_pre_routing_finish_ipv6); + + return NF_STOLEN; +} -- cgit v0.10.2 From 2fd1dc910b39c97549d2d42a916985795ca8a641 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 17 Jun 2015 10:28:10 -0500 Subject: netfilter: Kill unused copies of RCV_SKB_FAIL This appears to have been a dead macro in both nfnetlink_log.c and nfnetlink_queue_core.c since these pieces of code were added in 2005. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 4ef1fae..4670821 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -598,8 +598,6 @@ nla_put_failure: return -1; } -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) - static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_ULOG, .u = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 6eccf0f..e26a46e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -834,8 +834,6 @@ nfqnl_dev_drop(struct net *net, int ifindex) rcu_read_unlock(); } -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) - static int nfqnl_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) -- cgit v0.10.2 From 17cebfd097fe8a21902602db5196f48e5a9d34e8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 17 Jun 2015 10:28:17 -0500 Subject: net: sched: Simplify em_ipset_match em->net is always set and always available, use it in preference to dev_net(skb->dev). Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index a3d79c8..df0328b 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -92,8 +92,8 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, rcu_read_lock(); - if (dev && skb->skb_iif) - indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif); + if (skb->skb_iif) + indev = dev_get_by_index_rcu(em->net, skb->skb_iif); acpar.in = indev ? indev : dev; acpar.out = dev; -- cgit v0.10.2 From 04c52dec1473c5dff9d07cd39a68c9b23def6c42 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:25 -0500 Subject: net: include missing headers in net/net_namespace.h Include linux/idr.h and linux/skbuff.h since they are required by objects that are declared in the net structure. struct net { ... struct idr netns_ids; ... struct sk_buff_head wext_nlevents; ... Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 72eb237..e951453 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -28,6 +28,8 @@ #include #include #include +#include +#include struct user_namespace; struct proc_dir_entry; -- cgit v0.10.2 From 10c04a8e715cca824f96bcbf4af07f5a40985357 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:26 -0500 Subject: netfilter: use forward declaration instead of including linux/proc_fs.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to pull the full definitions in that file, a simple forward declaration is enough. Moreover, include linux/procfs.h from nf_synproxy_core, otherwise this hits a compilation error due to missing declarations, ie. net/netfilter/nf_synproxy_core.c: In function ‘synproxy_proc_init’: net/netfilter/nf_synproxy_core.c:326:2: error: implicit declaration of function ‘proc_create’ [-Werror=implicit-function-declaration] if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat, ^ Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 8874002..cf25b5e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,9 +1,9 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include #include +struct proc_dir_entry; struct nf_logger; struct netns_nf { diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 52e20c9..789feea 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include -- cgit v0.10.2 From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:27 -0500 Subject: netfilter: don't pull include/linux/netfilter.h from netns headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pulls the full hook netfilter definitions from all those that include net_namespace.h. Instead let's just include the bare minimum required in the new linux/netfilter_defs.h file, and use it from the netfilter netns header files. I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit this compilation error: In file included from include/linux/netfilter_defs.h:4:0, from include/net/netns/netfilter.h:4, from include/net/net_namespace.h:22, from include/linux/netdevice.h:43, from net/netfilter/nfnetlink_queue_core.c:23: include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in; And also explicit include linux/netfilter.h in several spots. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f5ff5d1..00050df 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -10,7 +10,8 @@ #include #include #include -#include +#include + #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) { @@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, int netfilter_init(void); -/* Largest hook number + 1 */ -#define NF_MAX_HOOKS 8 - struct sk_buff; struct nf_hook_ops; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h new file mode 100644 index 0000000..d3a7f85 --- /dev/null +++ b/include/linux/netfilter_defs.h @@ -0,0 +1,9 @@ +#ifndef __LINUX_NETFILTER_CORE_H_ +#define __LINUX_NETFILTER_CORE_H_ + +#include + +/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ +#define NF_MAX_HOOKS 8 + +#endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cf25b5e..532e4ba 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,7 +1,7 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include +#include struct proc_dir_entry; struct nf_logger; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 4d6597a..c8a7681 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -2,7 +2,7 @@ #define __NETNS_X_TABLES_H #include -#include +#include struct ebt_table; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 177027c..d93f949 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -4,7 +4,8 @@ #include #include #include - +#include +#include /* Responses from hook functions. */ #define NF_DROP 0 diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 21678ac..928a0fb 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,6 +8,7 @@ #include #include #include +#include static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, const struct in6_addr *dst, -- cgit v0.10.2 From 8f481b50ea653ff0aea6accbb4bb02a15cf00531 Mon Sep 17 00:00:00 2001 From: Eric W Biederman Date: Wed, 17 Jun 2015 10:28:35 -0500 Subject: netfilter: Remove spurios included of netfilter.h While testing my netfilter changes I noticed several files where recompiling unncessarily because they unncessarily included netfilter.h. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 63ff08a..7856b6c 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -76,7 +76,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 14839bc..686f37d 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -28,8 +28,6 @@ #include #include #include -#include -#include #include #include diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 2f5eda8..6676607 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 4273533..9c891d0 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 7ed8ab7..29a3687 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 7c646bb..b563a3f 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index be2acab..8ddd41b 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 71c4bad..4ad2fb7 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 96b64d2..d72a4f1 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index e873d7d..c76638c 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -25,7 +25,6 @@ #include #include #include -#include #include static void rose_ftimer_expiry(unsigned long); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 40148932..0fc76d8 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 98b0426..56e354f 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -35,9 +35,6 @@ #include #include #include -#include -#include -#include #include #include #include -- cgit v0.10.2 From dcb8f5c8139ef945cdfd55900fae265c4dbefc02 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Jun 2015 23:58:28 +0200 Subject: netfilter: xtables: fix warnings on 32bit platforms On 32bit archs gcc complains due to cast from void* to u64. Add intermediate casts to long to silence these warnings. include/linux/netfilter/x_tables.h:376:10: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] include/linux/netfilter/x_tables.h:384:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:391:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:400:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Fixes: 71ae0dff02d756e ("netfilter: xtables: use percpu rule counters") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1c97a22..286098a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -372,7 +372,7 @@ static inline u64 xt_percpu_counter_alloc(void) if (res == NULL) return (u64) -ENOMEM; - return (__force u64) res; + return (u64) (__force unsigned long) res; } return 0; @@ -380,14 +380,14 @@ static inline u64 xt_percpu_counter_alloc(void) static inline void xt_percpu_counter_free(u64 pcnt) { if (nr_cpu_ids > 1) - free_percpu((void __percpu *) pcnt); + free_percpu((void __percpu *) (unsigned long) pcnt); } static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) { if (nr_cpu_ids > 1) - return this_cpu_ptr((void __percpu *) cnt->pcnt); + return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt); return cnt; } @@ -396,7 +396,7 @@ static inline struct xt_counters * xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) { if (nr_cpu_ids > 1) - return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu); return cnt; } -- cgit v0.10.2