From 779668450a990d402d316ffd9b7b103fbe5ab6f3 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 21 May 2015 10:57:12 -0300 Subject: netfilter: conntrack: warn the user if there is a better helper to use After db29a9508a92 ("netfilter: conntrack: disable generic tracking for known protocols"), if the specific helper is built but not loaded (a standard for most distributions) systems with a restrictive firewall but weak configuration regarding netfilter modules to load, will silently stop working. This patch then puts a warning message so the sysadmin knows where to start looking into. It's a pr_warn_once regardless of protocol itself but it should be enough to give a hint on where to look. Cc: Florian Westphal Cc: Daniel Borkmann Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 60865f1..2281be4 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -90,7 +90,13 @@ static int generic_packet(struct nf_conn *ct, static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { - return nf_generic_should_process(nf_ct_protonum(ct)); + bool ret; + + ret = nf_generic_should_process(nf_ct_protonum(ct)); + if (!ret) + pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n", + nf_ct_protonum(ct)); + return ret; } #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) -- cgit v0.10.2 From d39a33ed9b9ac0939a5b2ddc4dd3d283373bbe89 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:26:13 +0200 Subject: netfilter: bridge: refactor clearing BRNF_NF_BRIDGE_PREROUTING use binary AND on complement of BRNF_NF_BRIDGE_PREROUTING to unset bit in nf_bridge->mask. 
Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 46660a2..2651876 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -290,7 +290,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } - nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; rt = bridge_parent_rtable(nf_bridge->physindev); if (!rt) { @@ -415,7 +415,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } - nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; if (daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); -- cgit v0.10.2 From 8cae308d2bc81f95c320e7a345b92be6c238f510 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:26:57 +0200 Subject: netfilter: bridge: re-order br_nf_pre_routing_finish_ipv6() Put br_nf_pre_routing_finish_ipv6() after daddr_was_changed() and br_nf_pre_routing_finish_bridge() to prepare calling these functions from there. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2651876..6cb642c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -278,37 +278,6 @@ static void nf_bridge_update_protocol(struct sk_buff *skb) } } -/* PF_BRIDGE/PRE_ROUTING *********************************************/ -/* Undo the changes made for ip6tables PREROUTING and continue the - * bridge PRE_ROUTING hook. 
*/ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct rtable *rt; - - if (nf_bridge->pkt_otherhost) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->pkt_otherhost = false; - } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; - } - skb_dst_set_noref(skb, &rt->dst); - - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - - return 0; -} - /* Obtain the correct destination MAC address, while preserving the original * source MAC address. If we already know this address, we just copy it. If we * don't, we use the neighbour framework to find out. In both cases, we make @@ -360,6 +329,38 @@ static bool daddr_was_changed(const struct sk_buff *skb, return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } +/* PF_BRIDGE/PRE_ROUTING *********************************************/ +/* Undo the changes made for ip6tables PREROUTING and continue the + * bridge PRE_ROUTING hook. + */ +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + /* This requires some explaining. 
If DNAT has taken place, * we will need to fix up the destination Ethernet address. * This is also true when SNAT takes place (for the reply direction). -- cgit v0.10.2 From 72b31f7271df34c6aab36c01305287924826678f Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:27:40 +0200 Subject: netfilter: bridge: detect NAT66 correctly and change MAC address IPv4 iptables allows to REDIRECT/DNAT/SNAT any traffic over a bridge. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-iptables=1 $ iptables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This does not work with ip6tables on a bridge in NAT66 scenario because the REDIRECT/DNAT/SNAT is not correctly detected. The bridge pre-routing (finish) netfilter hook has to check for a possible redirect and then fix the destination mac address. This allows to use the ip6tables rules for local REDIRECT/DNAT/SNAT REDIRECT similar to the IPv4 iptables version. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-ip6tables=1 $ ip6tables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This patch makes it possible to use IPv6 NAT66 on a bridge. It was tested on a bridge with two interfaces using SNAT/DNAT NAT66 rules. 
Reported-by: Artie Hamilton Signed-off-by: Sven Eckelmann [bernhard.thaler@wvnet.at: rebased, add indirect call to ip6_route_input()] [bernhard.thaler@wvnet.at: rebased, split into separate patches] Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 64dad1cc..e2d1969 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void); struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); + void (*route_input)(struct sk_buff *skb); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc..f70fc0e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -36,6 +36,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -179,7 +180,10 @@ struct nf_bridge_info { struct net_device *physoutdev; char neigh_header[8]; }; - __be32 ipv4_daddr; + union { + __be32 ipv4_daddr; + struct in6_addr ipv6_daddr; + }; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6cb642c..9ac0c64 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -326,30 +326,63 @@ free_skb: static bool daddr_was_changed(const struct sk_buff *skb, const struct nf_bridge_info *nf_bridge) { - return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + switch (skb->protocol) { + case htons(ETH_P_IP): + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + case htons(ETH_P_IPV6): + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; + default: + return false; + } } -/* PF_BRIDGE/PRE_ROUTING *********************************************/ -/* Undo the changes made for ip6tables PREROUTING and continue the - * bridge PRE_ROUTING hook. 
+/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. */ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); } - skb_dst_set_noref(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -579,6 +612,7 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { + struct nf_bridge_info *nf_bridge; const struct ipv6hdr *hdr; u32 pkt_len; @@ -610,6 +644,9 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, if (!setup_pre_routing(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + skb->protocol = 
htons(ETH_P_IPV6); NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, skb->dev, NULL, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d958718..bbca09f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,6 +191,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input }; static const struct nf_afinfo nf_ip6_afinfo = { -- cgit v0.10.2 From 411ffb4fde80705a9a8db4c2d38dbeef6f5bd689 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:28:28 +0200 Subject: netfilter: bridge: refactor frag_max_size Currently frag_max_size is member of br_input_skb_cb and copied back and forth using IPCB(skb) and BR_INPUT_SKB_CB(skb) each time it is changed or used. Attach frag_max_size to nf_bridge_info and set value in pre_routing and forward functions. Use its value in forward and xmit functions. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f70fc0e..32b105e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -174,6 +174,7 @@ struct nf_bridge_info { BRNF_PROTO_PPPOE } orig_proto:8; bool pkt_otherhost; + __u16 frag_max_size; unsigned int mask; struct net_device *physindev; union { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9ac0c64..1f30b28 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -440,10 +440,8 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; - int frag_max_size; - frag_max_size = IPCB(skb)->frag_max_size; - BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; @@ -738,11 +736,9 @@ static int 
br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - int frag_max_size; if (skb->protocol == htons(ETH_P_IP)) { - frag_max_size = IPCB(skb)->frag_max_size; - BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; } in = nf_bridge->physindev; @@ -806,12 +802,9 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, } if (pf == NFPROTO_IPV4) { - int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; - if (br_parse_ip_options(skb)) return NF_DROP; - - IPCB(skb)->frag_max_size = frag_max; + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } nf_bridge->physoutdev = skb->dev; @@ -904,7 +897,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; - int frag_max_size; + struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { @@ -913,17 +906,18 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) } mtu_reserved = nf_bridge_mtu_reduction(skb); + nf_bridge = nf_bridge_info_get(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. 
*/ if (skb->len + mtu_reserved > skb->dev->mtu) { struct brnf_frag_data *data; - frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; - IPCB(skb)->frag_max_size = frag_max_size; + + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; nf_bridge_update_protocol(skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1f36fa7..8cde96e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -304,7 +304,6 @@ struct br_input_skb_cb { int mrouters_only; #endif - u16 frag_max_size; bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING -- cgit v0.10.2 From 77d574e7283cfcbb2c134f4431f77dff1c54423e Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:29:02 +0200 Subject: netfilter: bridge: rename br_parse_ip_options br_parse_ip_options() does not parse any IP options, it validates IP packets as a whole and the function name is misleading. Rename br_parse_ip_options() to br_validate_ipv4() and remove unneeded comments.
Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1f30b28..962d5f8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -216,7 +216,7 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) * expected format */ -static int br_parse_ip_options(struct sk_buff *skb) +static int br_validate_ipv4(struct sk_buff *skb) { const struct iphdr *iph; struct net_device *dev = skb->dev; @@ -692,7 +692,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header_rcsum(skb); - if (br_parse_ip_options(skb)) + if (br_validate_ipv4(skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -802,7 +802,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, } if (pf == NFPROTO_IPV4) { - if (br_parse_ip_options(skb)) + if (br_validate_ipv4(skb)) return NF_DROP; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } @@ -913,8 +913,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (skb->len + mtu_reserved > skb->dev->mtu) { struct brnf_frag_data *data; - if (br_parse_ip_options(skb)) - /* Drop invalid packet */ + if (br_validate_ipv4(skb)) return NF_DROP; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; -- cgit v0.10.2 From a4611d3b74b56658438ad1de4737a61a46be0fc0 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:29:38 +0200 Subject: netfilter: bridge: re-order check_hbh_len() Prepare check_hbh_len() to be called from newly introduced br_validate_ipv6() in next commit. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 962d5f8..d201ea4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -264,6 +264,62 @@ drop: return -1; } +/* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff + * anyway + */ +static int check_hbh_len(struct sk_buff *skb) +{ + unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + u32 pkt_len; + const unsigned char *nh = skb_network_header(skb); + int off = raw - nh; + int len = (raw[1] + 1) << 3; + + if ((raw + len) - skb->data > skb_headlen(skb)) + goto bad; + + off += 2; + len -= 2; + + while (len > 0) { + int optlen = nh[off + 1] + 2; + + switch (nh[off]) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + case IPV6_TLV_JUMBO: + if (nh[off + 1] != 4 || (off & 3) != 2) + goto bad; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + goto bad; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + goto bad; + if (pskb_trim_rcsum(skb, + pkt_len + sizeof(struct ipv6hdr))) + goto bad; + nh = skb_network_header(skb); + break; + default: + if (optlen > len) + goto bad; + break; + } + off += optlen; + len -= optlen; + } + if (len == 0) + return 0; +bad: + return -1; +} + static void nf_bridge_update_protocol(struct sk_buff *skb) { switch (skb->nf_bridge->orig_proto) { @@ -549,61 +605,6 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) return skb->dev; } -/* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff anyway */ -static int check_hbh_len(struct sk_buff *skb) -{ - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); - u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - - if ((raw + len) - skb->data > skb_headlen(skb)) - goto bad; - - off += 2; - len -= 2; - - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - - case IPV6_TLV_PADN: - break; - - case IPV6_TLV_JUMBO: - if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; - pkt_len = ntohl(*(__be32 *) (nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - goto bad; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - goto bad; - nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; - } - off += optlen; - len -= optlen; - } - if (len == 0) - return 0; -bad: - return -1; - -} - /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, -- cgit v0.10.2 From efb6de9b4ba0092b2c55f6a52d16294a8a698edd Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:30:16 +0200 Subject: netfilter: bridge: forward IPv6 fragmented packets IPv6 fragmented packets are not forwarded on an ethernet bridge with netfilter ip6_tables loaded. e.g. 
steps to reproduce 1) create a simple bridge like this modprobe br_netfilter brctl addbr br0 brctl addif br0 eth0 brctl addif br0 eth2 ifconfig eth0 up ifconfig eth2 up ifconfig br0 up 2) place a host with an IPv6 address on each side of the bridge set IPv6 address on host A: ip -6 addr add fd01:2345:6789:1::1/64 dev eth0 set IPv6 address on host B: ip -6 addr add fd01:2345:6789:1::2/64 dev eth0 3) run a simple ping command on host A with packets > MTU ping6 -s 4000 fd01:2345:6789:1::2 4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge IPv6 fragmented packets traverse the bridge cleanly until somebody runs. "ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are loaded) IPv6 fragmented packets do not traverse the bridge any more (you see no more responses in ping's output). After applying this patch IPv6 fragmented packets traverse the bridge cleanly in above scenario. Signed-off-by: Bernhard Thaler [pablo@netfilter.org: small changes to br_nf_dev_queue_xmit] Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index e2d1969..8b7d28f 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -26,6 +26,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index d201ea4..535f9da 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -320,6 +321,55 @@ bad: return -1; } +/* Equivalent to br_validate_ipv4 for IPv6 */ +static int br_validate_ipv6(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct net_device *dev = 
skb->dev; + struct inet6_dev *idev = in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + static void nf_bridge_update_protocol(struct sk_buff *skb) { switch (skb->nf_bridge->orig_proto) { @@ -405,6 +455,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) struct net_device *dev = skb->dev; const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; @@ -606,35 +658,15 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) } /* Replicate the checks that IPv6 does on packet reception and pass the packet - * to ip6tables, which doesn't support NAT, so things are fairly simple. */ + * to ip6tables. 
+ */ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; - const struct ipv6hdr *hdr; - u32 pkt_len; - - if (skb->len < sizeof(struct ipv6hdr)) - return NF_DROP; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return NF_DROP; - - hdr = ipv6_hdr(skb); - - if (hdr->version != 6) - return NF_DROP; - pkt_len = ntohs(hdr->payload_len); - - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return NF_DROP; - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) - return NF_DROP; - } - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + if (br_validate_ipv6(skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -738,9 +770,11 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - if (skb->protocol == htons(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; - } + + if (skb->protocol == htons(ETH_P_IPV6)) + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; in = nf_bridge->physindev; if (nf_bridge->pkt_otherhost) { @@ -808,6 +842,12 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } + if (pf == NFPROTO_IPV6) { + if (br_validate_ipv6(skb)) + return NF_DROP; + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); @@ -855,7 +895,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; @@ -875,6 +915,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct 
sk_buff *skb) nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } +#endif static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) @@ -897,21 +938,23 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - int ret; struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; - if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { + mtu_reserved = nf_bridge_mtu_reduction(skb); + + if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } - mtu_reserved = nf_bridge_mtu_reduction(skb); nf_bridge = nf_bridge_info_get(skb); + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->len + mtu_reserved > skb->dev->mtu) { + if (skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; if (br_validate_ipv4(skb)) @@ -928,21 +971,37 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); - } else { - nf_bridge_info_free(skb); - ret = br_dev_queue_push_xmit(sk, skb); + return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); } +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + struct brnf_frag_data *data; - return ret; -} -#else -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) -{ + if (br_validate_ipv6(skb)) + return NF_DROP; + + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + 
data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + if (v6ops) + return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); + else + return -EMSGSIZE; + } +#endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } -#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8cde96e..5dccced 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #define BR_HASH_BITS 8 @@ -214,7 +215,10 @@ struct net_bridge spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - struct rtable fake_rtable; + union { + struct rtable fake_rtable; + struct rt6_info fake_rt6_info; + }; bool nf_call_iptables; bool nf_call_ip6tables; bool nf_call_arptables; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index bbca09f..b4de08a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,7 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input + .route_input = ip6_route_input, + .fragment = ip6_fragment }; static const struct nf_afinfo nf_ip6_afinfo = { -- cgit v0.10.2 From 33b1f31392861947fa2a2a57c3a39ab63b8c9f9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2015 13:28:38 +0200 Subject: net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling since commit d6b915e29f4adea9 ("ip_fragment: don't forward defragmented DF packet") the largest fragment size is available in the IPCB. Therefore we no longer need to care about 'encapsulation' overhead of stripped PPPOE/VLAN headers since ip_do_fragment doesn't use device mtu in such cases. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f2fdb5a..6d80fc6 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,13 +20,6 @@ enum nf_br_hook_priorities { #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) -{ - if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) - return PPPOE_SES_HLEN; - return 0; -} - int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 535f9da..1e62ae5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -936,6 +936,13 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) + return PPPOE_SES_HLEN; + return 0; +} + static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f5f5ef1..19d7e43 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -549,10 +549,6 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, hlen = iph->ihl * 4; mtu = mtu - hlen; /* Size of data space */ -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge) - mtu -= nf_bridge_mtu_reduction(skb); -#endif IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. 
First, check its validity: -- cgit v0.10.2 From d7b597421519d6f680eb8e152a0d8447466ee2d6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2015 13:27:13 +0200 Subject: netfilter: bridge: restore vlan tag when refragmenting If bridge netfilter is used with both bridge-nf-call-iptables and bridge-nf-filter-vlan-tagged enabled then ip fragments in VLAN frames are sent without the vlan header. This has never worked reliably. Turns out this relied on pre-3.5 behaviour where skb frag_list was used to store ip fragments; ip_fragment() then re-used these skbs. But since commit 3cc4949269e01f39443d0fcfffb5bc6b47878d45 ("ipv4: use skb coalescing in defragmentation") this is no longer the case. ip_do_fragment now needs to allocate new skbs, but these don't contain the vlan tag information anymore. Fix it by storing vlan information of the reassembled skb in the br netfilter percpu frag area, and restore them for each of the fragments. Fixes: 3cc4949269e01f3 ("ipv4: use skb coalescing in defragmentation") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1e62ae5..e4e5f2f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -116,6 +116,8 @@ struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; u8 size; + u16 vlan_tci; + __be16 vlan_proto; }; static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); @@ -909,6 +911,11 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) return 0; } + if (data->vlan_tci) { + skb->vlan_tci = data->vlan_tci; + skb->vlan_proto = data->vlan_proto; + } + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); __skb_push(skb, data->encap_size); @@ -972,6 +979,9 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) nf_bridge_update_protocol(skb); data = this_cpu_ptr(&brnf_frag_data_storage); + + data->vlan_tci = skb->vlan_tci; +
data->vlan_proto = skb->vlan_proto; data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; -- cgit v0.10.2 From 71ae0dff02d756e4d2ca710b79f2ff5390029a5f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:54 +0200 Subject: netfilter: xtables: use percpu rule counters The binary arp/ip/ip6tables ruleset is stored per cpu. The only reason left as to why we need percpu duplication are the rule counters embedded into ipt_entry et al -- since each cpu has its own copy of the rules, all counters can be lockless. The downside is that the more cpus are supported, the more memory is required. Rules are not just duplicated per online cpu but for each possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times, not for the e.g. 64 cores present. To save some memory and also improve utilization of shared caches it would be preferable to only store the rule blob once. So we first need to separate counters and the rule blob. Instead of using entry->counters, allocate this percpu and store the percpu address in entry->counters.pcnt on CONFIG_SMP. This change makes no sense as-is; it is merely an intermediate step to remove the percpu duplication of the rule set in a followup patch. Suggested-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Reported-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 09f3820..b77ab9f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } + +/* On SMP, ip(6)t_entry->counters.pcnt holds address of the + * real (percpu) counter. On !SMP, its just the packet count, + * so nothing needs to be done there. 
+ * + * xt_percpu_counter_alloc returns the address of the percpu + * counter, or 0 on !SMP. + * + * Hence caller must use IS_ERR_VALUE to check for error, this + * allows us to return 0 for single core systems without forcing + * callers to deal with SMP vs. NONSMP issues. + */ +static inline u64 xt_percpu_counter_alloc(void) +{ + if (nr_cpu_ids > 1) { + void __percpu *res = alloc_percpu(struct xt_counters); + + if (res == NULL) + return (u64) -ENOMEM; + + return (__force u64) res; + } + + return 0; +} +static inline void xt_percpu_counter_free(u64 pcnt) +{ + if (nr_cpu_ids > 1) + free_percpu((void __percpu *) pcnt); +} + +static inline struct xt_counters * +xt_get_this_cpu_counter(struct xt_counters *cnt) +{ + if (nr_cpu_ids > 1) + return this_cpu_ptr((void __percpu *) cnt->pcnt); + + return cnt; +} + +static inline struct xt_counters * +xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) +{ + if (nr_cpu_ids > 1) + return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + + return cnt; +} + struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a612007..0ada09a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -289,13 +289,15 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { const struct xt_entry_target *t; + struct xt_counters *counter; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } - ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); @@ -521,6 +523,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return 
-ENOMEM; + t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); @@ -538,6 +544,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) err: module_put(t->u.kernel.target->me); out: + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -614,6 +622,7 @@ static inline void cleanup_entry(struct arpt_entry *e) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -723,13 +732,15 @@ static void get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1186,7 +1197,10 @@ static int do_add_counters(struct net *net, const void __user *user, loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1416,9 +1430,17 @@ static int translate_compat_table(const char *name, i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { + iter1->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(iter1->counters.pcnt)) { + ret = -ENOMEM; + break; + } + ret = check_target(iter1, name); - if (ret != 0) + if (ret != 0) { + xt_percpu_counter_free(iter1->counters.pcnt); break; + } ++i; if (strcmp(arpt_get_target(iter1)->u.user.name, XT_ERROR_TARGET) == 0) diff --git 
a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e7abf51..d190b10 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -345,6 +345,7 @@ ipt_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); if (!ip_packet_match(ip, indev, outdev, @@ -361,7 +362,8 @@ ipt_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -665,6 +667,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -691,6 +697,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, ret = check_target(e, net, name); if (ret) goto err; + return 0; err: module_put(t->u.kernel.target->me); @@ -700,6 +707,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -784,6 +794,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -888,13 +899,15 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); 
ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1374,7 +1387,10 @@ do_add_counters(struct net *net, const void __user *user, loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1608,6 +1624,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) unsigned int j; int ret = 0; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -1632,6 +1652,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cdd085f..a1190ee 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -367,6 +367,7 @@ ip6t_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); acpar.thoff = 0; @@ -384,7 +385,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -679,6 +681,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -714,6 +720,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + 
xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -797,6 +806,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -901,13 +912,15 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1374,7 +1387,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, goto free; } - local_bh_disable(); private = t->private; if (private->number != num_counters) { @@ -1388,7 +1400,10 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, addend = xt_write_recseq_begin(); loc_cpu_entry = private->entries[curcpu]; xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1621,6 +1636,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; j = 0; mtpar.net = net; mtpar.table = name; @@ -1645,6 +1663,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } -- cgit v0.10.2 From 
482cfc318559e2527dfd8513582d2fdb276e47c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:55 +0200 Subject: netfilter: xtables: avoid percpu ruleset duplication We store the rule blob per (possible) cpu. Unfortunately this means we can waste a lot of memory on big smp machines. ipt_entry structure ('rule head') is 112 bytes, so e.g. with maxcpu=64 one single rule eats close to 8k RAM. Since the previous patch made counters percpu it appears there is nothing left in the rule blob that needs to be percpu. On my test system (144 possible cpus, 400k dummy rules) this change saves close to 9 gigabytes of RAM. Reported-by: Marcelo Ricardo Leitner Acked-by: Jesper Dangaard Brouer Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b77ab9f..9969d79 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -224,9 +224,9 @@ struct xt_table_info { unsigned int stacksize; unsigned int __percpu *stackptr; void ***jumpstack; - /* ipt_entry tables: one per CPU */ + /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries[1]; + void *entries; }; #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0ada09a..d75c139 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -275,7 +275,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, * pointer. 
*/ smp_read_barrier_depends(); - table_base = private->entries[smp_processor_id()]; + table_base = private->entries; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -711,12 +711,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -731,7 +725,7 @@ static void get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -785,7 +779,7 @@ static int copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; /* ... then copy entire thing ... 
*/ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; @@ -880,10 +874,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(NFPROTO_ARP, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1048,7 +1042,7 @@ static int __do_replace(struct net *net, const char *name, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) cleanup_entry(iter); @@ -1095,8 +1089,7 @@ static int do_replace(struct net *net, const void __user *user, if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1126,7 +1119,7 @@ static int do_replace(struct net *net, const void __user *user, static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1136,7 +1129,6 @@ static int do_add_counters(struct net *net, const void __user *user, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; struct arpt_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1192,11 +1184,9 @@ static int do_add_counters(struct net *net, const void __user *user, } i = 
0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); - loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1410,7 +1400,7 @@ static int translate_compat_table(const char *name, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1470,11 +1460,6 @@ static int translate_compat_table(const char *name, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1533,8 +1518,7 @@ static int compat_do_replace(struct net *net, void __user *user, if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -1631,7 +1615,6 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *pos; unsigned int size; int ret = 0; - void *loc_cpu_entry; unsigned int i = 0; struct arpt_entry *iter; @@ -1639,11 +1622,9 @@ static int compat_copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy on our node/cpu */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, 
&pos, &size, counters, i++); if (ret != 0) @@ -1812,8 +1793,7 @@ struct xt_table *arpt_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(newinfo, loc_cpu_entry, repl); @@ -1844,7 +1824,7 @@ void arpt_unregister_table(struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter); if (private->number > private->initial_entries) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d190b10..6151500 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -254,15 +254,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ipt_entry *e) { - const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; @@ -331,7 +329,7 @@ ipt_do_table(struct sk_buff *skb, * pointer. 
*/ smp_read_barrier_depends(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -877,12 +875,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -898,7 +890,7 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -946,17 +938,13 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - const void *loc_cpu_entry; + void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... 
- * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1070,10 +1058,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1194,7 +1182,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_old_entry; struct ipt_entry *iter; ret = 0; @@ -1237,8 +1224,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1284,8 +1270,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1316,7 +1301,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, 
curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1326,7 +1311,6 @@ do_add_counters(struct net *net, const void __user *user, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; struct ipt_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1382,11 +1366,8 @@ do_add_counters(struct net *net, const void __user *user, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); - loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1739,7 +1720,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1791,11 +1772,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1842,8 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1914,7 +1889,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ipt_entry *iter; @@ 
-1922,14 +1896,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2104,8 +2073,7 @@ struct xt_table *ipt_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2136,7 +2104,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a1190ee..80a7f0d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ip6t_entry *e) { - const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? 
in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; @@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb, */ smp_read_barrier_depends(); cpu = smp_processor_id(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -890,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -911,7 +903,7 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -959,17 +951,13 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - const void *loc_cpu_entry; + void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... 
- * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1083,10 +1071,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1207,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - const void *loc_cpu_old_entry; struct ip6t_entry *iter; ret = 0; @@ -1250,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1297,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1329,7 +1314,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned 
int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1339,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - const void *loc_cpu_entry; struct ip6t_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1395,11 +1379,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); addend = xt_write_recseq_begin(); - loc_cpu_entry = private->entries[curcpu]; - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1407,7 +1388,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, ++i; } xt_write_recseq_end(addend); - unlock_up_free: local_bh_enable(); xt_table_unlock(t); @@ -1750,7 +1730,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1802,11 +1782,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1853,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) 
{ ret = -EFAULT; @@ -1925,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ip6t_entry *iter; @@ -1933,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2115,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2146,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8303246..6062ce3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -659,7 +659,6 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user); struct xt_table_info *xt_alloc_table_info(unsigned int size) { struct xt_table_info *newinfo; - int cpu; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if 
((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) @@ -671,19 +670,14 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) newinfo->size = size; - for_each_possible_cpu(cpu) { - if (size <= PAGE_SIZE) - newinfo->entries[cpu] = kmalloc_node(size, - GFP_KERNEL, - cpu_to_node(cpu)); - else - newinfo->entries[cpu] = vmalloc_node(size, - cpu_to_node(cpu)); + if (size <= PAGE_SIZE) + newinfo->entries = kmalloc(size, GFP_KERNEL); + else + newinfo->entries = vmalloc(size); - if (newinfo->entries[cpu] == NULL) { - xt_free_table_info(newinfo); - return NULL; - } + if (newinfo->entries == NULL) { + xt_free_table_info(newinfo); + return NULL; } return newinfo; @@ -694,8 +688,7 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - for_each_possible_cpu(cpu) - kvfree(info->entries[cpu]); + kvfree(info->entries); if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) -- cgit v0.10.2 From aaeb6e24f5b6cb6a664fbdec6e08b65c3173c1b3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 21:07:54 +0200 Subject: netfilter: ipset: Use MSEC_PER_SEC consistently Signed-off-by: Jozsef Kadlecsik diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 83c2f9e..3c8842b 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -61,7 +61,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 t) return; } - *timeout = msecs_to_jiffies(t * 1000) + jiffies; + *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; if (*timeout == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ (*timeout)--; @@ -71,7 +71,7 @@ static inline u32 ip_set_timeout_get(unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 
0 : - jiffies_to_msecs(*timeout - jiffies)/1000; + jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; } #endif /* __KERNEL__ */ -- cgit v0.10.2 From edda0791743eafc36382fb893d91547f36edaf7d Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 12 Jun 2015 21:11:54 +0200 Subject: netfilter: ipset: Use SET_WITH_*() helpers to test set extensions Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 475e496..347d97a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -390,12 +390,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], { u64 fullmark; if (tb[IPSET_ATTR_TIMEOUT]) { - if (!(set->extensions & IPSET_EXT_TIMEOUT)) + if (!SET_WITH_TIMEOUT(set)) return -IPSET_ERR_TIMEOUT; ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) { - if (!(set->extensions & IPSET_EXT_COUNTER)) + if (!SET_WITH_COUNTER(set)) return -IPSET_ERR_COUNTER; if (tb[IPSET_ATTR_BYTES]) ext->bytes = be64_to_cpu(nla_get_be64( @@ -405,25 +405,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], tb[IPSET_ATTR_PACKETS])); } if (tb[IPSET_ATTR_COMMENT]) { - if (!(set->extensions & IPSET_EXT_COMMENT)) + if (!SET_WITH_COMMENT(set)) return -IPSET_ERR_COMMENT; ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); } if (tb[IPSET_ATTR_SKBMARK]) { - if (!(set->extensions & IPSET_EXT_SKBINFO)) + if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); ext->skbmark = fullmark >> 32; ext->skbmarkmask = fullmark & 0xffffffff; } if (tb[IPSET_ATTR_SKBPRIO]) { - if (!(set->extensions & IPSET_EXT_SKBINFO)) + if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; ext->skbprio = be32_to_cpu(nla_get_be32( tb[IPSET_ATTR_SKBPRIO])); } if (tb[IPSET_ATTR_SKBQUEUE]) { - if (!(set->extensions & IPSET_EXT_SKBINFO)) + if 
(!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; ext->skbqueue = be16_to_cpu(nla_get_be16( tb[IPSET_ATTR_SKBQUEUE])); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7952869..8dd82db 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -431,7 +431,7 @@ mtype_destroy(struct ip_set *set) { struct htype *h = set->data; - if (set->extensions & IPSET_EXT_TIMEOUT) + if (SET_WITH_TIMEOUT(set)) del_timer_sync(&h->gc); mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); -- cgit v0.10.2 From 7dd37bc8e605d3ce14e6a1bc88ebbfae7ef43b9f Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 12 Jun 2015 21:14:09 +0200 Subject: netfilter: ipset: Check extensions attributes before getting extensions. Perform all extension attribute checks within ip_set_get_extensions() and reduce the amount of duplicated code. Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 2fe6de4..212005e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -138,13 +138,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; - if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index eb18856..0648e74 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++
b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -239,13 +239,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], u32 ip = 0; int ret = 0; - if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 898edb6..10ed264 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -137,13 +137,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], int ret = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 347d97a..68ae551 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -389,6 +389,15 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) { u64 fullmark; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) 
|| + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + return -IPSET_ERR_PROTOCOL; + if (tb[IPSET_ATTR_TIMEOUT]) { if (!SET_WITH_TIMEOUT(set)) return -IPSET_ERR_TIMEOUT; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 54df48b..0d955a4 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -108,13 +108,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], u32 ip = 0, ip_to = 0, hosts; int ret = 0; - if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -247,12 +241,6 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index d231248..f4e14ec 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -109,13 +109,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, 
IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_attr_netorder(tb, IPSET_ATTR_MARK))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -242,12 +236,6 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index a47c29f..02d9dba 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -118,13 +118,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -282,12 +276,6 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - 
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 89615f1..4e3b1c3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -121,13 +121,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -293,12 +287,6 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c 
b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6ba7a7e..988567e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -176,13 +176,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -429,13 +423,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 1f8668d..0d49519 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -107,13 +107,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; - if (unlikely(!tb[IPSET_ATTR_ETHER] || - 
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + if (unlikely(!tb[IPSET_ATTR_ETHER])) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 2e63dad..5eb334d 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -147,13 +147,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -319,13 +313,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c 
index fe481f6..5eef6f3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -295,13 +295,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -531,13 +525,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 8470474..775b1b0 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -169,13 +169,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) 
|| - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -400,13 +394,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 8273819..0eb73da 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -169,13 +169,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ 
-387,13 +381,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 1451a8a..c3634ad 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -187,13 +187,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -463,13 +457,7 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, 
IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 5bd3b1e..7d1377d 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -384,13 +384,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], int ret = 0; if (unlikely(!tb[IPSET_ATTR_NAME] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) -- cgit v0.10.2 From 2c227f278a92ca3a1515373cdf9cce3766433e40 Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 12 Jun 2015 21:23:31 +0200 Subject: netfilter: ipset: Permit CIDR equal to the host address CIDR in IPv6 Permit userspace to supply CIDR length equal to the host address CIDR length in netlink message. Prohibit any other CIDR length for IPv6 variant of the set. Also return -IPSET_ERR_HASH_RANGE_UNSUPPORTED instead of generic -IPSET_ERR_PROTOCOL in IPv6 variant of hash:ip,port,net when IPSET_ATTR_IP_TO attribute is given. 
Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 0d955a4..2b08b1b 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -240,10 +240,16 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; - if (unlikely(!tb[IPSET_ATTR_IP] || - tb[IPSET_ATTR_IP_TO] || - tb[IPSET_ATTR_CIDR])) + if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + if (unlikely(tb[IPSET_ATTR_CIDR])) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr != HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index f4e14ec..68fe40c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -235,10 +235,16 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], int ret; if (unlikely(!tb[IPSET_ATTR_IP] || - !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || - tb[IPSET_ATTR_IP_TO] || - tb[IPSET_ATTR_CIDR])) + !ip_set_attr_netorder(tb, IPSET_ATTR_MARK))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + if (unlikely(tb[IPSET_ATTR_CIDR])) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr != HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 02d9dba..9f03639 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -275,10 +275,16 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if 
(unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - tb[IPSET_ATTR_IP_TO] || - tb[IPSET_ATTR_CIDR])) + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + if (unlikely(tb[IPSET_ATTR_CIDR])) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr != HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 4e3b1c3..aa0966b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -286,10 +286,16 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - tb[IPSET_ATTR_IP_TO] || - tb[IPSET_ATTR_CIDR])) + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + if (unlikely(tb[IPSET_ATTR_CIDR])) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr != HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 988567e..f3c6256 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -423,12 +423,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) 
|| - tb[IPSET_ATTR_IP_TO] || - tb[IPSET_ATTR_CIDR])) + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + if (unlikely(tb[IPSET_ATTR_CIDR])) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr != HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -- cgit v0.10.2 From a212e08e8e0a5c689e61dd175b6e99223dda835c Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 12 Jun 2015 21:26:43 +0200 Subject: netfilter: ipset: Make sure we always return line number on batch Even if we return with the generic IPSET_ERR_PROTOCOL, it is a good idea to return the line number if we are called in batch mode. Moreover, we are not always exiting with IPSET_ERR_PROTOCOL. For example, hash:ip,port,net may return IPSET_ERR_HASH_RANGE_UNSUPPORTED or IPSET_ERR_INVALID_CIDR. Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 212005e..7af99c3 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -138,12 +138,12 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; - if (unlikely(!tb[IPSET_ATTR_IP])) - return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP])) + return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0648e74..7733422 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -239,12 +239,12 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], u32 ip = 0; int ret = 0; - if (unlikely(!tb[IPSET_ATTR_IP]))
- return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP])) + return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 10ed264..ec3bda1 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -136,13 +136,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], u16 port_to; int ret = 0; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); if (port < map->first_port || port > map->last_port) return -IPSET_ERR_BITMAP_RANGE; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 2b08b1b..2bbadcc 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -108,12 +108,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], u32 ip = 0, ip_to = 0, hosts; int ret = 0; - if (unlikely(!tb[IPSET_ATTR_IP])) - return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP])) + return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -240,6 +240,9 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) @@ -251,9 +254,6 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], return 
-IPSET_ERR_INVALID_CIDR; } - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 68fe40c..3aafb36 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -108,13 +108,13 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], u32 ip, ip_to = 0; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_MARK))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; @@ -234,6 +234,9 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_MARK))) return -IPSET_ERR_PROTOCOL; @@ -246,9 +249,6 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_INVALID_CIDR; } - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9f03639..4db1270 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -116,14 +116,14 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], bool with_ports = false; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return 
-IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; @@ -273,6 +273,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], bool with_ports = false; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) @@ -286,9 +289,6 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_INVALID_CIDR; } - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index aa0966b..c01bf68 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -119,14 +119,14 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], bool with_ports = false; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; @@ -284,6 +284,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], bool with_ports = false; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) @@ -297,9 +300,6 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_INVALID_CIDR; } - if 
(tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f3c6256..e38a029 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -173,15 +173,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -420,6 +420,9 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -434,9 +437,6 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_INVALID_CIDR; } - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 0d49519..8981c8b 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -107,12 +107,12 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; - if (unlikely(!tb[IPSET_ATTR_ETHER])) - return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) 
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_ETHER])) + return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 5eb334d..2988ec5 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -146,13 +146,13 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], u32 ip = 0, ip_to = 0, last; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -312,15 +312,15 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 5eef6f3..c80588c 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -293,14 +293,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], char iface[IFNAMSIZ]; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if 
(tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -523,6 +523,9 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], char iface[IFNAMSIZ]; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) @@ -530,9 +533,6 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 775b1b0..2e6a1ae 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -167,14 +167,14 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr, cidr2; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -392,6 +392,9 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) @@ -399,9 +402,6 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO] || 
tb[IPSET_ATTR_IP2_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 0eb73da..7a6448c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -166,15 +166,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -378,6 +378,9 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -386,9 +389,6 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index c3634ad..7ad9a77 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -183,6 +183,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr, cidr2; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = 
nla_get_u32(tb[IPSET_ATTR_LINENO]); + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || @@ -190,9 +193,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; @@ -453,6 +453,9 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], bool with_ports = false; int ret; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || @@ -462,9 +465,6 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]); if (ret) return ret; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 7d1377d..107ea6c 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -383,13 +383,13 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set *s; int ret = 0; + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_NAME] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) - *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; -- cgit v0.10.2 From aff227581ed1ac299e3a50eef4bb1cef944e1404 Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 12 Jun 2015 21:30:57 +0200 Subject: netfilter: 
ipset: Check CIDR value only when attribute is given There is no reason to check the CIDR value regardless of whether the attribute specifying the CIDR is given. Initialize the cidr array in the element structure at the element structure declaration to give the compiler more freedom to optimize the initialization right before the element structure is used. Remove local variables cidr and cidr2 for netnet and netportnet hashes as we do not use packed cidr value for such set types and can store value directly in e.cidr[]. Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 2988ec5..d19926a 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -329,11 +329,11 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - - if (!e.cidr || e.cidr > HOST_MASK) - return -IPSET_ERR_INVALID_CIDR; + if (!e.cidr || e.cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } ip6_netmask(&e.ip, e.cidr); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index c80588c..7b69fa2 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -541,10 +541,12 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (e.cidr > HOST_MASK) - return -IPSET_ERR_INVALID_CIDR; + if (e.cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } + ip6_netmask(&e.ip, e.cidr); strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 2e6a1ae..11eee00 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -160,17
+160,15 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { }; + struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; - u8 cidr, cidr2; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; @@ -188,17 +186,15 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; if (tb[IPSET_ATTR_CIDR]) { - cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > HOST_MASK) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!e.cidr[0] || e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - e.cidr[0] = cidr; } if (tb[IPSET_ATTR_CIDR2]) { - cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!cidr2 || cidr2 > HOST_MASK) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!e.cidr[1] || e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - e.cidr[1] = cidr2; } if (tb[IPSET_ATTR_CADT_FLAGS]) { @@ -245,15 +241,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], while (!after(ip, ip_to)) { e.ip[0] = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &cidr); - e.cidr[0] = cidr; + last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ip2 = (retried && ip == ntohl(h->next.ip[0])) ? 
ntohl(h->next.ip[1]) : ip2_from; while (!after(ip2, ip2_to)) { e.ip[1] = htonl(ip2); - last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2); - e.cidr[1] = cidr2; + last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -388,14 +382,13 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { }; + struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; @@ -414,15 +407,17 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } - if (tb[IPSET_ATTR_CIDR2]) + if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - - if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || - e.cidr[1] > HOST_MASK) - return -IPSET_ERR_INVALID_CIDR; + if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } ip6_netmask(&e.ip[0], e.cidr[0]); ip6_netmask(&e.ip[1], e.cidr[1]); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 7ad9a77..6eb5f87 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -175,18 +175,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct 
hash_netportnet4_elem e = { }; + struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; - u8 cidr, cidr2; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -206,17 +204,15 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; if (tb[IPSET_ATTR_CIDR]) { - cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!cidr || cidr > HOST_MASK) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!e.cidr[0] || e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - e.cidr[0] = cidr; } if (tb[IPSET_ATTR_CIDR2]) { - cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!cidr || cidr > HOST_MASK) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!e.cidr[1] || e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; - e.cidr[1] = cidr; } e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); @@ -285,8 +281,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], while (!after(ip, ip_to)) { e.ip[0] = htonl(ip); - ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr); - e.cidr[0] = cidr; + ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); p = retried && ip == ntohl(h->next.ip[0]) ? 
ntohs(h->next.port) : port; for (; p <= port_to; p++) { @@ -297,8 +292,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], while (!after(ip2, ip2_to)) { e.ip[1] = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, - &cidr2); - e.cidr[1] = cidr2; + &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -447,7 +441,7 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { }; + struct hash_netportnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; @@ -456,7 +450,6 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -477,15 +470,17 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } - if (tb[IPSET_ATTR_CIDR2]) + if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - - if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || - e.cidr[1] > HOST_MASK)) - return -IPSET_ERR_INVALID_CIDR; + if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + } ip6_netmask(&e.ip[0], e.cidr[0]); ip6_netmask(&e.ip[1], e.cidr[1]); -- cgit v0.10.2 From f690cbaed9fe4d77592e24139db7ad790641c4fd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 22:11:00 +0200 Subject: netfilter: ipset: Fix cidr handling for hash:*net* types Commit 
"Simplify cidr handling for hash:*net* types" broke the cidr handling for the hash:*net* types when the sets were used by the SET target: entries with invalid cidr values were added to the sets. Reported by Jonathan Johnson. Testsuite entry is added to verify the fix. Signed-off-by: Jozsef Kadlecsik diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ffdfdc2..a6fe1ce 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -545,8 +545,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (set)->timeout } -#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) - #define IPSET_CONCAT(a, b) a##b #define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 8dd82db..2f1985e 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -149,17 +149,21 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #endif /* cidr + 1 is stored in net_prefixes to support /0 */ -#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1) +#define NCIDR_PUT(cidr) ((cidr) + 1) +#define NCIDR_GET(cidr) ((cidr) - 1) #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ -#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1) -#define NCIDR(cidr) (cidr) +#define DCIDR_PUT(cidr) ((cidr) - 1) +#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1) #else -#define GCIDR(cidr, i) (__CIDR(cidr, i)) -#define NCIDR(cidr) (cidr - 1) +#define DCIDR_PUT(cidr) (cidr) +#define DCIDR_GET(cidr, i) __CIDR(cidr, i) #endif +#define INIT_CIDR(cidr, host_mask) \ + DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) + #define SET_HOST_MASK(family) (family == AF_INET ? 
32 : 128) #ifdef IP_SET_HASH_WITH_NET0 @@ -303,7 +307,8 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different - * sized networks */ + * sized networks. cidr == real cidr + 1 to support /0. + */ static void mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { @@ -498,8 +503,10 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, SCIDR(data->cidr, k), - nets_length, k); + mtype_del_cidr(h, + NCIDR_PUT(DCIDR_GET(data->cidr, + k)), + nets_length, k); #endif ip_set_ext_destroy(set, data); if (j != n->pos - 1) @@ -692,9 +699,9 @@ reuse_slot: data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) { - mtype_del_cidr(h, SCIDR(data->cidr, i), + mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), NLEN(set->family), i); - mtype_add_cidr(h, SCIDR(d->cidr, i), + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), NLEN(set->family), i); } #endif @@ -711,8 +718,8 @@ reuse_slot: data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family), - i); + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), + NLEN(set->family), i); #endif h->elements++; } @@ -772,8 +779,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, h->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family), - j); + mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), + NLEN(set->family), j); #endif ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { @@ -836,12 +843,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 
mtype_data_reset_elem(d, &orig); - mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false); + mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; k++) { - mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true); + mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), + true); #else - mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0])); + mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); @@ -889,7 +897,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* If we test an IP address and not a network address, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) - if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index e38a029..50248de 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -141,7 +141,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git 
a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d19926a..089b23f 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -120,7 +120,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -288,7 +288,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7b69fa2..aac2076 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -235,7 +235,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -469,7 +469,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c 
b/net/netfilter/ipset/ip_set_hash_netnet.c index 11eee00..ed9cc45 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -141,8 +141,8 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -364,8 +364,8 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7a6448c..fbaf813 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -136,7 +136,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = 
IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 6eb5f87..a828cbc 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -152,8 +152,8 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -418,8 +418,8 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; -- cgit v0.10.2 From c4c997839cf92cb1037e43a85cdb4cbf44ed39a5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 11:59:45 +0200 Subject: netfilter: ipset: Fix parallel resizing and listing of the same set When elements are added to a hash:* type of set and resizing is triggered, parallel listing could start to list the original set (before resizing) and "continue" with listing the new set. Fix it by taking references and using the original hash table for listing. Therefore the destroying of the original hash table may happen from the resizing or listing functions. 
Signed-off-by: Jozsef Kadlecsik diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index a6fe1ce..5674b6a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -176,6 +176,9 @@ struct ip_set_type_variant { /* List elements */ int (*list)(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb); + /* Keep listing private when resizing runs parallel */ + void (*uref)(struct ip_set *set, struct netlink_callback *cb, + bool start); /* Return true if "b" set is the same as "a" * according to the create set parameters */ @@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter, /* Netlink CB args */ enum { - IPSET_CB_NET = 0, - IPSET_CB_DUMP, - IPSET_CB_INDEX, - IPSET_CB_ARG0, + IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_DUMP, /* dump single set/all sets */ + IPSET_CB_INDEX, /* set index */ + IPSET_CB_PRIVATE, /* set private data */ + IPSET_CB_ARG0, /* type specific */ IPSET_CB_ARG1, - IPSET_CB_ARG2, }; /* register and unregister set references */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 68ae551..777cac6 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1211,12 +1211,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; if (cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", - ip_set(inst, cb->args[IPSET_CB_INDEX])->name); - __ip_set_put_byindex(inst, - (ip_set_id_t) cb->args[IPSET_CB_INDEX]); + struct ip_set_net *inst = + (struct ip_set_net *)cb->args[IPSET_CB_NET]; + ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; + struct ip_set *set = ip_set(inst, index); + + if (set->variant->uref) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); + __ip_set_put_byindex(inst, index); } 
return 0; } @@ -1247,12 +1251,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[IPSET_CB_NET]: net namespace - * [IPSET_CB_DUMP]: dump single set/all sets - * [IPSET_CB_INDEX]: set index - * [IPSET_CB_ARG0]: type specific - */ - if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1359,6 +1357,8 @@ dump_last: goto release_refcount; if (dump_flags & IPSET_FLAG_LIST_HEADER) goto next_set; + if (set->variant->uref) + set->variant->uref(set, cb, true); /* Fall through and add elements */ default: read_lock_bh(&set->lock); @@ -1375,6 +1375,8 @@ dump_last: dump_type = DUMP_LAST; cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); cb->args[IPSET_CB_INDEX] = 0; + if (set && set->variant->uref) + set->variant->uref(set, cb, false); goto dump_last; } goto out; @@ -1389,7 +1391,10 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", ip_set(inst, index)->name); + set = ip_set(inst, index); + if (set->variant->uref) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); __ip_set_put_byindex(inst, index); cb->args[IPSET_CB_ARG0] = 0; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 2f1985e..5fcf70b 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -71,6 +71,8 @@ struct hbucket { /* The hash table: the table size stored here in order to make resizing easy */ struct htable { + atomic_t ref; /* References for resizing */ + atomic_t uref; /* References for dumping */ u8 htable_bits; /* size of hash table == 2^htable_bits */ struct hbucket bucket[0]; /* hashtable buckets */ }; @@ -207,6 +209,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_del #undef mtype_test_cidrs #undef mtype_test +#undef 
mtype_uref #undef mtype_expire #undef mtype_resize #undef mtype_head @@ -248,6 +251,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_uref IPSET_TOKEN(MTYPE, _uref) #define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) #define mtype_head IPSET_TOKEN(MTYPE, _head) @@ -595,6 +599,9 @@ retry: t->htable_bits = htable_bits; read_lock_bh(&set->lock); + /* There can't be another parallel resizing, but dumping is possible */ + atomic_set(&orig->ref, 1); + atomic_inc(&orig->uref); for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { @@ -609,6 +616,8 @@ retry: #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); #endif + atomic_set(&orig->ref, 0); + atomic_dec(&orig->uref); read_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) @@ -631,7 +640,11 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - mtype_ahash_destroy(set, orig, false); + /* If there's nobody else dumping the table, destroy it */ + if (atomic_dec_and_test(&orig->uref)) { + pr_debug("Table destroy by resize %p\n", orig); + mtype_ahash_destroy(set, orig, false); + } return 0; } @@ -961,13 +974,36 @@ nla_put_failure: return -EMSGSIZE; } +/* Make possible to run dumping parallel with resizing */ +static void +mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) +{ + struct htype *h = set->data; + struct htable *t; + + if (start) { + rcu_read_lock_bh(); + t = rcu_dereference_bh_nfnl(h->table); + atomic_inc(&t->uref); + cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; + rcu_read_unlock_bh(); + } else if (cb->args[IPSET_CB_PRIVATE]) { + t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; + if 
(atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + /* Resizing didn't destroy the hash table */ + pr_debug("Table destroy by dump: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + cb->args[IPSET_CB_PRIVATE] = 0; + } +} + /* Reply a LIST/SAVE request: dump the elements of the specified set */ static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { - const struct htype *h = set->data; - const struct htable *t = rcu_dereference_bh_nfnl(h->table); + const struct htable *t; struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -980,6 +1016,7 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); + t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); @@ -1047,6 +1084,7 @@ static const struct ip_set_type_variant mtype_variant = { .flush = mtype_flush, .head = mtype_head, .list = mtype_list, + .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, }; -- cgit v0.10.2 From 9c1ba5c809381fb9fb779e2cc22a1c878a269ffb Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 13:39:38 +0200 Subject: netfilter: ipset: Make sure listing doesn't grab a set which is just being destroyed. There was a small window when all sets are destroyed and a concurrent listing of all sets could grab a set which is just being destroyed. 
Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 777cac6..87b4182 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -32,7 +32,8 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ struct ip_set_net { struct ip_set * __rcu *ip_set_list; /* all individual sets */ ip_set_id_t ip_set_max; /* max number of sets */ - int is_deleted; /* deleted by ip_set_net_exit */ + bool is_deleted; /* deleted by ip_set_net_exit */ + bool is_destroyed; /* all sets are destroyed */ }; static int ip_set_net_id __read_mostly; @@ -980,12 +981,9 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static void -ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) +ip_set_destroy_set(struct ip_set *set) { - struct ip_set *set = ip_set(inst, index); - pr_debug("set: %s\n", set->name); - ip_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -1025,12 +1023,17 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, goto out; } } + inst->is_destroyed = true; read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL) - ip_set_destroy_set(inst, i); + if (s) { + ip_set(inst, i) = NULL; + ip_set_destroy_set(s); + } } + /* Modified by ip_set_destroy() only, which is serialized */ + inst->is_destroyed = false; } else { s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); @@ -1041,9 +1044,10 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, ret = -IPSET_ERR_BUSY; goto out; } + ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - ip_set_destroy_set(inst, i); + ip_set_destroy_set(s); } return 0; out: @@ -1283,6 +1287,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).portid ? 
NLM_F_MULTI : 0; struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; + bool is_destroyed; int ret = 0; if (!cb->args[IPSET_CB_DUMP]) { @@ -1309,12 +1314,20 @@ dump_last: dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; + write_lock_bh(&ip_set_ref_lock); set = ip_set(inst, index); - if (set == NULL) { + is_destroyed = inst->is_destroyed; + if (!set || is_destroyed) { + write_unlock_bh(&ip_set_ref_lock); if (dump_type == DUMP_ONE) { ret = -ENOENT; goto out; } + if (is_destroyed) { + /* All sets are just being destroyed */ + ret = 0; + goto out; + } continue; } /* When dumping all sets, we must dump "sorted" @@ -1322,14 +1335,17 @@ dump_last: */ if (dump_type != DUMP_ONE && ((dump_type == DUMP_ALL) == - !!(set->type->features & IPSET_DUMP_LAST))) + !!(set->type->features & IPSET_DUMP_LAST))) { + write_unlock_bh(&ip_set_ref_lock); continue; + } pr_debug("List set: %s\n", set->name); if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - __ip_set_get(set); + set->ref++; } + write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); @@ -2012,7 +2028,8 @@ ip_set_net_init(struct net *net) list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); if (!list) return -ENOMEM; - inst->is_deleted = 0; + inst->is_deleted = false; + inst->is_destroyed = false; rcu_assign_pointer(inst->ip_set_list, list); return 0; } @@ -2025,12 +2042,14 @@ ip_set_net_exit(struct net *net) struct ip_set *set = NULL; ip_set_id_t i; - inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ + inst->is_deleted = true; /* flag for ip_set_nfnl_put */ for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); - if (set != NULL) - ip_set_destroy_set(inst, i); + if (set) { + ip_set(inst, i) = NULL; + 
ip_set_destroy_set(set); + } } kfree(rcu_dereference_protected(inst->ip_set_list, 1)); } -- cgit v0.10.2 From bd55389cc34b75948c2876c821175a976bbac5b1 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 14:02:51 +0200 Subject: netfilter:ipset Remove rbtree from hash:net,iface Remove rbtree in order to introduce RCU instead of rwlock in ipset Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index aac2076..3258189 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -37,88 +36,13 @@ MODULE_AUTHOR("Jozsef Kadlecsik "); IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); -/* Interface name rbtree */ - -struct iface_node { - struct rb_node node; - char iface[IFNAMSIZ]; -}; - -#define iface_data(n) (rb_entry(n, struct iface_node, node)->iface) - -static void -rbtree_destroy(struct rb_root *root) -{ - struct iface_node *node, *next; - - rbtree_postorder_for_each_entry_safe(node, next, root, node) - kfree(node); - - *root = RB_ROOT; -} - -static int -iface_test(struct rb_root *root, const char **iface) -{ - struct rb_node *n = root->rb_node; - - while (n) { - const char *d = iface_data(n); - int res = strcmp(*iface, d); - - if (res < 0) - n = n->rb_left; - else if (res > 0) - n = n->rb_right; - else { - *iface = d; - return 1; - } - } - return 0; -} - -static int -iface_add(struct rb_root *root, const char **iface) -{ - struct rb_node **n = &(root->rb_node), *p = NULL; - struct iface_node *d; - - while (*n) { - char *ifname = iface_data(*n); - int res = strcmp(*iface, ifname); - - p = *n; - if (res < 0) - n = &((*n)->rb_left); - else if (res > 0) - n = &((*n)->rb_right); - else { - *iface = ifname; - return 0; - } - } - - d = kzalloc(sizeof(*d), GFP_ATOMIC); - if (!d) - 
return -ENOMEM; - strcpy(d->iface, *iface); - - rb_link_node(&d->node, p, n); - rb_insert_color(&d->node, root); - - *iface = d->iface; - return 0; -} - /* Type specific function prefix */ #define HTYPE hash_netiface #define IP_SET_HASH_WITH_NETS -#define IP_SET_HASH_WITH_RBTREE #define IP_SET_HASH_WITH_MULTI #define IP_SET_HASH_WITH_NET0 -#define STREQ(a, b) (strcmp(a, b) == 0) +#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ) /* IPv4 variant */ @@ -137,7 +61,7 @@ struct hash_netiface4_elem { u8 cidr; u8 nomatch; u8 elem; - const char *iface; + char iface[IFNAMSIZ]; }; /* Common functions */ @@ -151,7 +75,7 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && - ip1->iface == ip2->iface; + strcmp(ip1->iface, ip2->iface) == 0; } static inline int @@ -239,7 +163,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - int ret; if (e.cidr == 0) return -EINVAL; @@ -249,35 +172,24 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); -#define IFACE(dir) (par->dir ? par->dir->name : NULL) +#define IFACE(dir) (par->dir ? par->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - e.iface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + const char *eiface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); - if (!e.iface) + if (!eiface) return -EINVAL; + STRLCPY(e.iface, eiface); e.physdev = 1; -#else - e.iface = NULL; #endif } else - e.iface = SRCDIR ? IFACE(in) : IFACE(out); + STRLCPY(e.iface, SRCDIR ? 
IFACE(in) : IFACE(out)); - if (!e.iface) + if (strlen(e.iface) == 0) return -EINVAL; - ret = iface_test(&h->rbtree, &e.iface); - if (adt == IPSET_ADD) { - if (!ret) { - ret = iface_add(&h->rbtree, &e.iface); - if (ret) - return ret; - } - } else if (!ret) - return ret; - return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } @@ -290,7 +202,6 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; - char iface[IFNAMSIZ]; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -314,18 +225,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } - - strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); - e.iface = iface; - ret = iface_test(&h->rbtree, &e.iface); - if (adt == IPSET_ADD) { - if (!ret) { - ret = iface_add(&h->rbtree, &e.iface); - if (ret) - return ret; - } - } else if (!ret) - return ret; + nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); @@ -384,7 +284,7 @@ struct hash_netiface6_elem { u8 cidr; u8 nomatch; u8 elem; - const char *iface; + char iface[IFNAMSIZ]; }; /* Common functions */ @@ -398,7 +298,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && - ip1->iface == ip2->iface; + strcmp(ip1->iface, ip2->iface) == 0; } static inline int @@ -473,7 +373,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - int ret; if (e.cidr == 0) return -EINVAL; @@ -485,29 +384,19 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - e.iface = SRCDIR ? 
get_physindev_name(skb) : - get_phyoutdev_name(skb); - if (!e.iface) + const char *eiface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); + if (!eiface) return -EINVAL; + STRLCPY(e.iface, eiface); e.physdev = 1; -#else - e.iface = NULL; #endif } else - e.iface = SRCDIR ? IFACE(in) : IFACE(out); + STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); - if (!e.iface) + if (strlen(e.iface) == 0) return -EINVAL; - ret = iface_test(&h->rbtree, &e.iface); - if (adt == IPSET_ADD) { - if (!ret) { - ret = iface_add(&h->rbtree, &e.iface); - if (ret) - return ret; - } - } else if (!ret) - return ret; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } @@ -516,11 +405,9 @@ static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - char iface[IFNAMSIZ]; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -549,17 +436,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], ip6_netmask(&e.ip, e.cidr); - strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); - e.iface = iface; - ret = iface_test(&h->rbtree, &e.iface); - if (adt == IPSET_ADD) { - if (!ret) { - ret = iface_add(&h->rbtree, &e.iface); - if (ret) - return ret; - } - } else if (!ret) - return ret; + nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); -- cgit v0.10.2 From b57b2d1fa53fe8563bdfc66a33b844463b9af285 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 14:22:25 +0200 Subject: netfilter: ipset: Prepare the ipset core to use RCU at set level Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking accordingly. Convert the comment extension into an rcu-aware object. Also, simplify the timeout routines. 
Signed-off-by: Jozsef Kadlecsik diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5674b6a..19b4969 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -108,8 +108,13 @@ struct ip_set_counter { atomic64_t packets; }; +struct ip_set_comment_rcu { + struct rcu_head rcu; + char str[0]; +}; + struct ip_set_comment { - char *str; + struct ip_set_comment_rcu __rcu *c; }; struct ip_set_skbinfo { @@ -226,7 +231,7 @@ struct ip_set { /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ - rwlock_t lock; + spinlock_t lock; /* References to the set */ u32 ref; /* The core set type */ diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 21217ea..8d02485 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb) return nla_data(tb); } +/* Called from uadd only, protected by the set spinlock. + * The kadt functions don't use the comment extensions in any way. + */ static inline void ip_set_init_comment(struct ip_set_comment *comment, const struct ip_set_ext *ext) { + struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? 
strlen(ext->comment) : 0; - if (unlikely(comment->str)) { - kfree(comment->str); - comment->str = NULL; + if (unlikely(c)) { + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } if (!len) return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - comment->str = kzalloc(len + 1, GFP_ATOMIC); - if (unlikely(!comment->str)) + c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + if (unlikely(!c)) return; - strlcpy(comment->str, ext->comment, len + 1); + strlcpy(c->str, ext->comment, len + 1); + rcu_assign_pointer(comment->c, c); } +/* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) { - if (!comment->str) + struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + + if (!c) return 0; - return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); + return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str); } +/* Called from uadd/udel, flush or the garbage collectors protected + * by the set spinlock. + * Called when the set is destroyed and when there can't be any user + * of the set data anymore. 
+ */ static inline void ip_set_comment_free(struct ip_set_comment *comment) { - if (unlikely(!comment->str)) + struct ip_set_comment_rcu *c; + + c = rcu_dereference_protected(comment->c, 1); + if (unlikely(!c)) return; - kfree(comment->str); - comment->str = NULL; + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } #endif diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 3c8842b..1d6a935 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_test(unsigned long timeout) +ip_set_timeout_expired(unsigned long *t) { - return timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout); -} - -static inline bool -ip_set_timeout_expired(unsigned long *timeout) -{ - return *timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(*timeout); + return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } static inline void -ip_set_timeout_set(unsigned long *timeout, u32 t) +ip_set_timeout_set(unsigned long *timeout, u32 value) { - if (!t) { + unsigned long t; + + if (!value) { *timeout = IPSET_ELEM_PERMANENT; return; } - *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; - if (*timeout == IPSET_ELEM_PERMANENT) + t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies; + if (t == IPSET_ELEM_PERMANENT) /* Bingo! 
:-) */ - (*timeout)--; + t--; + *timeout = t; } static inline u32 diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 87b4182..2b21a19 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type) pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", type->name, family_name(type->family), type->revision_min); - ret = -EINVAL; - goto unlock; + ip_set_type_unlock(); + return -EINVAL; } list_add_rcu(&type->list, &ip_set_type_list); pr_debug("type %s, family %s, revision %u:%u registered.\n", type->name, family_name(type->family), type->revision_min, type->revision_max); -unlock: ip_set_type_unlock(); + return ret; } EXPORT_SYMBOL_GPL(ip_set_type_register); @@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type) pr_warn("ip_set type %s, family %s with revision min %u not registered\n", type->name, family_name(type->family), type->revision_min); - goto unlock; + ip_set_type_unlock(); + return; } list_del_rcu(&type->list); pr_debug("type %s, family %s with revision min %u unregistered.\n", type->name, family_name(type->family), type->revision_min); -unlock: ip_set_type_unlock(); synchronize_rcu(); @@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t 
index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); if (!set) return -ENOMEM; - rwlock_init(&set->lock); + spin_lock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; @@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->flush(set); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); } static int @@ -1377,9 +1377,9 @@ dump_last: set->variant->uref(set, cb, true); /* Fall through and add elements */ default: - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->list(set, skb, cb); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; @@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); retried = true; } while (ret == -EAGAIN && set->variant->resize && @@ -1644,9 +1644,9 
@@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) ret = 1; -- cgit v0.10.2 From 96f51428c43de20723630f0d756a7a9a42cbd974 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 14:39:59 +0200 Subject: netfilter: ipset: Introduce RCU locking in bitmap:* types There's nothing much required because the bitmap types use atomic bit operations. However, the logic of adding elements changed slightly: first the MAC address is updated (which is not atomic), then the element is activated (added). The extensions may call kfree_rcu(), therefore we call rcu_barrier() at module removal. Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 6f024a8..86429f3 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -144,10 +144,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, set))) + ip_set_timeout_expired(ext_timeout(x, set))) { ret = 0; - else if (!(flags & IPSET_FLAG_EXIST)) + } else if (!(flags & IPSET_FLAG_EXIST)) { + set_bit(e->id, map->members); return -IPSET_ERR_EXIST; + } /* Element is re-added, cleanup extensions */ ip_set_ext_destroy(set, x); } @@ -165,6 +167,10 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_init_comment(ext_comment(x, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(x, set), ext); + + /* Activate element */ + set_bit(e->id, map->members); + return 0; } @@ -203,10 +209,13 @@ mtype_list(const struct ip_set *set, struct nlattr *adt, *nested; void *x; u32 
id, first = cb->args[IPSET_CB_ARG0]; + int ret = 0; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; + /* Extensions may be replaced */ + rcu_read_lock(); for (; cb->args[IPSET_CB_ARG0] < map->elements; cb->args[IPSET_CB_ARG0]++) { id = cb->args[IPSET_CB_ARG0]; @@ -222,9 +231,11 @@ mtype_list(const struct ip_set *set, if (!nested) { if (id == first) { nla_nest_cancel(skb, adt); - return -EMSGSIZE; - } else - goto nla_put_failure; + ret = -EMSGSIZE; + goto out; + } + + goto nla_put_failure; } if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; @@ -238,16 +249,18 @@ mtype_list(const struct ip_set *set, /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; - return 0; + goto out; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(id == first)) { cb->args[IPSET_CB_ARG0] = 0; - return -EMSGSIZE; + ret = -EMSGSIZE; } ipset_nest_end(skb, adt); - return 0; +out: + rcu_read_unlock(); + return ret; } static void @@ -260,7 +273,7 @@ mtype_gc(unsigned long ul_set) /* We run parallel with other readers (test element) * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); + spin_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { x = get_ext(set, map, id); @@ -269,7 +282,7 @@ mtype_gc(unsigned long ul_set) ip_set_ext_destroy(set, x); } } - read_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 7af99c3..b8ce474 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -81,7 +81,7 @@ static inline int bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, u32 flags, size_t dsize) { - return !!test_and_set_bit(e->id, map->members); + return !!test_bit(e->id, map->members); } static inline int @@ -376,6 +376,7 @@ 
bitmap_ip_init(void) static void __exit bitmap_ip_fini(void) { + rcu_barrier(); ip_set_type_unregister(&bitmap_ip_type); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 7733422..fe00e87 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -147,15 +147,23 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, struct bitmap_ipmac_elem *elem; elem = get_elem(map->extensions, e->id, dsize); - if (test_and_set_bit(e->id, map->members)) { + if (test_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { - if (e->ether && (flags & IPSET_FLAG_EXIST)) + if (e->ether && + (flags & IPSET_FLAG_EXIST) && + !ether_addr_equal(e->ether, elem->ether)) { + /* memcpy isn't atomic */ + clear_bit(e->id, map->members); + smp_mb__after_atomic(); memcpy(elem->ether, e->ether, ETH_ALEN); + } return IPSET_ADD_FAILED; } else if (!e->ether) /* Already added without ethernet address */ return IPSET_ADD_FAILED; /* Fill the MAC address and trigger the timer activation */ + clear_bit(e->id, map->members); + smp_mb__after_atomic(); memcpy(elem->ether, e->ether, ETH_ALEN); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; @@ -413,6 +421,7 @@ bitmap_ipmac_init(void) static void __exit bitmap_ipmac_fini(void) { + rcu_barrier(); ip_set_type_unregister(&bitmap_ipmac_type); } diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index ec3bda1..2d360f9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -73,7 +73,7 @@ static inline int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, struct bitmap_port *map, u32 flags, size_t dsize) { - return !!test_and_set_bit(e->id, map->members); + return !!test_bit(e->id, map->members); } static inline int @@ -306,6 +306,7 @@ bitmap_port_init(void) static void __exit bitmap_port_fini(void) { + rcu_barrier(); 
ip_set_type_unregister(&bitmap_port_type); } -- cgit v0.10.2 From 18f84d41d34fa35d0d64bbaea01fe664553ecc06 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 17:29:56 +0200 Subject: netfilter: ipset: Introduce RCU locking in hash:* types Three types of data need to be protected in the case of the hash types: a. The hash buckets: standard rcu pointer operations are used. b. The element blobs in the hash buckets are stored in an array and a bitmap is used for book-keeping to tell which elements in the array are used or free. c. Networks per cidr values and the cidr values themselves are stored in fixed-size arrays and need no protection. The values are modified in such an order that in the worst case an element testing is repeated once with the same cidr value. The ipset hash approach uses arrays instead of lists and therefore is incompatible with rhashtable. Performance is tested by Jesper Dangaard Brouer: Simple drop in FORWARD ~~~~~~~~~~~~~~~~~~~~~~ Dropping via simple iptables net-mask match:: iptables -t raw -N simple || iptables -t raw -F simple iptables -t raw -I simple -s 198.18.0.0/15 -j DROP iptables -t raw -D PREROUTING -j simple iptables -t raw -I PREROUTING -j simple Drop performance in "raw": 11.3Mpps Generator: sending 12.2Mpps (tx:12264083 pps) Drop via original ipset in RAW table ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Create a set with lots of elements:: sudo ./ipset destroy test echo "create test hash:ip hashsize 65536" > test.set for x in `seq 0 255`; do for y in `seq 0 255`; do echo "add test 198.18.$x.$y" >> test.set done done sudo ./ipset restore < test.set Dropping via ipset:: iptables -t raw -F iptables -t raw -N net198 || iptables -t raw -F net198 iptables -t raw -I net198 -m set --match-set test src -j DROP iptables -t raw -I PREROUTING -j net198 Drop performance in "raw" with ipset: 8Mpps Perf report numbers ipset drop in "raw":: + 24.65% ksoftirqd/1 [ip_set] [k] ip_set_test - 21.42% ksoftirqd/1 [kernel.kallsyms] [k] 
_raw_read_lock_bh - _raw_read_lock_bh + 99.88% ip_set_test - 19.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_unlock_bh - _raw_read_unlock_bh + 99.72% ip_set_test + 4.31% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_kadt + 2.27% ksoftirqd/1 [ixgbe] [k] ixgbe_fetch_rx_buffer + 2.18% ksoftirqd/1 [ip_tables] [k] ipt_do_table + 1.81% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_test + 1.61% ksoftirqd/1 [kernel.kallsyms] [k] __netif_receive_skb_core + 1.44% ksoftirqd/1 [kernel.kallsyms] [k] build_skb + 1.42% ksoftirqd/1 [kernel.kallsyms] [k] ip_rcv + 1.36% ksoftirqd/1 [kernel.kallsyms] [k] __local_bh_enable_ip + 1.16% ksoftirqd/1 [kernel.kallsyms] [k] dev_gro_receive + 1.09% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_unlock + 0.96% ksoftirqd/1 [ixgbe] [k] ixgbe_clean_rx_irq + 0.95% ksoftirqd/1 [kernel.kallsyms] [k] __netdev_alloc_frag + 0.88% ksoftirqd/1 [kernel.kallsyms] [k] kmem_cache_alloc + 0.87% ksoftirqd/1 [xt_set] [k] set_match_v3 + 0.85% ksoftirqd/1 [kernel.kallsyms] [k] inet_gro_receive + 0.83% ksoftirqd/1 [kernel.kallsyms] [k] nf_iterate + 0.76% ksoftirqd/1 [kernel.kallsyms] [k] put_compound_page + 0.75% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_lock Drop via ipset in RAW table with RCU-locking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With RCU locking, the RW-lock is gone. 
Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps Performance-tested-by: Jesper Dangaard Brouer Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 5fcf70b..f352cc0 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -10,19 +10,19 @@ #include #include +#include #include -#ifndef rcu_dereference_bh -#define rcu_dereference_bh(p) rcu_dereference(p) -#endif + +#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) +#define ipset_dereference_protected(p, set) \ + __ipset_dereference_protected(p, spin_is_locked(&(set)->lock)) #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. * Internally jhash is used with the assumption that the size of the - * stored data is a multiple of sizeof(u32). If storage supports timeout, - * the timeout field must be the last one in the data structure - that field - * is ignored when computing the hash key. + * stored data is a multiple of sizeof(u32). * * Readers and resizing * @@ -36,6 +36,8 @@ #define AHASH_INIT_SIZE 4 /* Max number of elements to store in an array block */ #define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) +/* Max number of elements in the array block when tuned */ +#define AHASH_MAX_TUNED 64 /* Max number of elements can be tuned */ #ifdef IP_SET_HASH_WITH_MULTI @@ -53,7 +55,7 @@ tune_ahash_max(u8 curr, u32 multi) /* Currently, at listing one hash bucket must fit into a message. * Therefore we have a hard limit here. */ - return n > curr && n <= 64 ? n : curr; + return n > curr && n <= AHASH_MAX_TUNED ? 
n : curr; } #define TUNE_AHASH_MAX(h, multi) \ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) @@ -64,20 +66,23 @@ tune_ahash_max(u8 curr, u32 multi) /* A hash bucket */ struct hbucket { - void *value; /* the array of the values */ + struct rcu_head rcu; /* for call_rcu_bh */ + /* Which positions are used in the array */ + DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ u8 pos; /* position of the first free entry */ -}; + unsigned char value[0]; /* the array of the values */ +} __attribute__ ((aligned)); /* The hash table: the table size stored here in order to make resizing easy */ struct htable { atomic_t ref; /* References for resizing */ atomic_t uref; /* References for dumping */ u8 htable_bits; /* size of hash table == 2^htable_bits */ - struct hbucket bucket[0]; /* hashtable buckets */ + struct hbucket __rcu *bucket[0]; /* hashtable buckets */ }; -#define hbucket(h, i) (&((h)->bucket[i])) +#define hbucket(h, i) ((h)->bucket[i]) #ifndef IPSET_NET_COUNT #define IPSET_NET_COUNT 1 @@ -85,8 +90,8 @@ struct htable { /* Book-keeping of the prefixes added to the set */ struct net_prefixes { - u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ - u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ + u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */ + u8 cidr[IPSET_NET_COUNT]; /* the cidr value */ }; /* Compute the hash table size */ @@ -99,11 +104,11 @@ htable_size(u8 hbits) if (hbits > 31) return 0; hsize = jhash_size(hbits); - if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket) + if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *) < hsize) return 0; - return hsize * sizeof(struct hbucket) + sizeof(struct htable); + return hsize * sizeof(struct hbucket *) + sizeof(struct htable); } /* Compute htable_bits from the user input parameter hashsize */ @@ -112,6 +117,7 @@ htable_bits(u32 hashsize) { /* Assume that hashsize == 2^htable_bits */ u8 bits = fls(hashsize - 
1); + if (jhash_size(bits) != hashsize) /* Round up to the first 2^n value */ bits = fls(hashsize); @@ -119,30 +125,6 @@ htable_bits(u32 hashsize) return bits; } -static int -hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) -{ - if (n->pos >= n->size) { - void *tmp; - - if (n->size >= ahash_max) - /* Trigger rehashing */ - return -EAGAIN; - - tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize, - GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - if (n->size) { - memcpy(tmp, n->value, n->size * dsize); - kfree(n->value); - } - n->value = tmp; - n->size += AHASH_INIT_SIZE; - } - return 0; -} - #ifdef IP_SET_HASH_WITH_NETS #if IPSET_NET_COUNT > 1 #define __CIDR(cidr, i) (cidr[i]) @@ -300,9 +282,6 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif -#ifdef IP_SET_HASH_WITH_RBTREE - struct rb_root rbtree; -#endif #ifdef IP_SET_HASH_WITH_NETS struct net_prefixes nets[0]; /* book-keeping of prefixes */ #endif @@ -345,8 +324,8 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) for (i = 0; i < nets_length; i++) { if (h->nets[i].cidr[n] != cidr) continue; - h->nets[cidr -1].nets[n]--; - if (h->nets[cidr -1].nets[n] > 0) + h->nets[cidr - 1].nets[n]--; + if (h->nets[cidr - 1].nets[n] > 0) return; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; @@ -362,15 +341,18 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, u8 nets_length, size_t dsize) { u32 i; - size_t memsize = sizeof(*h) - + sizeof(*t) + struct hbucket *n; + size_t memsize = sizeof(*h) + sizeof(*t); + #ifdef IP_SET_HASH_WITH_NETS - + sizeof(struct net_prefixes) * nets_length + memsize += sizeof(struct net_prefixes) * nets_length; #endif - + jhash_size(t->htable_bits) * sizeof(struct hbucket); - - for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * dsize; + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = rcu_dereference_bh(hbucket(t, 
i)); + if (!n) + continue; + memsize += sizeof(struct hbucket) + n->size * dsize; + } return memsize; } @@ -385,7 +367,8 @@ mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) int i; for (i = 0; i < n->pos; i++) - ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); + if (test_bit(i, n->used)) + ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); } /* Flush a hash type of set: destroy all elements */ @@ -397,16 +380,16 @@ mtype_flush(struct ip_set *set) struct hbucket *n; u32 i; - t = rcu_dereference_bh_nfnl(h->table); + t = ipset_dereference_protected(h->table, set); for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) { - if (set->extensions & IPSET_EXT_DESTROY) - mtype_ext_cleanup(set, n); - n->size = n->pos = 0; - /* FIXME: use slab cache */ - kfree(n->value); - } + n = __ipset_dereference_protected(hbucket(t, i), 1); + if (!n) + continue; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + rcu_assign_pointer(hbucket(t, i), NULL); + kfree_rcu(n, rcu); } #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); @@ -422,13 +405,13 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) { - if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) - mtype_ext_cleanup(set, n); - /* FIXME: use slab cache */ - kfree(n->value); - } + n = __ipset_dereference_protected(hbucket(t, i), 1); + if (!n) + continue; + if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + kfree(n); } ip_set_free(t); @@ -443,10 +426,8 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); -#ifdef IP_SET_HASH_WITH_RBTREE - rbtree_destroy(&h->rbtree); -#endif + mtype_ahash_destroy(set, 
__ipset_dereference_protected(h->table, 1), + true); kfree(h); set->data = NULL; @@ -491,20 +472,26 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) struct htable *t; struct hbucket *n; struct mtype_elem *data; - u32 i; - int j; + u32 i, j, d; #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif - rcu_read_lock_bh(); - t = rcu_dereference_bh(h->table); + t = ipset_dereference_protected(h->table, set); for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - for (j = 0; j < n->pos; j++) { + n = __ipset_dereference_protected(hbucket(t, i), 1); + if (!n) + continue; + for (j = 0, d = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) { + d++; + continue; + } data = ahash_data(n, j, dsize); if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); + clear_bit(j, n->used); + smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) mtype_del_cidr(h, @@ -513,29 +500,31 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) nets_length, k); #endif ip_set_ext_destroy(set, data); - if (j != n->pos - 1) - /* Not last one */ - memcpy(data, - ahash_data(n, n->pos - 1, dsize), - dsize); - n->pos--; h->elements--; + d++; } } - if (n->pos + AHASH_INIT_SIZE < n->size) { - void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * dsize, - GFP_ATOMIC); + if (d >= AHASH_INIT_SIZE) { + struct hbucket *tmp = kzalloc(sizeof(*tmp) + + (n->size - AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); if (!tmp) /* Still try to delete expired elements */ continue; - n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * dsize); - kfree(n->value); - n->value = tmp; + tmp->size = n->size - AHASH_INIT_SIZE; + for (j = 0, d = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, dsize); + memcpy(tmp->value + d * dsize, data, dsize); + set_bit(j, tmp->used); + d++; + } + tmp->pos = d; + rcu_assign_pointer(hbucket(t, i), tmp); + kfree_rcu(n, 
rcu); } } - rcu_read_unlock_bh(); } static void @@ -545,9 +534,9 @@ mtype_gc(unsigned long ul_set) struct htype *h = set->data; pr_debug("called\n"); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); mtype_expire(set, h, NLEN(set->family), set->dsize); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); @@ -560,80 +549,115 @@ static int mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; - struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table); - u8 htable_bits = orig->htable_bits; + struct htable *t, *orig; + u8 htable_bits; + size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; + struct mtype_elem *tmp; #endif struct mtype_elem *data; struct mtype_elem *d; struct hbucket *n, *m; - u32 i, j; + u32 i, j, key; int ret; - /* Try to cleanup once */ - if (SET_WITH_TIMEOUT(set) && !retried) { - i = h->elements; - write_lock_bh(&set->lock); - mtype_expire(set, set->data, NLEN(set->family), set->dsize); - write_unlock_bh(&set->lock); - if (h->elements < i) - return 0; - } +#ifdef IP_SET_HASH_WITH_NETS + tmp = kmalloc(dsize, GFP_KERNEL); + if (!tmp) + return -ENOMEM; +#endif + rcu_read_lock_bh(); + orig = rcu_dereference_bh_nfnl(h->table); + htable_bits = orig->htable_bits; + rcu_read_unlock_bh(); retry: ret = 0; htable_bits++; - pr_debug("attempt to resize set %s from %u to %u, t %p\n", - set->name, orig->htable_bits, htable_bits, orig); if (!htable_bits) { /* In case we have plenty of memory :-) */ pr_warn("Cannot increase the hashsize of set %s further\n", set->name); - return -IPSET_ERR_HASH_FULL; + ret = -IPSET_ERR_HASH_FULL; + goto out; + } + t = ip_set_alloc(htable_size(htable_bits)); + if (!t) { + ret = -ENOMEM; + goto out; } - t = ip_set_alloc(sizeof(*t) - + jhash_size(htable_bits) * sizeof(struct hbucket)); - if (!t) - return -ENOMEM; t->htable_bits = htable_bits; - read_lock_bh(&set->lock); + spin_lock_bh(&set->lock); + 
orig = __ipset_dereference_protected(h->table, 1); /* There can't be another parallel resizing, but dumping is possible */ atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); for (i = 0; i < jhash_size(orig->htable_bits); i++) { - n = hbucket(orig, i); + n = __ipset_dereference_protected(hbucket(orig, i), 1); + if (!n) + continue; for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j, set->dsize); + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, dsize); #ifdef IP_SET_HASH_WITH_NETS + /* We have readers running parallel with us, + * so the live data cannot be modified. + */ flags = 0; + memcpy(tmp, data, dsize); + data = tmp; mtype_data_reset_flags(data, &flags); #endif - m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); - if (ret < 0) { -#ifdef IP_SET_HASH_WITH_NETS - mtype_data_reset_flags(data, &flags); -#endif - atomic_set(&orig->ref, 0); - atomic_dec(&orig->uref); - read_unlock_bh(&set->lock); - mtype_ahash_destroy(set, t, false); - if (ret == -EAGAIN) - goto retry; - return ret; + key = HKEY(data, h->initval, htable_bits); + m = __ipset_dereference_protected(hbucket(t, key), 1); + if (!m) { + m = kzalloc(sizeof(*m) + + AHASH_INIT_SIZE * dsize, + GFP_ATOMIC); + if (!m) { + ret = -ENOMEM; + goto cleanup; + } + m->size = AHASH_INIT_SIZE; + RCU_INIT_POINTER(hbucket(t, key), m); + } else if (m->pos >= m->size) { + struct hbucket *ht; + + if (m->size >= AHASH_MAX(h)) { + ret = -EAGAIN; + } else { + ht = kzalloc(sizeof(*ht) + + (m->size + AHASH_INIT_SIZE) + * dsize, + GFP_ATOMIC); + if (!ht) + ret = -ENOMEM; + } + if (ret < 0) + goto cleanup; + memcpy(ht, m, sizeof(struct hbucket) + + m->size * dsize); + ht->size = m->size + AHASH_INIT_SIZE; + kfree(m); + m = ht; + RCU_INIT_POINTER(hbucket(t, key), ht); } - d = ahash_data(m, m->pos++, set->dsize); - memcpy(d, data, set->dsize); + 
d = ahash_data(m, m->pos, dsize); + memcpy(d, data, dsize); + set_bit(m->pos++, m->used); #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif } } - rcu_assign_pointer(h->table, t); - read_unlock_bh(&set->lock); + + spin_unlock_bh(&set->lock); /* Give time to other readers of the set */ synchronize_rcu_bh(); @@ -646,7 +670,20 @@ retry: mtype_ahash_destroy(set, orig, false); } - return 0; +out: +#ifdef IP_SET_HASH_WITH_NETS + kfree(tmp); +#endif + return ret; + +cleanup: + atomic_set(&orig->ref, 0); + atomic_dec(&orig->uref); + spin_unlock_bh(&set->lock); + mtype_ahash_destroy(set, t, false); + if (ret == -EAGAIN) + goto retry; + goto out; } /* Add an element to a hash and update the internal counters when succeeded, @@ -659,17 +696,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; - struct hbucket *n; - int i, ret = 0; - int j = AHASH_MAX(h) + 1; + struct hbucket *n, *old = ERR_PTR(-ENOENT); + int i, j = -1; bool flag_exist = flags & IPSET_FLAG_EXIST; + bool deleted = false, forceadd = false, reuse = false; u32 key, multi = 0; - rcu_read_lock_bh(); - t = rcu_dereference_bh(h->table); + if (h->elements >= h->maxelem) { + if (SET_WITH_TIMEOUT(set)) + /* FIXME: when set is full, we slow down here */ + mtype_expire(set, h, NLEN(set->family), set->dsize); + if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + forceadd = true; + } + + t = ipset_dereference_protected(h->table, set); key = HKEY(value, h->initval, t->htable_bits); - n = hbucket(t, key); + n = __ipset_dereference_protected(hbucket(t, key), 1); + if (!n) { + if (forceadd) { + if (net_ratelimit()) + pr_warn("Set %s is full, maxelem %u reached\n", + set->name, h->maxelem); + return -IPSET_ERR_HASH_FULL; + } else if (h->elements >= h->maxelem) { + goto set_full; + } + old = NULL; + n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, + GFP_ATOMIC); + if (!n) + return -ENOMEM; + 
n->size = AHASH_INIT_SIZE; + goto copy_elem; + } for (i = 0; i < n->pos; i++) { + if (!test_bit(i, n->used)) { + /* Reuse first deleted entry */ + if (j == -1) { + deleted = reuse = true; + j = i; + } + continue; + } data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || @@ -677,85 +746,94 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_timeout_expired(ext_timeout(data, set)))) { /* Just the extensions could be overwritten */ j = i; - goto reuse_slot; - } else { - ret = -IPSET_ERR_EXIST; - goto out; + goto overwrite_extensions; } + return -IPSET_ERR_EXIST; } /* Reuse first timed out entry */ if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(data, set)) && - j != AHASH_MAX(h) + 1) + j == -1) { j = i; + reuse = true; + } } - if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set) && n->pos) { - /* Choosing the first entry in the array to replace */ - j = 0; - goto reuse_slot; - } - if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) - /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h, NLEN(set->family), set->dsize); - - if (h->elements >= h->maxelem) { - if (net_ratelimit()) - pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - ret = -IPSET_ERR_HASH_FULL; - goto out; - } - -reuse_slot: - if (j != AHASH_MAX(h) + 1) { - /* Fill out reused slot */ + if (reuse || forceadd) { data = ahash_data(n, j, set->dsize); + if (!deleted) { #ifdef IP_SET_HASH_WITH_NETS - for (i = 0; i < IPSET_NET_COUNT; i++) { - mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), - NLEN(set->family), i); - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), - NLEN(set->family), i); - } + for (i = 0; i < IPSET_NET_COUNT; i++) + mtype_del_cidr(h, + NCIDR_PUT(DCIDR_GET(data->cidr, i)), + NLEN(set->family), i); #endif - ip_set_ext_destroy(set, data); - } else { - /* Use/create a new slot */ + ip_set_ext_destroy(set, data); + h->elements--; + } + goto copy_data; + 
} + if (h->elements >= h->maxelem) + goto set_full; + /* Create a new slot */ + if (n->pos >= n->size) { TUNE_AHASH_MAX(h, multi); - ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); - if (ret != 0) { - if (ret == -EAGAIN) - mtype_data_next(&h->next, d); - goto out; + if (n->size >= AHASH_MAX(h)) { + /* Trigger rehashing */ + mtype_data_next(&h->next, d); + return -EAGAIN; } - data = ahash_data(n, n->pos++, set->dsize); + old = n; + n = kzalloc(sizeof(*n) + + (old->size + AHASH_INIT_SIZE) * set->dsize, + GFP_ATOMIC); + if (!n) + return -ENOMEM; + memcpy(n, old, sizeof(struct hbucket) + + old->size * set->dsize); + n->size = old->size + AHASH_INIT_SIZE; + } + +copy_elem: + j = n->pos++; + data = ahash_data(n, j, set->dsize); +copy_data: + h->elements++; #ifdef IP_SET_HASH_WITH_NETS - for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), - NLEN(set->family), i); + for (i = 0; i < IPSET_NET_COUNT; i++) + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), + NLEN(set->family), i); #endif - h->elements++; - } memcpy(data, d, sizeof(struct mtype_elem)); +overwrite_extensions: #ifdef IP_SET_HASH_WITH_NETS mtype_data_set_flags(data, flags); #endif - if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(data, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(data, set), ext); + /* Must come last for the case when timed out entry is reused */ + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(data, set), ext->timeout); + smp_mb__before_atomic(); + set_bit(j, n->used); + if (old != ERR_PTR(-ENOENT)) { + rcu_assign_pointer(hbucket(t, key), n); + if (old) + kfree_rcu(old, rcu); + } -out: - rcu_read_unlock_bh(); - return ret; + return 0; +set_full: + if (net_ratelimit()) + pr_warn("Set %s is full, maxelem %u reached\n", + set->name, 
h->maxelem); + return -IPSET_ERR_HASH_FULL; } -/* Delete an element from the hash: swap it with the last element - * and free up space if possible. +/* Delete an element from the hash and free up space if possible. */ static int mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -766,29 +844,32 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i, ret = -IPSET_ERR_EXIST; -#ifdef IP_SET_HASH_WITH_NETS - u8 j; -#endif + int i, j, k, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; + size_t dsize = set->dsize; - rcu_read_lock_bh(); - t = rcu_dereference_bh(h->table); + t = ipset_dereference_protected(h->table, set); key = HKEY(value, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, set->dsize); + n = __ipset_dereference_protected(hbucket(t, key), 1); + if (!n) + goto out; + for (i = 0, k = 0; i < n->pos; i++) { + if (!test_bit(i, n->used)) { + k++; + continue; + } + data = ahash_data(n, i, dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(data, set))) goto out; - if (i != n->pos - 1) - /* Not last one */ - memcpy(data, ahash_data(n, n->pos - 1, set->dsize), - set->dsize); - n->pos--; + ret = 0; + clear_bit(i, n->used); + smp_mb__after_atomic(); + if (i + 1 == n->pos) + n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) @@ -796,25 +877,37 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, NLEN(set->family), j); #endif ip_set_ext_destroy(set, data); - if (n->pos + AHASH_INIT_SIZE < n->size) { - void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * set->dsize, - GFP_ATOMIC); - if (!tmp) { - ret = 0; + + for (; i < n->pos; i++) { + if (!test_bit(i, n->used)) + k++; + } + if (n->pos == 0 && k == 0) { + rcu_assign_pointer(hbucket(t, key), NULL); + 
kfree_rcu(n, rcu); + } else if (k >= AHASH_INIT_SIZE) { + struct hbucket *tmp = kzalloc(sizeof(*tmp) + + (n->size - AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); + if (!tmp) goto out; + tmp->size = n->size - AHASH_INIT_SIZE; + for (j = 0, k = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, dsize); + memcpy(tmp->value + k * dsize, data, dsize); + set_bit(j, tmp->used); + k++; } - n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * set->dsize); - kfree(n->value); - n->value = tmp; + tmp->pos = k; + rcu_assign_pointer(hbucket(t, key), tmp); + kfree_rcu(n, rcu); } - ret = 0; goto out; } out: - rcu_read_unlock_bh(); return ret; } @@ -865,8 +958,12 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); + n = rcu_dereference_bh(hbucket(t, key)); + if (!n) + continue; for (i = 0; i < n->pos; i++) { + if (!test_bit(i, n->used)) + continue; data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; @@ -904,7 +1001,6 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, int i, ret = 0; u32 key, multi = 0; - rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, @@ -919,8 +1015,14 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, #endif key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); + n = rcu_dereference_bh(hbucket(t, key)); + if (!n) { + ret = 0; + goto out; + } for (i = 0; i < n->pos; i++) { + if (!test_bit(i, n->used)) + continue; data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && @@ -930,7 +1032,6 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, } } out: - rcu_read_unlock_bh(); return ret; } @@ -942,15 +1043,19 @@ mtype_head(struct ip_set 
*set, struct sk_buff *skb) const struct htable *t; struct nlattr *nested; size_t memsize; + u8 htable_bits; + rcu_read_lock_bh(); t = rcu_dereference_bh_nfnl(h->table); memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); + htable_bits = t->htable_bits; + rcu_read_unlock_bh(); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, - htonl(jhash_size(t->htable_bits))) || + htonl(jhash_size(htable_bits))) || nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) goto nla_put_failure; #ifdef IP_SET_HASH_WITH_NETMASK @@ -1010,20 +1115,27 @@ mtype_list(const struct ip_set *set, u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; - int i; + int i, ret = 0; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; + pr_debug("list hash set %s\n", set->name); t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; + /* Expire may replace a hbucket with another one */ + rcu_read_lock(); for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[IPSET_CB_ARG0]); + n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0])); pr_debug("cb->arg bucket: %lu, t %p n %p\n", cb->args[IPSET_CB_ARG0], t, n); + if (!n) + continue; for (i = 0; i < n->pos; i++) { + if (!test_bit(i, n->used)) + continue; e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) @@ -1034,7 +1146,8 @@ mtype_list(const struct ip_set *set, if (!nested) { if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); - return -EMSGSIZE; + ret = -EMSGSIZE; + goto out; } else goto nla_put_failure; } @@ -1049,7 +1162,7 @@ mtype_list(const struct ip_set *set, /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; - return 0; + goto out; nla_put_failure: nlmsg_trim(skb, incomplete); @@ -1057,10 +1170,12 @@ 
nla_put_failure: pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n", set->name); cb->args[IPSET_CB_ARG0] = 0; - return -EMSGSIZE; - } - ipset_nest_end(skb, atd); - return 0; + ret = -EMSGSIZE; + } else + ipset_nest_end(skb, atd); +out: + rcu_read_unlock(); + return ret; } static int @@ -1122,12 +1237,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -#ifdef IP_SET_HASH_WITH_MARKMASK - !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) || -#endif !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; +#ifdef IP_SET_HASH_WITH_MARKMASK + /* Separated condition in order to avoid directive in argument list */ + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK))) + return -IPSET_ERR_PROTOCOL; +#endif if (tb[IPSET_ATTR_HASHSIZE]) { hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); @@ -1150,7 +1267,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #endif #ifdef IP_SET_HASH_WITH_MARKMASK if (tb[IPSET_ATTR_MARKMASK]) { - markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK])); + markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); if (markmask == 0) return -IPSET_ERR_INVALID_MARKMASK; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 2bbadcc..f54d706 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -315,6 +315,7 @@ hash_ip_init(void) static void __exit hash_ip_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_ip_type); } diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 3aafb36..f8fbc32 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -319,6 +319,7 @@ hash_ipmark_init(void) static 
void __exit hash_ipmark_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_ipmark_type); } diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 4db1270..9a31db8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -382,6 +382,7 @@ hash_ipport_init(void) static void __exit hash_ipport_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_ipport_type); } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index c01bf68..fc42489 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -397,6 +397,7 @@ hash_ipportip_init(void) static void __exit hash_ipportip_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_ipportip_type); } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 50248de..2a69b9b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -554,6 +554,7 @@ hash_ipportnet_init(void) static void __exit hash_ipportnet_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_ipportnet_type); } diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 8981c8b..112aff3 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -165,6 +165,7 @@ hash_mac_init(void) static void __exit hash_mac_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_mac_type); } diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 089b23f..e49b1d0 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -392,6 +392,7 @@ hash_net_init(void) static void __exit hash_net_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_net_type); } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c 
b/net/netfilter/ipset/ip_set_hash_netiface.c index 3258189..42c893e 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -500,6 +500,7 @@ hash_netiface_init(void) static void __exit hash_netiface_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_netiface_type); } diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index ed9cc45..b5428be 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -480,6 +480,7 @@ hash_netnet_init(void) static void __exit hash_netnet_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_netnet_type); } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index fbaf813..27307d0 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -498,6 +498,7 @@ hash_netport_init(void) static void __exit hash_netport_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_netport_type); } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index a828cbc..1e0e47a 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -581,6 +581,7 @@ hash_netportnet_init(void) static void __exit hash_netportnet_fini(void) { + rcu_barrier(); ip_set_type_unregister(&hash_netportnet_type); } -- cgit v0.10.2 From 00590fdd5be0d763631ef10e6a3e2ce8fc2d9ec3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 16:56:02 +0200 Subject: netfilter: ipset: Introduce RCU locking in list type Standard rculist is used. 
Signed-off-by: Jozsef Kadlecsik diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 107ea6c..9f624ee 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -27,6 +28,8 @@ MODULE_ALIAS("ip_set_list:set"); /* Member elements */ struct set_elem { + struct rcu_head rcu; + struct list_head list; ip_set_id_t id; }; @@ -41,12 +44,9 @@ struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ struct net *net; /* namespace */ - struct set_elem members[0]; /* the set members */ + struct list_head members; /* the set members */ }; -#define list_set_elem(set, map, id) \ - (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) - static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -54,17 +54,14 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i, cmdflags = opt->cmdflags; + u32 cmdflags = opt->cmdflags; int ret; /* Don't lookup sub-counters at all */ opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -91,13 +88,9 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i; int ret; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; + list_for_each_entry(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ 
-115,13 +108,9 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i; int ret; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; + list_for_each_entry(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -138,110 +127,65 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + int ret = -EINVAL; + rcu_read_lock(); switch (adt) { case IPSET_TEST: - return list_set_ktest(set, skb, par, opt, &ext); + ret = list_set_ktest(set, skb, par, opt, &ext); + break; case IPSET_ADD: - return list_set_kadd(set, skb, par, opt, &ext); + ret = list_set_kadd(set, skb, par, opt, &ext); + break; case IPSET_DEL: - return list_set_kdel(set, skb, par, opt, &ext); + ret = list_set_kdel(set, skb, par, opt, &ext); + break; default: break; } - return -EINVAL; -} - -static bool -id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) -{ - const struct list_set *map = set->data; - const struct set_elem *e; - - if (i >= map->size) - return 0; + rcu_read_unlock(); - e = list_set_elem(set, map, i); - return !!(e->id == id && - !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set)))); + return ret; } -static int -list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, - const struct ip_set_ext *ext) -{ - struct list_set *map = set->data; - struct set_elem *e = list_set_elem(set, map, i); - - if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) { - /* Last element replaced: e.g. 
add new,before,last */ - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - } else { - struct set_elem *x = list_set_elem(set, map, - map->size - 1); - - /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) { - ip_set_put_byindex(map->net, x->id); - ip_set_ext_destroy(set, x); - } - memmove(list_set_elem(set, map, i + 1), e, - set->dsize * (map->size - (i + 1))); - /* Extensions must be initialized to zero */ - memset(e, 0, set->dsize); - } - } - - e->id = d->id; - if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, set), ext->timeout); - if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, set), ext); - if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); - if (SET_WITH_SKBINFO(set)) - ip_set_init_skbinfo(ext_skbinfo(e, set), ext); - return 0; -} +/* Userspace interfaces: we are protected by the nfnl mutex */ -static int -list_set_del(struct ip_set *set, u32 i) +static void +__list_set_del(struct ip_set *set, struct set_elem *e) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(map->net, e->id); + /* We may call it, because we don't have a to be destroyed + * extension which is used by the kernel. 
+ */ ip_set_ext_destroy(set, e); + kfree_rcu(e, rcu); +} - if (i < map->size - 1) - memmove(e, list_set_elem(set, map, i + 1), - set->dsize * (map->size - (i + 1))); +static inline void +list_set_del(struct ip_set *set, struct set_elem *e) +{ + list_del_rcu(&e->list); + __list_set_del(set, e); +} - /* Last element */ - e = list_set_elem(set, map, map->size - 1); - e->id = IPSET_INVALID_ID; - return 0; +static inline void +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) +{ + list_replace_rcu(&old->list, &e->list); + __list_set_del(set, old); } static void set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e; - u32 i = 0; + struct set_elem *e, *n; - while (i < map->size) { - e = list_set_elem(set, map, i); - if (e->id != IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, set))) - list_set_del(set, i); - /* Check element moved to position i in next loop */ - else - i++; - } + list_for_each_entry_safe(e, n, &map->members, list) + if (ip_set_timeout_expired(ext_timeout(e, set))) + list_set_del(set, e); } static int @@ -250,31 +194,45 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e; - u32 i; + struct set_elem *e, *next, *prev = NULL; int ret; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + list_for_each_entry(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) continue; - else if (e->id != d->id) + else if (e->id != d->id) { + prev = e; continue; + } if (d->before == 0) - return 1; - else if (d->before > 0) - ret = id_eq(set, i + 1, d->refid); - else - ret = i > 0 && id_eq(set, i - 1, d->refid); + ret = 1; + else if (d->before > 0) { + next = list_next_entry(e, list); 
+ ret = !list_is_last(&e->list, &map->members) && + next->id == d->refid; + } else + ret = prev && prev->id == d->refid; return ret; } return 0; } +static void +list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, + struct set_elem *e) +{ + if (SET_WITH_COUNTER(set)) + ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); + if (SET_WITH_SKBINFO(set)) + ip_set_init_skbinfo(ext_skbinfo(e, set), ext); + /* Update timeout last */ + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); +} static int list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -282,60 +240,78 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e; + struct set_elem *e, *n, *prev, *next; bool flag_exist = flags & IPSET_FLAG_EXIST; - u32 i, ret = 0; if (SET_WITH_TIMEOUT(set)) set_cleanup_entries(set); - /* Check already added element */ - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - goto insert; - else if (e->id != d->id) + /* Find where to add the new entry */ + n = prev = next = NULL; + list_for_each_entry(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) continue; - - if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) || - (d->before < 0 && - (i == 0 || !id_eq(set, i - 1, d->refid)))) - /* Before/after doesn't match */ + else if (d->id == e->id) + n = e; + else if (d->before == 0 || e->id != d->refid) + continue; + else if (d->before > 0) + next = e; + else + prev = e; + } + /* Re-add already existing element */ + if (n) { + if ((d->before > 0 && !next) || + (d->before < 0 && !prev)) return -IPSET_ERR_REF_EXIST; if (!flag_exist) - /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ - ip_set_ext_destroy(set, 
e); + ip_set_ext_destroy(set, n); + list_set_init_extensions(set, ext, n); - if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, set), ext->timeout); - if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, set), ext); - if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); - if (SET_WITH_SKBINFO(set)) - ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(map->net, d->id); return 0; } -insert: - ret = -IPSET_ERR_LIST_FULL; - for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - ret = d->before != 0 ? -IPSET_ERR_REF_EXIST - : list_set_add(set, i, d, ext); - else if (e->id != d->refid) - continue; - else if (d->before > 0) - ret = list_set_add(set, i, d, ext); - else if (i + 1 < map->size) - ret = list_set_add(set, i + 1, d, ext); + /* Add new entry */ + if (d->before == 0) { + /* Append */ + n = list_empty(&map->members) ? NULL : + list_last_entry(&map->members, struct set_elem, list); + } else if (d->before > 0) { + /* Insert after next element */ + if (!list_is_last(&next->list, &map->members)) + n = list_next_entry(next, list); + } else { + /* Insert before prev element */ + if (prev->list.prev != &map->members) + n = list_prev_entry(prev, list); } + /* Can we replace a timed out entry? 
*/ + if (n && + !(SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(n, set)))) + n = NULL; + + e = kzalloc(set->dsize, GFP_KERNEL); + if (!e) + return -ENOMEM; + e->id = d->id; + INIT_LIST_HEAD(&e->list); + list_set_init_extensions(set, ext, e); + if (n) + list_set_replace(set, e, n); + else if (next) + list_add_tail_rcu(&e->list, &next->list); + else if (prev) + list_add_rcu(&e->list, &prev->list); + else + list_add_tail_rcu(&e->list, &map->members); - return ret; + return 0; } static int @@ -344,32 +320,30 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e; - u32 i; - - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return d->before != 0 ? -IPSET_ERR_REF_EXIST - : -IPSET_ERR_EXIST; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + struct set_elem *e, *next, *prev = NULL; + + list_for_each_entry(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) continue; - else if (e->id != d->id) + else if (e->id != d->id) { + prev = e; continue; + } - if (d->before == 0) - return list_set_del(set, i); - else if (d->before > 0) { - if (!id_eq(set, i + 1, d->refid)) + if (d->before > 0) { + next = list_next_entry(e, list); + if (list_is_last(&e->list, &map->members) || + next->id != d->refid) return -IPSET_ERR_REF_EXIST; - return list_set_del(set, i); - } else if (i == 0 || !id_eq(set, i - 1, d->refid)) - return -IPSET_ERR_REF_EXIST; - else - return list_set_del(set, i); + } else if (d->before < 0) { + if (!prev || prev->id != d->refid) + return -IPSET_ERR_REF_EXIST; + } + list_set_del(set, e); + return 0; } - return -IPSET_ERR_EXIST; + return d->before != 0 ? 
-IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; } static int @@ -404,6 +378,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + e.before = f & IPSET_FLAG_BEFORE; } @@ -441,27 +416,26 @@ static void list_set_flush(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e; - u32 i; - - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - e->id = IPSET_INVALID_ID; - } - } + struct set_elem *e, *n; + + list_for_each_entry_safe(e, n, &map->members, list) + list_set_del(set, e); } static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; + struct set_elem *e, *n; if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); - list_set_flush(set); + list_for_each_entry_safe(e, n, &map->members, list) { + list_del(&e->list); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); + kfree(e); + } kfree(map); set->data = NULL; @@ -472,6 +446,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) { const struct list_set *map = set->data; struct nlattr *nested; + struct set_elem *e; + u32 n = 0; + + list_for_each_entry(e, &map->members, list) + n++; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -479,7 +458,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * set->dsize))) + htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -496,18 +475,22 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[IPSET_CB_ARG0]; - const struct set_elem *e; 
+ u32 i = 0, first = cb->args[IPSET_CB_ARG0]; + struct set_elem *e; + int ret = 0; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[IPSET_CB_ARG0] < map->size; - cb->args[IPSET_CB_ARG0]++) { - i = cb->args[IPSET_CB_ARG0]; - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - goto finish; + list_for_each_entry(e, &map->members, list) { + if (i == first) + break; + i++; + } + + rcu_read_lock(); + list_for_each_entry_from(e, &map->members, list) { + i++; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -515,9 +498,10 @@ list_set_list(const struct ip_set *set, if (!nested) { if (i == first) { nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; + ret = -EMSGSIZE; + goto out; + } + goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(map->net, e->id))) @@ -526,20 +510,23 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; ipset_nest_end(skb, nested); } -finish: + ipset_nest_end(skb, atd); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; - return 0; + goto out; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { cb->args[IPSET_CB_ARG0] = 0; - return -EMSGSIZE; + ret = -EMSGSIZE; } + cb->args[IPSET_CB_ARG0] = i - 1; ipset_nest_end(skb, atd); - return 0; +out: + rcu_read_unlock(); + return ret; } static bool @@ -574,9 +561,9 @@ list_set_gc(unsigned long ul_set) struct ip_set *set = (struct ip_set *) ul_set; struct list_set *map = set->data; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set_cleanup_entries(set); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -600,24 +587,16 @@ static bool init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; - struct set_elem *e; - u32 i; - map = kzalloc(sizeof(*map) + - min_t(u32, size, 
IP_SET_LIST_MAX_SIZE) * set->dsize, - GFP_KERNEL); + map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) return false; map->size = size; map->net = net; + INIT_LIST_HEAD(&map->members); set->data = map; - for (i = 0; i < size; i++) { - e = list_set_elem(set, map, i); - e->id = IPSET_INVALID_ID; - } - return true; } @@ -690,6 +669,7 @@ list_set_init(void) static void __exit list_set_fini(void) { + rcu_barrier(); ip_set_type_unregister(&list_set_type); } -- cgit v0.10.2 From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 19:45:33 +0200 Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl Signed-off-by: Jozsef Kadlecsik diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 19b4969..48bb01e 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) cpu_to_be64((u64)skbinfo->skbmark << 32 | skbinfo->skbmarkmask))) || (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, cpu_to_be16(skbinfo->skbqueue))); - } static inline void diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 5ab4e60..63b2e34 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -15,12 +15,12 @@ /* The protocol version */ #define IPSET_PROTOCOL 6 -/* The maximum permissible comment length we will accept over netlink */ -#define IPSET_MAX_COMMENT_SIZE 255 - /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 +/* The maximum permissible comment length we will accept over netlink */ +#define 
IPSET_MAX_COMMENT_SIZE 255 + /* Message types and commands */ enum ipset_cmd { IPSET_CMD_NONE, diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 86429f3..d05e759 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct mtype *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -223,7 +223,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_is_filled((const struct mtype_elem *) x) && + mtype_is_filled((const struct mtype_elem *)x) && #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; @@ -240,7 +240,7 @@ mtype_list(const struct ip_set *set, if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, x, - mtype_is_filled((const struct mtype_elem *) x))) + mtype_is_filled((const struct mtype_elem *)x))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -266,13 +266,14 @@ out: static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct mtype *map = set->data; void *x; u32 id; /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ + * but adding/deleting new entries is locked out + */ spin_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index b8ce474..64a5643 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -59,7 +59,7 @@ struct 
bitmap_ip_adt_elem { static inline u32 ip_to_id(const struct bitmap_ip *m, u32 ip) { - return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; + return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts; } /* Common functions */ @@ -175,8 +175,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else + } else { ip_to = ip; + } if (ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -187,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -278,8 +279,9 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fe00e87..1430535 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -90,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, return 0; elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) - return e->ether == NULL || + return !e->ether || ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; @@ -131,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, - * possibly by the kernel */ + * possibly by the kernel + */ if (e->ether) ip_set_timeout_set(timeout, t); else @@ -155,7 +156,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* memcpy isn't 
atomic */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; } else if (!e->ether) @@ -164,19 +165,18 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* Fill the MAC address and trigger the timer activation */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; } else if (e->ether) { /* We can store MAC too */ - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return 0; - } else { - elem->filled = MAC_UNSET; - /* MAC is not stored yet, don't start timer */ - return IPSET_ADD_STORE_PLAIN_TIMEOUT; } + elem->filled = MAC_UNSET; + /* MAC is not stored yet, don't start timer */ + return IPSET_ADD_STORE_PLAIN_TIMEOUT; } static inline int @@ -352,8 +352,9 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } elements = (u64)last_ip - first_ip + 1; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 2d360f9..5338ccd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -162,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (port < map->first_port) return -IPSET_ERR_BITMAP_RANGE; } - } else + } else { port_to = port; + } if (port_to > map->last_port) return -IPSET_ERR_BITMAP_RANGE; @@ -174,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c 
index 2b21a19..338b404 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -35,6 +35,7 @@ struct ip_set_net { bool is_deleted; /* deleted by ip_set_net_exit */ bool is_destroyed; /* all sets are destroyed */ }; + static int ip_set_net_id __read_mostly; static inline struct ip_set_net *ip_set_pernet(struct net *net) @@ -60,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] -/* - * The set types are implemented in modules and registered set types +/* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is * serialized by ip_set_type_mutex. */ @@ -131,7 +131,8 @@ __find_set_type_get(const char *name, u8 family, u8 revision, goto unlock; } /* Make sure the type is already loaded - * but we don't support the revision */ + * but we don't support the revision + */ list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STRNCMP(type->name, name)) { err = -IPSET_ERR_FIND_TYPE; @@ -290,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -307,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -318,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) return -IPSET_ERR_PROTOCOL; memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), - sizeof(struct in6_addr)); + sizeof(struct in6_addr)); return 0; } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); @@ -467,8 +468,7 @@ ip_set_put_extensions(struct sk_buff 
*skb, const struct ip_set *set, } EXPORT_SYMBOL_GPL(ip_set_put_extensions); -/* - * Creating/destroying/renaming/swapping affect the existence and +/* Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace * only and serialized by the nfnl mutex indirectly from nfnetlink. * @@ -495,8 +495,7 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } -/* - * Add, del and test set entries from kernel. +/* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced * so it can't be destroyed (or changed) under our foot. @@ -524,7 +523,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -563,7 +562,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -586,7 +585,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -601,8 +600,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(ip_set_del); -/* - * Find set by name, reference it once. The reference makes sure the +/* Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. 
* */ @@ -616,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s != NULL && STRNCMP(s->name, name)) { + if (s && STRNCMP(s->name, name)) { __ip_set_get(s); index = i; *set = s; @@ -629,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) } EXPORT_SYMBOL_GPL(ip_set_get_byname); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -643,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) rcu_read_lock(); set = rcu_dereference(inst->ip_set_list)[index]; - if (set != NULL) + if (set) __ip_set_put(set); rcu_read_unlock(); } @@ -657,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_put_byindex); -/* - * Get the name of a set behind a set index. +/* Get the name of a set behind a set index. * We assume the set is referenced, so it does exist and * can't be destroyed. The set cannot be renamed due to * the referencing either. @@ -669,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) { const struct ip_set *set = ip_set_rcu_get(net, index); - BUG_ON(set == NULL); + BUG_ON(!set); BUG_ON(set->ref == 0); /* Referenced, so it's safe */ @@ -677,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_name_byindex); -/* - * Routines to call by external subsystems, which do not +/* Routines to call by external subsystems, which do not * call nfnl_lock for us. */ -/* - * Find set by index, reference it once. The reference makes sure the +/* Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * * The nfnl mutex is used in the function. 
@@ -709,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -725,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) nfnl_lock(NFNL_SUBSYS_IPSET); if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ set = ip_set(inst, index); - if (set != NULL) + if (set) __ip_set_put(set); } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); -/* - * Communication protocol with userspace over netlink. +/* Communication protocol with userspace over netlink. * * The commands are serialized by the nfnl mutex. */ @@ -760,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); - if (nlh == NULL) + if (!nlh) return NULL; nfmsg = nlmsg_data(nlh); @@ -793,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); - if (set != NULL && STRNCMP(set->name, name)) { + if (set && STRNCMP(set->name, name)) { *id = i; break; } @@ -819,7 +811,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, *index = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s == NULL) { + if (!s) { if (*index == IPSET_INVALID_ID) *index = i; } else if (STRNCMP(name, s->name)) { @@ -851,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; - struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; const 
char *name, *typename; u8 family, revision; u32 flags = flag_exist(nlh); int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_REVISION] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL || - (attr[IPSET_ATTR_DATA] != NULL && + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_REVISION] || + !attr[IPSET_ATTR_FAMILY] || + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])))) return -IPSET_ERR_PROTOCOL; @@ -873,11 +865,10 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", name, typename, family_name(family), revision); - /* - * First, and without any locks, allocate and initialize + /* First, and without any locks, allocate and initialize * a normal base set structure. */ - set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); + set = kzalloc(sizeof(*set), GFP_KERNEL); if (!set) return -ENOMEM; spin_lock_init(&set->lock); @@ -885,21 +876,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set->family = family; set->revision = revision; - /* - * Next, check that we know the type, and take + /* Next, check that we know the type, and take * a reference on the type, to make sure it stays available * while constructing our new set. * * After referencing the type, we try to create the type * specific part of the set without holding any locks. */ - ret = find_set_type_get(typename, family, revision, &(set->type)); + ret = find_set_type_get(typename, family, revision, &set->type); if (ret) goto out; - /* - * Without holding any locks, create private part. - */ + /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy)) { @@ -913,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* BTW, ret==0 here. 
*/ - /* - * Here, we have a valid, constructed set and we are protected + /* Here, we have a valid, constructed set and we are protected * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ @@ -937,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* Wraparound */ goto cleanup; - list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL); + list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ @@ -951,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, inst->ip_set_max = i; kfree(tmp); ret = 0; - } else if (ret) + } else if (ret) { goto cleanup; + } - /* - * Finally! Add our shiny new set to the list, and be done. - */ + /* Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); ip_set(inst, index) = set; @@ -1018,7 +1004,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && s->ref) { + if (s && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1037,7 +1023,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } else { s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); - if (s == NULL) { + if (!s) { ret = -ENOENT; goto out; } else if (s->ref) { @@ -1082,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL) + if (s) ip_set_flush_set(s); } } else { s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (s == NULL) + if (!s) return -ENOENT; ip_set_flush_set(s); @@ -1119,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + 
!attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; read_lock_bh(&ip_set_ref_lock); @@ -1136,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && STRNCMP(s->name, name2)) { + if (s && STRNCMP(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } @@ -1168,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, char from_name[IPSET_MAXNAMELEN]; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); - if (from == NULL) + if (!from) return -ENOENT; to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); - if (to == NULL) + if (!to) return -IPSET_ERR_EXIST_SETNAME2; /* Features must not change. - * Not an artificial restriction anymore, as we must prevent - * possible loops created by swapping in setlist type of sets. */ + * Not an artificial restriction anymore, as we must prevent + * possible loops created by swapping in setlist type of sets. 
+ */ if (!(from->type->features == to->type->features && from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; @@ -1246,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; @@ -1260,16 +1247,18 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (set == NULL) + if (!set) return -ENOENT; dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; - } else + } else { dump_type = DUMP_ALL; + } if (cda[IPSET_ATTR_FLAGS]) { u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); + dump_type |= (f << 16); } cb->args[IPSET_CB_NET] = (unsigned long)inst; @@ -1295,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message - * manually :-( */ + * manually :-( + */ if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(cb->skb, nlh, ret); return ret; @@ -1313,7 +1303,7 @@ dump_last: pr_debug("dump type, flag: %u %u index: %ld\n", dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { - index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; + index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; write_lock_bh(&ip_set_ref_lock); set = ip_set(inst, index); is_destroyed = inst->is_destroyed; @@ -1480,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, size_t payload = min(SIZE_MAX, sizeof(*errmsg) + nlmsg_len(nlh)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *cmdattr; u32 *errline; skb2 = nlmsg_new(payload, 
GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); @@ -1502,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1517,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1572,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + 
(attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1627,17 +1618,17 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_DATA] == NULL || + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], @@ -1668,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL)) + !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1725,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL)) + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); @@ -1736,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, return ret; 
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1781,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) return -IPSET_ERR_PROTOCOL; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1913,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned int *) data; + op = (unsigned int *)data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ @@ -2025,7 +2016,7 @@ ip_set_net_init(struct net *net) if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); + list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; @@ -2061,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = { .size = sizeof(struct ip_set_net) }; - static int __init ip_set_init(void) { int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); return ret; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 1981f02..42c3e3b 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct tcphdr *th; th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); - if (th == NULL) + if (!th) /* No choice either */ return false; @@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, 
unsigned int protooff, const sctp_sctphdr_t *sh; sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); - if (sh == NULL) + if (!sh) /* No choice either */ return false; @@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct udphdr *uh; uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); - if (uh == NULL) + if (!uh) /* No choice either */ return false; @@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmphdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16)htons((ic->type << 8) | ic->code); @@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmp6hdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16) @@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, return false; default: /* Other protocols doesn't have ports, - so we can match fragments */ + * so we can match fragments. + */ *proto = protocol; return true; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f352cc0..afe905c 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -35,7 +35,7 @@ /* Number of elements to store in an initial array block */ #define AHASH_INIT_SIZE 4 /* Max number of elements to store in an array block */ -#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) +#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 @@ -57,6 +57,7 @@ tune_ahash_max(u8 curr, u32 multi) */ return n > curr && n <= AHASH_MAX_TUNED ? 
n : curr; } + #define TUNE_AHASH_MAX(h, multi) \ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) #else @@ -256,7 +257,7 @@ htable_bits(u32 hashsize) #endif #define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ +(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \ & jhash_mask(htable_bits)) #ifndef htype @@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { - if (j != -1) + if (j != -1) { continue; - else if (h->nets[i].cidr[n] < cidr) + } else if (h->nets[i].cidr[n] < cidr) { j = i; - else if (h->nets[i].cidr[n] == cidr) { + } else if (h->nets[i].cidr[n] == cidr) { h->nets[cidr - 1].nets[n]++; return; } @@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr[n] != cidr) - continue; + if (h->nets[i].cidr[n] != cidr) + continue; h->nets[cidr - 1].nets[n]--; if (h->nets[cidr - 1].nets[n] > 0) - return; + return; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) - h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + return; } } #endif @@ -426,8 +427,8 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, __ipset_dereference_protected(h->table, 1), - true); + mtype_ahash_destroy(set, + __ipset_dereference_protected(h->table, 1), true); kfree(h); set->data = NULL; @@ -439,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct htype *h = set->data; init_timer(&h->gc); - h->gc.data = (unsigned long) set; + h->gc.data = (unsigned long)set; h->gc.function = gc; h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); @@ -530,7 +531,7 @@ 
mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct htype *h = set->data; pr_debug("called\n"); @@ -544,7 +545,8 @@ mtype_gc(unsigned long ul_set) /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or - * fail due to memory pressures. */ + * fail due to memory pressures. + */ static int mtype_resize(struct ip_set *set, bool retried) { @@ -687,7 +689,8 @@ cleanup: } /* Add an element to a hash and update the internal counters when succeeded, - * otherwise report the proper error code. */ + * otherwise report the proper error code. + */ static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) @@ -926,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS /* Special test function which takes into account the different network - * sizes added to the set */ + * sizes added to the set + */ static int mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, const struct ip_set_ext *ext, @@ -1004,7 +1008,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, - * try all possible network sizes */ + * try all possible network sizes + */ for (i = 0; i < IPSET_NET_COUNT; i++) if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; @@ -1148,8 +1153,8 @@ mtype_list(const struct ip_set *set, nla_nest_cancel(skb, atd); ret = -EMSGSIZE; goto out; - } else - goto nla_put_failure; + } + goto nla_put_failure; } if (mtype_data_list(skb, e)) goto nla_put_failure; @@ -1171,8 +1176,9 @@ nla_put_failure: set->name); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; - } else + 
} else { ipset_nest_end(skb, atd); + } out: rcu_read_unlock(); return ret; @@ -1180,12 +1186,13 @@ out: static int IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + enum ipset_adt adt, u32 *lineno, u32 flags, + bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index f54d706..9d6bf19 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -158,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index f8fbc32..a0695a2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -155,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -206,7 +206,6 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next, #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" - static int hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -268,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; - return ret; + return 0; } static struct ip_set_type hash_ipmark_type __read_mostly = { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c 
b/net/netfilter/ipset/ip_set_hash_ipport.c index 9a31db8..9d84b3d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -140,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -187,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -305,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -329,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fc42489..215b7b9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -63,7 +63,7 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, static bool hash_ipportip4_data_list(struct sk_buff *skb, - const struct hash_ipportip4_elem *data) + const struct hash_ipportip4_elem *data) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || @@ -147,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -194,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; 
@@ -320,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -344,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2a69b9b..9ca7196 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -209,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -263,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (ip2_from + UINT_MAX == ip2_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -287,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -466,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -497,8 +502,8 @@ 
hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 112aff3..f1e7d2c 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -89,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, return 0; if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); @@ -116,7 +116,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER])); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -IPSET_ERR_HASH_ELEM; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index e49b1d0..3e4bffd 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -169,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -176,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); - return ip_set_enomatch(ret, flags, adt, set) ? -ret: + return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 
0 : ret; } @@ -198,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -339,6 +340,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 42c893e..43d8c98 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -143,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb) return dev ? dev->name : NULL; } -static const char *get_phyoutdev_name(const struct sk_buff *skb) +static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); @@ -178,15 +178,16 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? 
IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -229,6 +230,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) @@ -249,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr); + } if (retried) ip = ntohl(h->next.ip); @@ -261,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -385,15 +388,16 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); + if (!eiface) return -EINVAL; - STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? 
IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -403,7 +407,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; @@ -440,6 +444,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index b5428be..3c862c0 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -57,8 +57,8 @@ struct hash_netnet4_elem { static inline bool hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, - const struct hash_netnet4_elem *ip2, - u32 *multi) + const struct hash_netnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp; @@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) static inline void hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, - struct hash_netnet4_elem *orig) + struct hash_netnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) static bool hash_netnet4_data_list(struct sk_buff *skb, - const struct hash_netnet4_elem *data) + const struct hash_netnet4_elem *data) { u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; @@ -122,7 +122,7 @@ nla_put_failure: static inline void hash_netnet4_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet4_elem *d) + const struct hash_netnet4_elem *d) { next->ipcmp = d->ipcmp; } @@ -133,8 +133,8 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next, static int hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -156,7 +156,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -199,6 +199,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -221,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { @@ -233,8 +235,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -251,8 +254,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return 
ret; - else - ret = 0; + + ret = 0; ip2 = last2 + 1; } ip = last + 1; @@ -276,8 +279,8 @@ struct hash_netnet6_elem { static inline bool hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, - const struct hash_netnet6_elem *ip2, - u32 *multi) + const struct hash_netnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -304,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) static inline void hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, - struct hash_netnet6_elem *orig) + struct hash_netnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -323,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) static bool hash_netnet6_data_list(struct sk_buff *skb, - const struct hash_netnet6_elem *data) + const struct hash_netnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -342,7 +345,7 @@ nla_put_failure: static inline void hash_netnet6_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet6_elem *d) + const struct hash_netnet6_elem *d) { } @@ -356,8 +359,8 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next, static int hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -367,7 +370,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) - e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); ip6addrptr(skb, opt->flags & 
IPSET_DIM_TWO_SRC, &e.ip[1].in6); @@ -379,7 +382,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; @@ -424,6 +427,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 27307d0..731813e 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -198,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -208,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -233,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -250,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } ip = last + 1; } @@ -413,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return 
-IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -444,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 1e0e47a..0c68734 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -62,8 +62,8 @@ struct hash_netportnet4_elem { static inline bool hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, - const struct hash_netportnet4_elem *ip2, - u32 *multi) + const struct hash_netportnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp && @@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) static inline void hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, - struct hash_netportnet4_elem *orig) + struct hash_netportnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, static bool hash_netportnet4_data_list(struct sk_buff *skb, - const struct hash_netportnet4_elem *data) + const struct hash_netportnet4_elem *data) { u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; @@ -132,7 +132,7 @@ nla_put_failure: static inline void hash_netportnet4_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet4_elem *d) + const struct hash_netportnet4_elem *d) { next->ipcmp = d->ipcmp; next->port = d->port; @@ -144,8 +144,8 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -171,7 +171,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -223,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -254,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { @@ -273,8 +276,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { 
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -296,8 +300,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -324,8 +328,8 @@ struct hash_netportnet6_elem { static inline bool hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, - const struct hash_netportnet6_elem *ip2, - u32 *multi) + const struct hash_netportnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -354,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) static inline void hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, - struct hash_netportnet6_elem *orig) + struct hash_netportnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -374,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, static bool hash_netportnet6_data_list(struct sk_buff *skb, - const struct hash_netportnet6_elem *data) + const struct hash_netportnet6_elem *data) { u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; @@ -395,7 +399,7 @@ nla_put_failure: static inline void hash_netportnet6_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet6_elem *d) + const struct hash_netportnet6_elem *d) { next->port = d->port; } @@ -410,8 +414,8 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -437,7 +441,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -493,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -524,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 9f624ee..a1fe537 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -206,14 +206,15 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; } - if (d->before == 
0) + if (d->before == 0) { ret = 1; - else if (d->before > 0) { + } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && next->id == d->refid; - } else + } else { ret = prev && prev->id == d->refid; + } return ret; } return 0; @@ -558,7 +559,7 @@ static const struct ip_set_type_variant set_variant = { static void list_set_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct list_set *map = set->data; spin_lock_bh(&set->lock); @@ -575,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct list_set *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 04d15fd..1c8a42c 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -1,9 +1,7 @@ #include <linux/export.h> #include <linux/netfilter/ipset/pfxlen.h> -/* - * Prefixlen maps for fast conversions, by Jan Engelhardt. - */ +/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ #define E(a, b, c, d) \ {.ip6 = { \ @@ -11,8 +9,7 @@ htonl(c), htonl(d), \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_netmask_map[prefixlength].ip.
*/ const union nf_inet_addr ip_set_netmask_map[] = { @@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = { EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E -#define E(a, b, c, d) \ - {.ip6 = { (__force __be32) a, (__force __be32) b, \ - (__force __be32) c, (__force __be32) d, \ +#define E(a, b, c, d) \ + {.ip6 = { (__force __be32)a, (__force __be32)b, \ + (__force __be32)c, (__force __be32)d, \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index b103e96..5669e5b 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -9,7 +9,8 @@ */ /* Kernel module which implements the set match and SET target - * for netfilter/iptables. */ + * for netfilter/iptables. + */ #include <linux/module.h> #include <linux/skbuff.h> @@ -53,6 +54,7 @@ static bool set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); @@ -69,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info) info->u.compat.dim = IPSET_DIM_ZERO; if (info->u.flags[0] & IPSET_MATCH_INV) info->u.compat.flags |= IPSET_INV_MATCH; - for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) { info->u.compat.dim++; if (info->u.flags[i] & IPSET_SRC) - info->u.compat.flags |= (1<<info->u.compat.dim); + info->u.compat.flags |= (1 << info->u.compat.dim); } } @@ -89,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) info->match_set.index); return -ENOENT; } - if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net,
info->match_set.index); return -ERANGE; @@ -115,6 +117,7 @@ static bool set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, 0, UINT_MAX); @@ -179,9 +182,10 @@ static bool set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v3 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -225,9 +229,10 @@ static bool set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v4 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -253,6 +258,7 @@ static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, @@ -291,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) return -ENOENT; } } - if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || - info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || + info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -325,6 +331,7 @@ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; + 
ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -393,6 +400,7 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -400,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -419,6 +427,8 @@ static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; + int ret; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -426,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) ADT_OPT(map_opt, par->family, info->map_set.dim, info->map_set.flags, 0, UINT_MAX); - int ret; - /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -457,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) return 
XT_CONTINUE; } - static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { @@ -497,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only" - "from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, @@ -519,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension " - "is over the limit!\n"); + pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -546,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(par->net, info->map_set.index); } - static struct xt_match set_matches[] __read_mostly = { { .name = "set", -- cgit v0.10.2 From f09becc79f899f92557ce6d5562a8b80d6addb34 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Jun 2015 13:58:52 +0200 Subject: netfilter: Kconfig: get rid of parens around depends on According to the reporter, they are not needed. 
Reported-by: Sergei Shtylyov Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fb20f36..2199a5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -195,7 +195,8 @@ config IP_NF_MATCH_ECN config IP_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW) + depends on NETFILTER_ADVANCED + depends on IP_NF_MANGLE || IP_NF_RAW ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ca69983..b552cf0 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -186,7 +186,8 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW) + depends on NETFILTER_ADVANCED + depends on IP6_NF_MANGLE || IP6_NF_RAW ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index fbc8d15..6eae69a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -206,7 +206,7 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" - depends on (IPV6 || IPV6=n) + depends on IPV6 || IPV6=n depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. As one of the most @@ -723,7 +723,7 @@ config NETFILTER_XT_TARGET_HL config NETFILTER_XT_TARGET_HMARK tristate '"HMARK" target support' - depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n depends on NETFILTER_ADVANCED ---help--- This option adds the "HMARK" target. 
@@ -865,7 +865,7 @@ config NETFILTER_XT_TARGET_REDIRECT config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED - depends on (IPV6 || IPV6=n) + depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK ---help--- This option adds a "TEE" target with which a packet can be cloned and @@ -875,8 +875,8 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on (IPV6 || IPV6=n) - depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on IPV6 || IPV6=n + depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES @@ -915,7 +915,7 @@ config NETFILTER_XT_TARGET_SECMARK config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' - depends on (IPV6 || IPV6=n) + depends on IPV6 || IPV6=n default m if NETFILTER_ADVANCED=n ---help--- This option adds a `TCPMSS' target, which allows you to alter the @@ -1127,7 +1127,7 @@ config NETFILTER_XT_MATCH_ESP config NETFILTER_XT_MATCH_HASHLIMIT tristate '"hashlimit" match support' - depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n depends on NETFILTER_ADVANCED help This option adds a `hashlimit' match. @@ -1369,8 +1369,8 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED depends on !NF_CONNTRACK || NF_CONNTRACK - depends on (IPV6 || IPV6=n) - depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on IPV6 || IPV6=n + depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES help -- cgit v0.10.2 From 711bdde6a884354ddae8da2fcb495b2a9364cc90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 09:57:30 -0700 Subject: netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference. 
After Florian patches, there is no need for XT_TABLE_INFO_SZ anymore : Only one copy of table is kept, instead of one copy per cpu. We also can avoid a dereference if we put table data right after xt_table_info. It reduces register pressure and helps compiler. Then, we attempt a kmalloc() if total size is under order-3 allocation, to reduce TLB pressure, as in many cases, rules fit in 32 KB. Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9969d79..95693c4 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -225,12 +225,9 @@ struct xt_table_info { unsigned int __percpu *stackptr; void ***jumpstack; - /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries; + unsigned char entries[0] __aligned(8); }; -#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ - + nr_cpu_ids * sizeof(char *)) int xt_register_target(struct xt_target *target); void xt_unregister_target(struct xt_target *target); int xt_register_targets(struct xt_target *target, unsigned int n); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d75c139..95c9b6e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct arphdr *arp; struct arpt_entry *e, *back; const char *indev, *outdev; - void *table_base; + const void *table_base; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -868,7 +868,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct arpt_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6151500..6c72fbb 100644 --- 
a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -938,7 +938,7 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1052,7 +1052,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ipt_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 80a7f0d..3c35ced 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -951,7 +951,7 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1065,7 +1065,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6062ce3..d324fe7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -658,29 +658,23 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user); struct xt_table_info *xt_alloc_table_info(unsigned int size) { - struct xt_table_info *newinfo; + struct xt_table_info *info = NULL; + size_t sz = sizeof(*info) + size; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; - newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL); - if (!newinfo) - return NULL; - - newinfo->size = size; - - if (size <= PAGE_SIZE) - newinfo->entries = kmalloc(size, GFP_KERNEL); - else - newinfo->entries = 
vmalloc(size); - - if (newinfo->entries == NULL) { - xt_free_table_info(newinfo); - return NULL; + if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!info) { + info = vmalloc(sz); + if (!info) + return NULL; } - - return newinfo; + memset(info, 0, sizeof(*info)); + info->size = size; + return info; } EXPORT_SYMBOL(xt_alloc_table_info); @@ -688,8 +682,6 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - kvfree(info->entries); - if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) kvfree(info->jumpstack[cpu]); @@ -698,7 +690,7 @@ void xt_free_table_info(struct xt_table_info *info) free_percpu(info->stackptr); - kfree(info); + kvfree(info); } EXPORT_SYMBOL(xt_free_table_info); -- cgit v0.10.2 From 2cbce139fc57bc2625f88add055d0b94f00c3352 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Jun 2015 13:55:41 +0200 Subject: netfilter: nf_tables: attach net_device to basechain The device is part of the hook configuration, so instead of a global configuration per table, set it on each of the basechains that we create. This patch reworks ebddf1a8d78a ("netfilter: nf_tables: allow to bind table to net_device"). Note that this adds a dev_name field in the nft_base_chain structure which is required by the netdev notification subscription that follows up in a later patch to handle gone net_devices.
Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d6f48c..09d6f8d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -791,6 +791,7 @@ struct nft_stats { * @policy: default policy * @stats: per-cpu chain stats * @chain: the chain + * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; @@ -799,6 +800,7 @@ struct nft_base_chain { u8 policy; struct nft_stats __percpu *stats; struct nft_chain chain; + char dev_name[IFNAMSIZ]; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) @@ -819,7 +821,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table - * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -829,7 +830,6 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; - struct net_device *dev; }; enum nft_af_flags { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 89a671e..a99e6a9 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -122,11 +122,13 @@ enum nft_list_attributes { * * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_HOOK_DEV: netdevice name (NLA_STRING) */ enum nft_hook_attributes { NFTA_HOOK_UNSPEC, NFTA_HOOK_HOOKNUM, NFTA_HOOK_PRIORITY, + NFTA_HOOK_DEV, __NFTA_HOOK_MAX }; #define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) @@ -146,14 +148,12 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) - * 
@NFTA_TABLE_DEV: net device name (NLA_STRING) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, - NFTA_TABLE_DEV, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4528f12..900c81a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -399,8 +399,6 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, - [NFTA_TABLE_DEV] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -425,10 +423,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - if (table->dev && - nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name)) - goto nla_put_failure; - nlmsg_end(skb, nlh); return 0; @@ -614,11 +608,6 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; - if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) && - ctx->nla[NFTA_TABLE_DEV] && - nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name)) - return -EOPNOTSUPP; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -656,7 +645,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - struct net_device *dev = NULL; u32 flags = 0; struct nft_ctx ctx; int err; @@ -691,20 +679,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } - if (afi->flags & NFT_AF_NEEDS_DEV) { - char ifname[IFNAMSIZ]; - - if (!nla[NFTA_TABLE_DEV]) - return -EOPNOTSUPP; - - nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ); - dev = 
dev_get_by_name(net, ifname); - if (!dev) - return -ENOENT; - } else if (nla[NFTA_TABLE_DEV]) { - return -EOPNOTSUPP; - } - err = -EAFNOSUPPORT; if (!try_module_get(afi->owner)) goto err1; @@ -718,7 +692,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; - table->dev = dev; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); @@ -732,9 +705,6 @@ err3: err2: module_put(afi->owner); err1: - if (dev != NULL) - dev_put(dev); - return err; } @@ -838,9 +808,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); - if (ctx->table->dev) - dev_put(ctx->table->dev); - kfree(ctx->table); module_put(ctx->afi->owner); } @@ -916,6 +883,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, + [NFTA_HOOK_DEV] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, }; static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) @@ -989,6 +958,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; + if (basechain->dev_name[0] && + nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name)) + goto nla_put_failure; nla_nest_end(skb, nest); if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -1200,9 +1172,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { - module_put(nft_base_chain(chain)->type->owner); - free_percpu(nft_base_chain(chain)->stats); - kfree(nft_base_chain(chain)); + struct nft_base_chain *basechain = nft_base_chain(chain); + + module_put(basechain->type->owner); + 
free_percpu(basechain->stats); + if (basechain->ops[0].dev != NULL) + dev_put(basechain->ops[0].dev); + kfree(basechain); } else { kfree(chain); } @@ -1221,6 +1197,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nlattr *ha[NFTA_HOOK_MAX + 1]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct net_device *dev = NULL; u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; @@ -1360,17 +1337,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return -ENOENT; hookfn = type->hooks[hooknum]; + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) { + module_put(type->owner); + return -ENOENT; + } + } else if (ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) { module_put(type->owner); + if (dev != NULL) + dev_put(dev); return -ENOMEM; } + if (dev != NULL) + strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); + if (dev != NULL) + dev_put(dev); return PTR_ERR(stats); } basechain->stats = stats; @@ -1379,6 +1382,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (stats == NULL) { module_put(type->owner); kfree(basechain); + if (dev != NULL) + dev_put(dev); return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); @@ -1396,7 +1401,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->priority = priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; - ops->dev = table->dev; + ops->dev = dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) -- cgit v0.10.2 From 
d8ee8f7c56b267751f95a1f0b4a75618909c07ac Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 Jun 2015 02:42:31 +0200 Subject: netfilter: nf_tables: add nft_register_basechain() and nft_unregister_basechain() These wrapper functions take care of hook registration for basechains. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 900c81a..c66dc62 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -127,13 +127,38 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } +static int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) +{ + return nf_register_hooks(basechain->ops, hook_nops); +} + +static void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) +{ + nf_unregister_hooks(basechain->ops, hook_nops); +} + +static int nf_tables_register_hooks(const struct nft_table *table, + struct nft_chain *chain, + unsigned int hook_nops) +{ + if (table->flags & NFT_TABLE_F_DORMANT || + !(chain->flags & NFT_BASE_CHAIN)) + return 0; + + return nft_register_basechain(nft_base_chain(chain), hook_nops); +} + static void nf_tables_unregister_hooks(const struct nft_table *table, - const struct nft_chain *chain, + struct nft_chain *chain, unsigned int hook_nops) { - if (!(table->flags & NFT_TABLE_F_DORMANT) && - chain->flags & NFT_BASE_CHAIN) - nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops); + if (table->flags & NFT_TABLE_F_DORMANT || + !(chain->flags & NFT_BASE_CHAIN)) + return; + + nft_unregister_basechain(nft_base_chain(chain), hook_nops); } /* Internal table flags */ @@ -560,7 +585,7 @@ static int nf_tables_table_enable(const struct nft_af_info *afi, if (!(chain->flags & NFT_BASE_CHAIN)) continue; - err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); + err = nft_register_basechain(nft_base_chain(chain), afi->nops); if (err < 0) goto err; @@ -575,20 +600,20 @@ err: if (i--
<= 0) break; - nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); + nft_unregister_basechain(nft_base_chain(chain), afi->nops); } return err; } static void nf_tables_table_disable(const struct nft_af_info *afi, - struct nft_table *table) + struct nft_table *table) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { if (chain->flags & NFT_BASE_CHAIN) - nf_unregister_hooks(nft_base_chain(chain)->ops, - afi->nops); + nft_unregister_basechain(nft_base_chain(chain), + afi->nops); } } @@ -1421,12 +1446,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - if (!(table->flags & NFT_TABLE_F_DORMANT) && - chain->flags & NFT_BASE_CHAIN) { - err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); - if (err < 0) - goto err1; - } + err = nf_tables_register_hooks(table, chain, afi->nops); + if (err < 0) + goto err1; nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); -- cgit v0.10.2 From 835b803377f5f11f9ccf234f70ed667a82605c45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 Jun 2015 12:12:01 +0200 Subject: netfilter: nf_tables_netdev: unregister hooks on net_device removal In case the net_device is gone, we have to unregister the hooks and put back the reference on the net_device object. Once it comes back, register them again. This also covers the device rename case. This patch also adds a new flag to indicate that the basechain is disabled, so their hooks are not registered. This flag is used by the netdev family to handle the case where the net_device object is gone. Currently this flag is not exposed to userspace. 
Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 09d6f8d..2a24668 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -781,6 +781,7 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 +#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain @@ -798,6 +799,7 @@ struct nft_base_chain { possible_net_t pnet; const struct nf_chain_type *type; u8 policy; + u8 flags; struct nft_stats __percpu *stats; struct nft_chain chain; char dev_name[IFNAMSIZ]; @@ -808,6 +810,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } +int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops); +void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops); + unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c66dc62..cfe6368 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -127,17 +127,25 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { + if (basechain->flags & NFT_BASECHAIN_DISABLED) + return 0; + return nf_register_hooks(basechain->ops, hook_nops); } +EXPORT_SYMBOL_GPL(nft_register_basechain); -static void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { + if (basechain->flags & NFT_BASECHAIN_DISABLED) + return; + nf_unregister_hooks(basechain->ops, hook_nops); } +EXPORT_SYMBOL_GPL(nft_unregister_basechain); static int 
nf_tables_register_hooks(const struct nft_table *table, struct nft_chain *chain, diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 04cb170..2cae4d4 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -157,6 +158,77 @@ static const struct nf_chain_type nft_filter_chain_netdev = { .hook_mask = (1 << NF_NETDEV_INGRESS), }; +static void nft_netdev_event(unsigned long event, struct nft_af_info *afi, + struct net_device *dev, struct nft_table *table, + struct nft_base_chain *basechain) +{ + switch (event) { + case NETDEV_REGISTER: + if (strcmp(basechain->dev_name, dev->name) != 0) + return; + + BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED)); + + dev_hold(dev); + basechain->ops[0].dev = dev; + basechain->flags &= ~NFT_BASECHAIN_DISABLED; + if (!(table->flags & NFT_TABLE_F_DORMANT)) + nft_register_basechain(basechain, afi->nops); + break; + case NETDEV_UNREGISTER: + if (strcmp(basechain->dev_name, dev->name) != 0) + return; + + BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED); + + if (!(table->flags & NFT_TABLE_F_DORMANT)) + nft_unregister_basechain(basechain, afi->nops); + + dev_put(basechain->ops[0].dev); + basechain->ops[0].dev = NULL; + basechain->flags |= NFT_BASECHAIN_DISABLED; + break; + case NETDEV_CHANGENAME: + if (dev->ifindex != basechain->ops[0].dev->ifindex) + return; + + strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + break; + } +} + +static int nf_tables_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { + if (afi->family != NFPROTO_NETDEV) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, 
&table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + nft_netdev_event(event, afi, dev, table, + nft_base_chain(chain)); + } + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + + return NOTIFY_DONE; +} + +static struct notifier_block nf_tables_netdev_notifier = { + .notifier_call = nf_tables_netdev_event, +}; + static int __init nf_tables_netdev_init(void) { int ret; @@ -166,11 +238,14 @@ static int __init nf_tables_netdev_init(void) if (ret < 0) nft_unregister_chain_type(&nft_filter_chain_netdev); + register_netdevice_notifier(&nf_tables_netdev_notifier); + return ret; } static void __exit nf_tables_netdev_exit(void) { + unregister_netdevice_notifier(&nf_tables_netdev_notifier); unregister_pernet_subsys(&nf_tables_netdev_net_ops); nft_unregister_chain_type(&nft_filter_chain_netdev); } -- cgit v0.10.2