From 2d9e9b0d05876c9e6b52cfec142e51da9b9b3f6d Mon Sep 17 00:00:00 2001 From: Yannick Brosseau Date: Mon, 25 Jan 2016 17:12:25 -0800 Subject: netfilter: ipvs: Remove noisy debug print from ip_vs_del_service This have been there for a long time, but does not seem to add value Signed-off-by: Yannick Brosseau Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e7c1b05..daf4cb7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1376,8 +1376,6 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_pe *old_pe; struct netns_ipvs *ipvs = svc->ipvs; - pr_info("%s: enter\n", __func__); - /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) ipvs->num_services--; -- cgit v0.10.2 From f6ca9f46f6615c3a87529550058d1b468c0cad89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Jan 2016 14:52:01 +0100 Subject: netfilter: ipvs: avoid unused variable warnings The proc_create() and remove_proc_entry() functions do not reference their arguments when CONFIG_PROC_FS is disabled, so we get a couple of warnings about unused variables in IPVS: ipvs/ip_vs_app.c:608:14: warning: unused variable 'net' [-Wunused-variable] ipvs/ip_vs_ctl.c:3950:14: warning: unused variable 'net' [-Wunused-variable] ipvs/ip_vs_ctl.c:3994:14: warning: unused variable 'net' [-Wunused-variable] This removes the local variables and instead looks them up separately for each use, which obviously avoids the warning. Signed-off-by: Arnd Bergmann Fixes: 4c50a8ce2b63 ("netfilter: ipvs: avoid unused variable warning") Acked-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0328f72..299edc6 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -605,17 +605,13 @@ static const struct file_operations ip_vs_app_fops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; - INIT_LIST_HEAD(&ipvs->app_list); - proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); + proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops); return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; - unregister_ip_vs_app(ipvs, NULL /* all */); - remove_proc_entry("ip_vs_app", net->proc_net); + remove_proc_entry("ip_vs_app", ipvs->net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index daf4cb7..404b2a4 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3945,7 +3945,6 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; int i, idx; /* Initialize rs_table */ @@ -3972,9 +3971,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) spin_lock_init(&ipvs->tot_stats.lock); - proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); - proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); - proc_create("ip_vs_stats_percpu", 0, net->proc_net, + proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops); + proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops); + proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net, &ip_vs_stats_percpu_fops); if (ip_vs_control_net_init_sysctl(ipvs)) @@ -3989,13 +3988,11 @@ err: void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; - ip_vs_trash_cleanup(ipvs); ip_vs_control_net_cleanup_sysctl(ipvs); - remove_proc_entry("ip_vs_stats_percpu", net->proc_net); - remove_proc_entry("ip_vs_stats", net->proc_net); - remove_proc_entry("ip_vs", net->proc_net); + remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); + remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); + remove_proc_entry("ip_vs", ipvs->net->proc_net); free_percpu(ipvs->tot_stats.cpustats); } -- cgit v0.10.2 From b07edbe1cf3dae9ba81f24888e2f2a9dbe778918 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 16 Feb 2016 17:24:08 +0100 Subject: netfilter: meta: add PRANDOM support Can be used to randomly match packets e.g. for statistic traffic sampling. See commit 3ad0040573b0c00f8848 ("bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs") for more info why this doesn't use prandom_u32 directly. Unlike bpf nft_meta can be built as a module, so add an EXPORT_SYMBOL for prandom_seed_full_state too. Cc: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index be41ffc..b19be0a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -681,6 +681,7 @@ enum nft_exthdr_attributes { * @NFT_META_IIFGROUP: packet input interface group * @NFT_META_OIFGROUP: packet output interface group * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) + * @NFT_META_PRANDOM: a 32bit pseudo-random number */ enum nft_meta_keys { NFT_META_LEN, @@ -707,6 +708,7 @@ enum nft_meta_keys { NFT_META_IIFGROUP, NFT_META_OIFGROUP, NFT_META_CGROUP, + NFT_META_PRANDOM, }; /** diff --git a/lib/random32.c b/lib/random32.c index 1211191..510d1ce 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -255,6 +255,7 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) prandom_warmup(state); } } +EXPORT_SYMBOL(prandom_seed_full_state); /* * Generate better values after random number generator diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index fe885bf..16c50b0 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -28,6 +28,8 @@ #include /* NF_BR_PRE_ROUTING */ +static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -181,6 +183,11 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif + case NFT_META_PRANDOM: { + struct rnd_state *state = this_cpu_ptr(&nft_prandom_state); + *dest = prandom_u32_state(state); + break; + } default: WARN_ON(1); goto err; @@ -277,6 +284,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_OIFNAME: len = IFNAMSIZ; break; + case NFT_META_PRANDOM: + prandom_init_once(&nft_prandom_state); + len = sizeof(u32); + break; default: return -EOPNOTSUPP; } -- cgit v0.10.2 From 8ee225e78531981e479e79b74354b63e188b4ee2 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 23 Feb 2016 13:40:10 +0530 Subject: netfilter: xt_osf: remove unused variable While building with W=1 we got the warning: net/netfilter/xt_osf.c:265:9: warning: variable 'loop_cont' set but not used The local variable loop_cont was only initialized and then assigned a value but was never used or checked after that. While removing the variable, the case of OSFOPT_TS was not removed so that it will serve as a reminder to us that we can do something in that particular case. Signed-off-by: Sudip Mukherjee Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 4e3c3af..2455b69 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -262,7 +262,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) if (f->opt[optnum].kind == (*optp)) { __u32 len = f->opt[optnum].length; const __u8 *optend = optp + len; - int loop_cont = 0; fmatch = FMATCH_OK; @@ -275,7 +274,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) mss = ntohs((__force __be16)mss); break; case OSFOPT_TS: - loop_cont = 1; break; } -- cgit v0.10.2 From 5f547391f5f25bb71f27860ad25bd1e4715f0752 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 3 Feb 2016 10:00:10 -0800 Subject: netfilter: nf_defrag_ipv4: Drop redundant ip_send_check() Since commit 0848f6428ba3 ("inet: frags: fix defragmented packet's IP header for af_packet"), ip_send_check() would be called twice for defragmentation that occurs from netfilter ipv4 defrag hooks. Remove the extra call. Signed-off-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index a04dee5..d88da36 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -31,10 +31,8 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, err = ip_defrag(net, skb, user); local_bh_enable(); - if (!err) { - ip_send_check(ip_hdr(skb)); + if (!err) skb->ignore_df = 1; - } return err; } -- cgit v0.10.2 From a67dd266adf42a24df31380e9da78390bb4d65ef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:35 +0100 Subject: netfilter: xtables: prepare for on-demand hook register This change prepares for upcoming on-demand xtables hook registration. We change the protoypes of the register/unregister functions. A followup patch will then add nf_hook_register/unregister calls to the iptables one. Once a hook is registered packets will be picked up, so all assignments of the form net->ipv4.iptable_$table = new_table have to be moved to ip(6)t_register_table, else we can see NULL net->ipv4.iptable_$table later. This patch doesn't change functionality; without this the actual change simply gets too big. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 6f074db..029b95e 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -48,10 +48,11 @@ struct arpt_error { } extern void *arpt_alloc_initial_table(const struct xt_table *); -extern struct xt_table *arpt_register_table(struct net *net, - const struct xt_table *table, - const struct arpt_replace *repl); -extern void arpt_unregister_table(struct xt_table *table); +int arpt_register_table(struct net *net, const struct xt_table *table, + const struct arpt_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res); +void arpt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index aa598f9..7bfc589 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -24,10 +24,11 @@ extern void ipt_init(void) __init; -extern struct xt_table *ipt_register_table(struct net *net, - const struct xt_table *table, - const struct ipt_replace *repl); -extern void ipt_unregister_table(struct net *net, struct xt_table *table); +int ipt_register_table(struct net *net, const struct xt_table *table, + const struct ipt_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res); +void ipt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); /* Standard entry. */ struct ipt_standard { diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 0f76e5c..b21c392 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -25,10 +25,11 @@ extern void ip6t_init(void) __init; extern void *ip6t_alloc_initial_table(const struct xt_table *); -extern struct xt_table *ip6t_register_table(struct net *net, - const struct xt_table *table, - const struct ip6t_replace *repl); -extern void ip6t_unregister_table(struct net *net, struct xt_table *table); +int ip6t_register_table(struct net *net, const struct xt_table *table, + const struct ip6t_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res); +void ip6t_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b488cac..00eed08 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1780,9 +1780,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct xt_table *arpt_register_table(struct net *net, - const struct xt_table *table, - const struct arpt_replace *repl) +int arpt_register_table(struct net *net, + const struct xt_table *table, + const struct arpt_replace *repl, + const struct nf_hook_ops *ops, + struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -1791,10 +1793,8 @@ struct xt_table *arpt_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -1809,15 +1809,18 @@ struct xt_table *arpt_register_table(struct net *net, ret = PTR_ERR(new_table); goto out_free; } - return new_table; + + WRITE_ONCE(*res, new_table); + + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void arpt_unregister_table(struct xt_table *table) +void arpt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 1897ee1..4c02416 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -38,19 +38,20 @@ static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { struct arpt_replace *repl; - + int err; + repl = arpt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; - net->ipv4.arptable_filter = - arpt_register_table(net, &packet_filter, repl); + err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops, + &net->ipv4.arptable_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter); + return err; } static void __net_exit arptable_filter_net_exit(struct net *net) { - arpt_unregister_table(net->ipv4.arptable_filter); + arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); } static struct pernet_operations arptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b99affa..1eb4fe5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2062,9 +2062,9 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ipt_register_table(struct net *net, - const struct xt_table *table, - const struct ipt_replace *repl) +int ipt_register_table(struct net *net, const struct xt_table *table, + const struct ipt_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -2073,10 +2073,8 @@ struct xt_table *ipt_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -2091,15 +2089,16 @@ struct xt_table *ipt_register_table(struct net *net, goto out_free; } - return new_table; + WRITE_ONCE(*res, new_table); + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void ipt_unregister_table(struct net *net, struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 397ef2d..3fbe4ac 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -54,6 +54,7 @@ module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) { struct ipt_replace *repl; + int err; repl = ipt_alloc_initial_table(&packet_filter); if (repl == NULL) @@ -62,15 +63,15 @@ static int __net_init iptable_filter_net_init(struct net *net) ((struct ipt_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; - net->ipv4.iptable_filter = - ipt_register_table(net, &packet_filter, repl); + err = ipt_register_table(net, &packet_filter, repl, filter_ops, + &net->ipv4.iptable_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter); + return err; } static void __net_exit iptable_filter_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_filter); + ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); } static struct pernet_operations iptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index ba5d392..668e7916 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -96,19 +96,20 @@ static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; - net->ipv4.iptable_mangle = - ipt_register_table(net, &packet_mangler, repl); + ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops, + &net->ipv4.iptable_mangle); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle); + return ret; } static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); } static struct pernet_operations iptable_mangle_net_ops = { diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ae2cd27..e984f1d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -98,18 +98,20 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { static int __net_init iptable_nat_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); if (repl == NULL) return -ENOMEM; - net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + ret = ipt_register_table(net, &nf_nat_ipv4_table, repl, + nf_nat_ipv4_ops, &net->ipv4.nat_table); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.nat_table); + return ret; } static void __net_exit iptable_nat_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops); } static struct pernet_operations iptable_nat_net_ops = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1ba0281..9d78780 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -37,19 +37,20 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; - net->ipv4.iptable_raw = - ipt_register_table(net, &packet_raw, repl); + ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops, + &net->ipv4.iptable_raw); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw); + return ret; } static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); } static struct pernet_operations iptable_raw_net_ops = { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index c2e23d5..88bc52f 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -54,19 +54,20 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; - net->ipv4.iptable_security = - ipt_register_table(net, &security_table, repl); + ret = ipt_register_table(net, &security_table, repl, sectbl_ops, + &net->ipv4.iptable_security); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_security); + return ret; } static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); } static struct pernet_operations iptable_security_net_ops = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 99425cf..052d744 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2071,9 +2071,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct net *net, - const struct xt_table *table, - const struct ip6t_replace *repl) +int ip6t_register_table(struct net *net, const struct xt_table *table, + const struct ip6t_replace *repl, + const struct nf_hook_ops *ops, + struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -2082,10 +2083,8 @@ struct xt_table *ip6t_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -2099,15 +2098,17 @@ struct xt_table *ip6t_register_table(struct net *net, ret = PTR_ERR(new_table); goto out_free; } - return new_table; + + WRITE_ONCE(*res, new_table); + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void ip6t_unregister_table(struct net *net, struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 8b277b9..d191d54 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -47,6 +47,7 @@ module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) { struct ip6t_replace *repl; + int err; repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) @@ -55,15 +56,15 @@ static int __net_init ip6table_filter_net_init(struct net *net) ((struct ip6t_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; - net->ipv6.ip6table_filter = - ip6t_register_table(net, &packet_filter, repl); + err = ip6t_register_table(net, &packet_filter, repl, filter_ops, + &net->ipv6.ip6table_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter); + return err; } static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_filter); + ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); } static struct pernet_operations ip6table_filter_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index abe278b..fe43d08 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -91,19 +91,20 @@ static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init ip6table_mangle_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_mangle = - ip6t_register_table(net, &packet_mangler, repl); + ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops, + &net->ipv6.ip6table_mangle); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle); + return ret; } static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_mangle); + ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); } static struct pernet_operations ip6table_mangle_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index de2a10a..7f9740e 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -100,18 +100,20 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { static int __net_init ip6table_nat_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl, + nf_nat_ipv6_ops, &net->ipv6.ip6table_nat); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat); + return ret; } static void __net_exit ip6table_nat_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_nat); + ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops); } static struct pernet_operations ip6table_nat_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9021963..5fac433 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -30,19 +30,20 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_raw = - ip6t_register_table(net, &packet_raw, repl); + ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops, + &net->ipv6.ip6table_raw); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw); + return ret; } static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_raw); + ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); } static struct pernet_operations ip6table_raw_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 0d856fe..cf58745 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -47,19 +47,20 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init ip6table_security_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_security = - ip6t_register_table(net, &security_table, repl); + ret = ip6t_register_table(net, &security_table, repl, sectbl_ops, + &net->ipv6.ip6table_security); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security); + return ret; } static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_security); + ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); } static struct pernet_operations ip6table_security_net_ops = { -- cgit v0.10.2 From b9e69e127397187b70c813a4397cce7afb5e8cb1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:36 +0100 Subject: netfilter: xtables: don't hook tables by default delay hook registration until the table is being requested inside a namespace. Historically, a particular table (iptables mangle, ip6tables filter, etc) was registered on module load. When netns support was added to iptables only the ip/ip6tables ruleset was made namespace aware, not the actual hook points. This means f.e. that when ipt_filter table/module is loaded on a system, then each namespace on that system has an (empty) iptables filter ruleset. In other words, if a namespace sends a packet, such skb is 'caught' by netfilter machinery and fed to hooking points for that table (i.e. INPUT, FORWARD, etc). Thanks to Eric Biederman, hooks are no longer global, but per namespace. This means that we can avoid allocation of empty ruleset in a namespace and defer hook registration until we need the functionality. We register a tables hook entry points ONLY in the initial namespace. When an iptables get/setockopt is issued inside a given namespace, we check if the table is found in the per-namespace list. If not, we attempt to find it in the initial namespace, and, if found, create an empty default table in the requesting namespace and register the needed hooks. Hook points are destroyed only once namespace is deleted, there is no 'usage count' (it makes no sense since there is no 'remove table' operation in xtables api). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c557741..80a305b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -200,6 +200,9 @@ struct xt_table { u_int8_t af; /* address/protocol family */ int priority; /* hook order */ + /* called when table is needed in the given netns */ + int (*table_init)(struct net *net); + /* A unique name... */ const char name[XT_TABLE_MAXNAMELEN]; }; @@ -408,8 +411,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) return cnt; } -struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); -void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); +struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 00eed08..bf08192 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1780,6 +1780,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } +static void __arpt_unregister_table(struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct arpt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, @@ -1810,8 +1828,15 @@ int arpt_register_table(struct net *net, goto out_free; } + /* set res now, will see skbs right after nf_register_net_hooks */ WRITE_ONCE(*res, new_table); + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __arpt_unregister_table(new_table); + *res = NULL; + } + return ret; out_free: @@ -1822,20 +1847,8 @@ out_free: void arpt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct arpt_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __arpt_unregister_table(table); } /* The built-in targets: standard (NULL) and error. */ diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 4c02416..dd8c80d 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) +static int __net_init arptable_filter_table_init(struct net *net); + static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_ARP, .priority = NF_IP_PRI_FILTER, + .table_init = arptable_filter_table_init, }; /* The work comes in here from netfilter.c */ @@ -35,11 +38,14 @@ arptable_filter_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *arpfilter_ops __read_mostly; -static int __net_init arptable_filter_net_init(struct net *net) +static int __net_init arptable_filter_table_init(struct net *net) { struct arpt_replace *repl; int err; + if (net->ipv4.arptable_filter) + return 0; + repl = arpt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; @@ -51,11 +57,13 @@ static int __net_init arptable_filter_net_init(struct net *net) static void __net_exit arptable_filter_net_exit(struct net *net) { + if (!net->ipv4.arptable_filter) + return; arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); + net->ipv4.arptable_filter = NULL; } static struct pernet_operations arptable_filter_net_ops = { - .init = arptable_filter_net_init, .exit = arptable_filter_net_exit, }; @@ -63,26 +71,23 @@ static int __init arptable_filter_init(void) { int ret; + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) + return PTR_ERR(arpfilter_ops); + ret = register_pernet_subsys(&arptable_filter_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(arpfilter_ops); return ret; - - arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); - if (IS_ERR(arpfilter_ops)) { - ret = PTR_ERR(arpfilter_ops); - goto cleanup_table; } - return ret; -cleanup_table: - unregister_pernet_subsys(&arptable_filter_net_ops); return ret; } static void __exit arptable_filter_fini(void) { - xt_hook_unlink(&packet_filter, arpfilter_ops); unregister_pernet_subsys(&arptable_filter_net_ops); + kfree(arpfilter_ops); } module_init(arptable_filter_init); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1eb4fe5..e53f8d6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2062,6 +2062,24 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } +static void __ipt_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ipt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res) @@ -2089,7 +2107,15 @@ int ipt_register_table(struct net *net, const struct xt_table *table, goto out_free; } + /* set res now, will see skbs right after nf_register_net_hooks */ WRITE_ONCE(*res, new_table); + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __ipt_unregister_table(net, new_table); + *res = NULL; + } + return ret; out_free: @@ -2100,20 +2126,8 @@ out_free: void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct ipt_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter, net); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ipt_unregister_table(net, table); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3fbe4ac..7667f22 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -23,6 +23,7 @@ MODULE_DESCRIPTION("iptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) +static int __net_init iptable_filter_table_init(struct net *net); static const struct xt_table packet_filter = { .name = "filter", @@ -30,6 +31,7 @@ static const struct xt_table packet_filter = { .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_FILTER, + .table_init = iptable_filter_table_init, }; static unsigned int @@ -48,14 +50,17 @@ iptable_filter_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static bool forward = true; +static bool forward __read_mostly = true; module_param(forward, bool, 0000); -static int __net_init iptable_filter_net_init(struct net *net) +static int __net_init iptable_filter_table_init(struct net *net) { struct ipt_replace *repl; int err; + if (net->ipv4.iptable_filter) + return 0; + repl = ipt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; @@ -69,9 +74,20 @@ static int __net_init iptable_filter_net_init(struct net *net) return err; } +static int __net_init iptable_filter_net_init(struct net *net) +{ + if (net == &init_net || !forward) + return iptable_filter_table_init(net); + + return 0; +} + static void __net_exit iptable_filter_net_exit(struct net *net) { + if (!net->ipv4.iptable_filter) + return; ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); + net->ipv4.iptable_filter = NULL; } static struct pernet_operations iptable_filter_net_ops = { @@ -83,24 +99,21 @@ static int __init iptable_filter_init(void) { int ret; + filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) + return PTR_ERR(filter_ops); + ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) - return ret; - - /* Register hooks */ - filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); - if (IS_ERR(filter_ops)) { - ret = PTR_ERR(filter_ops); - unregister_pernet_subsys(&iptable_filter_net_ops); - } + kfree(filter_ops); return ret; } static void __exit iptable_filter_fini(void) { - xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&iptable_filter_net_ops); + kfree(filter_ops); } module_init(iptable_filter_init); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 668e7916..57fc97c 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) +static int __net_init iptable_mangle_table_init(struct net *net); + static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_MANGLE, + .table_init = iptable_mangle_table_init, }; static unsigned int @@ -92,12 +95,14 @@ iptable_mangle_hook(void *priv, } static struct nf_hook_ops *mangle_ops __read_mostly; - -static int __net_init iptable_mangle_net_init(struct net *net) +static int __net_init iptable_mangle_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.iptable_mangle) + return 0; + repl = ipt_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; @@ -109,11 +114,13 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { + if (!net->ipv4.iptable_mangle) + return; ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); + net->ipv4.iptable_mangle = NULL; } static struct pernet_operations iptable_mangle_net_ops = { - .init = iptable_mangle_net_init, .exit = iptable_mangle_net_exit, }; @@ -121,15 +128,22 @@ static int __init iptable_mangle_init(void) { int ret; + mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); + return ret; + } + ret = register_pernet_subsys(&iptable_mangle_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(mangle_ops); return ret; + } - /* Register hooks */ - mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); - if (IS_ERR(mangle_ops)) { - ret = PTR_ERR(mangle_ops); + ret = iptable_mangle_table_init(&init_net); + if (ret) { unregister_pernet_subsys(&iptable_mangle_net_ops); + kfree(mangle_ops); } return ret; @@ -137,8 +151,8 @@ static int __init iptable_mangle_init(void) static void __exit iptable_mangle_fini(void) { - xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&iptable_mangle_net_ops); + kfree(mangle_ops); } module_init(iptable_mangle_init); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index e984f1d..138a24b 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -18,6 +18,8 @@ #include #include +static int __net_init iptable_nat_table_init(struct net *net); + static const struct xt_table nf_nat_ipv4_table = { .name = "nat", .valid_hooks = (1 << NF_INET_PRE_ROUTING) | @@ -26,6 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV4, + .table_init = iptable_nat_table_init, }; static unsigned int iptable_nat_do_chain(void *priv, @@ -95,11 +98,14 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { }, }; -static int __net_init iptable_nat_net_init(struct net *net) +static int __net_init iptable_nat_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.nat_table) + return 0; + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); if (repl == NULL) return -ENOMEM; @@ -111,36 +117,31 @@ static int __net_init iptable_nat_net_init(struct net *net) static void __net_exit iptable_nat_net_exit(struct net *net) { + if (!net->ipv4.nat_table) + return; ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops); + net->ipv4.nat_table = NULL; } static struct pernet_operations iptable_nat_net_ops = { - .init = iptable_nat_net_init, .exit = iptable_nat_net_exit, }; static int __init iptable_nat_init(void) { - int err; - - err = register_pernet_subsys(&iptable_nat_net_ops); - if (err < 0) - goto err1; + int ret = register_pernet_subsys(&iptable_nat_net_ops); - err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); - if (err < 0) - goto err2; - return 0; + if (ret) + return ret; -err2: - unregister_pernet_subsys(&iptable_nat_net_ops); -err1: - return err; + ret = iptable_nat_table_init(&init_net); + if (ret) + unregister_pernet_subsys(&iptable_nat_net_ops); + return ret; } static void __exit iptable_nat_exit(void) { - nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); unregister_pernet_subsys(&iptable_nat_net_ops); } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 9d78780..2642ecd 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -10,12 +10,15 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) +static int __net_init iptable_raw_table_init(struct net *net); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_RAW, + .table_init = iptable_raw_table_init, }; /* The work comes in here from netfilter.c. */ @@ -34,11 +37,14 @@ iptable_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init iptable_raw_net_init(struct net *net) +static int __net_init iptable_raw_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.iptable_raw) + return 0; + repl = ipt_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; @@ -50,11 +56,13 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { + if (!net->ipv4.iptable_raw) + return; ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); + net->ipv4.iptable_raw = NULL; } static struct pernet_operations iptable_raw_net_ops = { - .init = iptable_raw_net_init, .exit = iptable_raw_net_exit, }; @@ -62,15 +70,20 @@ static int __init iptable_raw_init(void) { int ret; + rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) + return PTR_ERR(rawtable_ops); + ret = register_pernet_subsys(&iptable_raw_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(rawtable_ops); return ret; + } - /* Register hooks */ - rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); - if (IS_ERR(rawtable_ops)) { - ret = PTR_ERR(rawtable_ops); + ret = iptable_raw_table_init(&init_net); + if (ret) { unregister_pernet_subsys(&iptable_raw_net_ops); + kfree(rawtable_ops); } return ret; @@ -78,8 +91,8 @@ static int __init iptable_raw_init(void) static void __exit iptable_raw_fini(void) { - xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&iptable_raw_net_ops); + kfree(rawtable_ops); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 88bc52f..ff22659 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) +static int __net_init iptable_security_table_init(struct net *net); + static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_SECURITY, + .table_init = iptable_security_table_init, }; static unsigned int @@ -51,11 +54,14 @@ iptable_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init iptable_security_net_init(struct net *net) +static int __net_init iptable_security_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.iptable_security) + return 0; + repl = ipt_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; @@ -67,11 +73,14 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { + if (!net->ipv4.iptable_security) + return; + ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); + net->ipv4.iptable_security = NULL; } static struct pernet_operations iptable_security_net_ops = { - .init = iptable_security_net_init, .exit = iptable_security_net_exit, }; @@ -79,27 +88,29 @@ static int __init iptable_security_init(void) { int ret; + sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) + return PTR_ERR(sectbl_ops); + ret = register_pernet_subsys(&iptable_security_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(sectbl_ops); return ret; - - sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); - if (IS_ERR(sectbl_ops)) { - ret = PTR_ERR(sectbl_ops); - goto cleanup_table; } - return ret; + ret = iptable_security_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&iptable_security_net_ops); + kfree(sectbl_ops); + } -cleanup_table: - unregister_pernet_subsys(&iptable_security_net_ops); return ret; } static void __exit iptable_security_fini(void) { - xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&iptable_security_net_ops); + kfree(sectbl_ops); } module_init(iptable_security_init); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 052d744..84f9baf 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2071,6 +2071,24 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } +static void __ip6t_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ip6t_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops, @@ -2099,7 +2117,15 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, goto out_free; } + /* set res now, will see skbs right after nf_register_net_hooks */ WRITE_ONCE(*res, new_table); + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __ip6t_unregister_table(net, new_table); + *res = NULL; + } + return ret; out_free: @@ -2110,20 +2136,8 @@ out_free: void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct ip6t_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter, net); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ip6t_unregister_table(net, table); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index d191d54..1343077 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -22,12 +22,15 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) +static int __net_init ip6table_filter_table_init(struct net *net); + static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, + .table_init = ip6table_filter_table_init, }; /* The work comes in here from netfilter.c. */ @@ -44,11 +47,14 @@ static struct nf_hook_ops *filter_ops __read_mostly; static bool forward = true; module_param(forward, bool, 0000); -static int __net_init ip6table_filter_net_init(struct net *net) +static int __net_init ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; int err; + if (net->ipv6.ip6table_filter) + return 0; + repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; @@ -62,9 +68,20 @@ static int __net_init ip6table_filter_net_init(struct net *net) return err; } +static int __net_init ip6table_filter_net_init(struct net *net) +{ + if (net == &init_net || !forward) + return ip6table_filter_table_init(net); + + return 0; +} + static void __net_exit ip6table_filter_net_exit(struct net *net) { + if (!net->ipv6.ip6table_filter) + return; ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); + net->ipv6.ip6table_filter = NULL; } static struct pernet_operations ip6table_filter_net_ops = { @@ -76,28 +93,21 @@ static int __init ip6table_filter_init(void) { int ret; + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); + if (IS_ERR(filter_ops)) + return PTR_ERR(filter_ops); + ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) - return ret; - - /* Register hooks */ - filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook); - if (IS_ERR(filter_ops)) { - ret = PTR_ERR(filter_ops); - goto cleanup_table; - } + kfree(filter_ops); return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_filter_net_ops); - return ret; } static void __exit ip6table_filter_fini(void) { - xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&ip6table_filter_net_ops); + kfree(filter_ops); } module_init(ip6table_filter_init); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index fe43d08..cb2b288 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -23,12 +23,15 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) +static int __net_init ip6table_mangle_table_init(struct net *net); + static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, + .table_init = ip6table_mangle_table_init, }; static unsigned int @@ -88,11 +91,14 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, } static struct nf_hook_ops *mangle_ops __read_mostly; -static int __net_init ip6table_mangle_net_init(struct net *net) +static int __net_init ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_mangle) + return 0; + repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; @@ -104,11 +110,14 @@ static int __net_init ip6table_mangle_net_init(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net) { + if (!net->ipv6.ip6table_mangle) + return; + ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); + net->ipv6.ip6table_mangle = NULL; } static struct pernet_operations ip6table_mangle_net_ops = { - .init = ip6table_mangle_net_init, .exit = ip6table_mangle_net_exit, }; @@ -116,28 +125,28 @@ static int __init ip6table_mangle_init(void) { int ret; + mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); + ret = register_pernet_subsys(&ip6table_mangle_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(mangle_ops); return ret; - - /* Register hooks */ - mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - ret = PTR_ERR(mangle_ops); - goto cleanup_table; } - return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_mangle_net_ops); + ret = ip6table_mangle_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_mangle_net_ops); + kfree(mangle_ops); + } return ret; } static void __exit ip6table_mangle_fini(void) { - xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&ip6table_mangle_net_ops); + kfree(mangle_ops); } module_init(ip6table_mangle_init); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 7f9740e..7d2bd94 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -20,6 +20,8 @@ #include #include +static int __net_init ip6table_nat_table_init(struct net *net); + static const struct xt_table nf_nat_ipv6_table = { .name = "nat", .valid_hooks = (1 << NF_INET_PRE_ROUTING) | @@ -28,6 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV6, + .table_init = ip6table_nat_table_init, }; static unsigned int ip6table_nat_do_chain(void *priv, @@ -97,11 +100,14 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { }, }; -static int __net_init ip6table_nat_net_init(struct net *net) +static int __net_init ip6table_nat_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_nat) + return 0; + repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); if (repl == NULL) return -ENOMEM; @@ -113,36 +119,31 @@ static int __net_init ip6table_nat_net_init(struct net *net) static void __net_exit ip6table_nat_net_exit(struct net *net) { + if (!net->ipv6.ip6table_nat) + return; ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops); + net->ipv6.ip6table_nat = NULL; } static struct pernet_operations ip6table_nat_net_ops = { - .init = ip6table_nat_net_init, .exit = ip6table_nat_net_exit, }; static int __init ip6table_nat_init(void) { - int err; - - err = register_pernet_subsys(&ip6table_nat_net_ops); - if (err < 0) - goto err1; + int ret = register_pernet_subsys(&ip6table_nat_net_ops); - err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); - if (err < 0) - goto err2; - return 0; + if (ret) + return ret; -err2: - unregister_pernet_subsys(&ip6table_nat_net_ops); -err1: - return err; + ret = ip6table_nat_table_init(&init_net); + if (ret) + unregister_pernet_subsys(&ip6table_nat_net_ops); + return ret; } static void __exit ip6table_nat_exit(void) { - nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); unregister_pernet_subsys(&ip6table_nat_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5fac433..d4bc564 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -9,12 +9,15 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) +static int __net_init ip6table_raw_table_init(struct net *net); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW, + .table_init = ip6table_raw_table_init, }; /* The work comes in here from netfilter.c. */ @@ -27,11 +30,14 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init ip6table_raw_net_init(struct net *net) +static int __net_init ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_raw) + return 0; + repl = ip6t_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; @@ -43,11 +49,13 @@ static int __net_init ip6table_raw_net_init(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net) { + if (!net->ipv6.ip6table_raw) + return; ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); + net->ipv6.ip6table_raw = NULL; } static struct pernet_operations ip6table_raw_net_ops = { - .init = ip6table_raw_net_init, .exit = ip6table_raw_net_exit, }; @@ -55,28 +63,29 @@ static int __init ip6table_raw_init(void) { int ret; + /* Register hooks */ + rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook); + if (IS_ERR(rawtable_ops)) + return PTR_ERR(rawtable_ops); + ret = register_pernet_subsys(&ip6table_raw_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(rawtable_ops); return ret; - - /* Register hooks */ - rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook); - if (IS_ERR(rawtable_ops)) { - ret = PTR_ERR(rawtable_ops); - goto cleanup_table; } - return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_raw_net_ops); + ret = ip6table_raw_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_raw_net_ops); + kfree(rawtable_ops); + } return ret; } static void __exit ip6table_raw_fini(void) { - xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&ip6table_raw_net_ops); + kfree(rawtable_ops); } module_init(ip6table_raw_init); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index cf58745..cf26ccb 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -27,12 +27,15 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) +static int __net_init ip6table_security_table_init(struct net *net); + static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_SECURITY, + .table_init = ip6table_security_table_init, }; static unsigned int @@ -44,11 +47,14 @@ ip6table_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init ip6table_security_net_init(struct net *net) +static int __net_init ip6table_security_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_security) + return 0; + repl = ip6t_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; @@ -60,11 +66,13 @@ static int __net_init ip6table_security_net_init(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net) { + if (!net->ipv6.ip6table_security) + return; ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); + net->ipv6.ip6table_security = NULL; } static struct pernet_operations ip6table_security_net_ops = { - .init = ip6table_security_net_init, .exit = ip6table_security_net_exit, }; @@ -72,27 +80,28 @@ static int __init ip6table_security_init(void) { int ret; + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); + if (IS_ERR(sectbl_ops)) + return PTR_ERR(sectbl_ops); + ret = register_pernet_subsys(&ip6table_security_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(sectbl_ops); return ret; - - sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook); - if (IS_ERR(sectbl_ops)) { - ret = PTR_ERR(sectbl_ops); - goto cleanup_table; } - return ret; - -cleanup_table: - unregister_pernet_subsys(&ip6table_security_net_ops); + ret = ip6table_security_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_security_net_ops); + kfree(sectbl_ops); + } return ret; } static void __exit ip6table_security_fini(void) { - xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&ip6table_security_net_ops); + kfree(sectbl_ops); } module_init(ip6table_security_init); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c8a0b7d..d0cd2b9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -694,12 +694,45 @@ EXPORT_SYMBOL(xt_free_table_info); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { - struct xt_table *t; + struct xt_table *t, *found = NULL; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &net->xt.tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; + + if (net == &init_net) + goto out; + + /* Table doesn't exist in this netns, re-try init */ + list_for_each_entry(t, &init_net.xt.tables[af], list) { + if (strcmp(t->name, name)) + continue; + if (!try_module_get(t->me)) + return NULL; + + mutex_unlock(&xt[af].mutex); + if (t->table_init(net) != 0) { + module_put(t->me); + return NULL; + } + + found = t; + + mutex_lock(&xt[af].mutex); + break; + } + + if (!found) + goto out; + + /* and once again: */ + list_for_each_entry(t, &net->xt.tables[af], list) + if (strcmp(t->name, name) == 0) + return t; + + module_put(found->me); + out: mutex_unlock(&xt[af].mutex); return NULL; } @@ -1170,20 +1203,20 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ /** - * xt_hook_link - set up hooks for a new table + * xt_hook_ops_alloc - set up hooks for a new table * @table: table with metadata needed to set up hooks * @fn: Hook function * - * This function will take care of creating and registering the necessary - * Netfilter hooks for XT tables. + * This function will create the nf_hook_ops that the x_table needs + * to hand to xt_hook_link_net(). */ -struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) +struct nf_hook_ops * +xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) { unsigned int hook_mask = table->valid_hooks; uint8_t i, num_hooks = hweight32(hook_mask); uint8_t hooknum; struct nf_hook_ops *ops; - int ret; ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) @@ -1200,27 +1233,9 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) ++i; } - ret = nf_register_hooks(ops, num_hooks); - if (ret < 0) { - kfree(ops); - return ERR_PTR(ret); - } - return ops; } -EXPORT_SYMBOL_GPL(xt_hook_link); - -/** - * xt_hook_unlink - remove hooks for a table - * @ops: nf_hook_ops array as returned by nf_hook_link - * @hook_mask: the very same mask that was passed to nf_hook_link - */ -void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) -{ - nf_unregister_hooks(ops, hweight32(table->valid_hooks)); - kfree(ops); -} -EXPORT_SYMBOL_GPL(xt_hook_unlink); +EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); int xt_proto_init(struct net *net, u_int8_t af) { -- cgit v0.10.2 From 5f6c253ebe93b02dece01c6f58447f16b29f6dd3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:37 +0100 Subject: netfilter: bridge: register hooks only when bridge interface is added This moves bridge hooks to a register-when-needed scheme. We use a device notifier to register the 'call-iptables' netfilter hooks only once a bridge gets added. This means that if the initial namespace uses a bridge, newly created network namespaces no longer get the PRE_ROUTING ipt_sabotage hook. It will registered in that network namespace once a bridge is created within that namespace. A few modules still use global hooks: - conntrack - bridge PF_BRIDGE hooks - IPVS - CLUSTER match (deprecated) - SYNPROXY As long as these modules are not loaded/used, a new network namespace has empty hook list and NF_HOOK() will boil down to single list_empty test even if initial namespace does stateless packet filtering. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7ddbe7e..44114a9 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -44,6 +45,12 @@ #include #endif +static int brnf_net_id __read_mostly; + +struct brnf_net { + bool enabled; +}; + #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; @@ -938,6 +945,53 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { }, }; +static int brnf_device_event(struct notifier_block *unused, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct brnf_net *brnet; + struct net *net; + int ret; + + if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE)) + return NOTIFY_DONE; + + ASSERT_RTNL(); + + net = dev_net(dev); + brnet = net_generic(net, brnf_net_id); + if (brnet->enabled) + return NOTIFY_OK; + + ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret) + return NOTIFY_BAD; + + brnet->enabled = true; + return NOTIFY_OK; +} + +static void __net_exit brnf_exit_net(struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + if (!brnet->enabled) + return; + + nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); + brnet->enabled = false; +} + +static struct pernet_operations brnf_net_ops __read_mostly = { + .exit = brnf_exit_net, + .id = &brnf_net_id, + .size = sizeof(struct brnf_net), +}; + +static struct notifier_block brnf_notifier __read_mostly = { + .notifier_call = brnf_device_event, +}; + #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, @@ -1003,16 +1057,23 @@ static int __init br_netfilter_init(void) { int ret; - ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + ret = register_pernet_subsys(&brnf_net_ops); if (ret < 0) return ret; + ret = register_netdevice_notifier(&brnf_notifier); + if (ret < 0) { + unregister_pernet_subsys(&brnf_net_ops); + return ret; + } + #ifdef CONFIG_SYSCTL brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + unregister_netdevice_notifier(&brnf_notifier); + unregister_pernet_subsys(&brnf_net_ops); return -ENOMEM; } #endif @@ -1024,7 +1085,8 @@ static int __init br_netfilter_init(void) static void __exit br_netfilter_fini(void) { RCU_INIT_POINTER(nf_br_ops, NULL); - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + unregister_netdevice_notifier(&brnf_notifier); + unregister_pernet_subsys(&brnf_net_ops); #ifdef CONFIG_SYSCTL unregister_net_sysctl_table(brnf_sysctl_header); #endif -- cgit v0.10.2 From af4610c39589d839551da104f7da342d86f23ea0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:38 +0100 Subject: netfilter: don't call hooks unless needed With the previous patches in place, a netns nf_hook_list might be empty, even if e.g. init_net performs filtering. Thus change nf_hook_thresh to check the hook_list as well before initializing hook_state and calling nf_hook_slow(). We still make use of static keys; if no netfilter modules are loaded list is guaranteed to be empty. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0ad5567..9230f9a 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -141,22 +141,6 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; - -static inline bool nf_hook_list_active(struct list_head *hook_list, - u_int8_t pf, unsigned int hook) -{ - if (__builtin_constant_p(pf) && - __builtin_constant_p(hook)) - return static_key_false(&nf_hooks_needed[pf][hook]); - - return !list_empty(hook_list); -} -#else -static inline bool nf_hook_list_active(struct list_head *hook_list, - u_int8_t pf, unsigned int hook) -{ - return !list_empty(hook_list); -} #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); @@ -177,9 +161,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list = &net->nf.hooks[pf][hook]; + struct list_head *hook_list; + +#ifdef HAVE_JUMP_LABEL + if (__builtin_constant_p(pf) && + __builtin_constant_p(hook) && + !static_key_false(&nf_hooks_needed[pf][hook])) + return 1; +#endif + + hook_list = &net->nf.hooks[pf][hook]; - if (nf_hook_list_active(hook_list, pf, hook)) { + if (!list_empty(hook_list)) { struct nf_hook_state state; nf_hook_state_init(&state, hook_list, hook, thresh, -- cgit v0.10.2 From 8a6bf5da1aefdafd60b73d9122c7af9fd2d7bb9c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Mar 2016 19:55:14 +0100 Subject: netfilter: nft_masq: support port range Complete masquerading support by allowing port range selection. Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h index e2a518b..a3f3c11 100644 --- a/include/net/netfilter/nft_masq.h +++ b/include/net/netfilter/nft_masq.h @@ -2,7 +2,9 @@ #define _NFT_MASQ_H_ struct nft_masq { - u32 flags; + u32 flags; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; }; extern const struct nla_policy nft_masq_policy[]; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b19be0a..eeffde1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -951,10 +951,14 @@ enum nft_nat_attributes { * enum nft_masq_attributes - nf_tables masquerade expression attributes * * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + * @NFTA_MASQ_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_MASQ_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) */ enum nft_masq_attributes { NFTA_MASQ_UNSPEC, NFTA_MASQ_FLAGS, + NFTA_MASQ_REG_PROTO_MIN, + NFTA_MASQ_REG_PROTO_MAX, __NFTA_MASQ_MAX }; #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index b72ffc5..51ced81 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -25,7 +25,12 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; - + if (priv->sreg_proto_min) { + range.min_proto.all = + *(__be16 *)®s->data[priv->sreg_proto_min]; + range.max_proto.all = + *(__be16 *)®s->data[priv->sreg_proto_max]; + } regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, &range, pkt->out); } diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index cd1ac16..9597ffb 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -26,7 +26,12 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; - + if (priv->sreg_proto_min) { + range.min_proto.all = + *(__be16 *)®s->data[priv->sreg_proto_min]; + range.max_proto.all = + *(__be16 *)®s->data[priv->sreg_proto_max]; + } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); } diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9aea747..81b5ad6 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -17,7 +17,9 @@ #include const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { - [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, + [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, + [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(nft_masq_policy); @@ -40,6 +42,7 @@ int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); struct nft_masq *priv = nft_expr_priv(expr); int err; @@ -47,12 +50,32 @@ int nft_masq_init(const struct nft_ctx *ctx, if (err) return err; - if (tb[NFTA_MASQ_FLAGS] == NULL) - return 0; - - priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); - if (priv->flags & ~NF_NAT_RANGE_MASK) - return -EINVAL; + if (tb[NFTA_MASQ_FLAGS]) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + } + + if (tb[NFTA_MASQ_REG_PROTO_MIN]) { + priv->sreg_proto_min = + nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]); + + err = nft_validate_register_load(priv->sreg_proto_min, plen); + if (err < 0) + return err; + + if (tb[NFTA_MASQ_REG_PROTO_MAX]) { + priv->sreg_proto_max = + nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]); + + err = nft_validate_register_load(priv->sreg_proto_max, + plen); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } return 0; } @@ -62,12 +85,18 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_masq *priv = nft_expr_priv(expr); - if (priv->flags == 0) - return 0; - - if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags))) + if (priv->flags != 0 && + nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags))) goto nla_put_failure; + if (priv->sreg_proto_min) { + if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN, + priv->sreg_proto_min) || + nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX, + priv->sreg_proto_max)) + goto nla_put_failure; + } + return 0; nla_put_failure: -- cgit v0.10.2