From 2a971354e74f3837d14b9c8d7f7983b0c9c330e4 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 30 Jan 2014 08:50:20 +0100 Subject: ipvs: fix AF assignment in ip_vs_conn_new() If a fwmark is passed to ip_vs_conn_new(), it is passed in vaddr, not daddr. Therefore we should set AF to AF_UNSPEC in vaddr assignment (like we do in ip_vs_ct_in_get()), otherwise we may copy only first 4 bytes of an IPv6 address into cp->daddr. Signed-off-by: Bogdano Arendartchuk Signed-off-by: Michal Kubecek Acked-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 59a1a85..a8eb0a8 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -871,11 +871,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->protocol = p->protocol; ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); - cp->vport = p->vport; - /* proto should only be IPPROTO_IP if d_addr is a fwmark */ + /* proto should only be IPPROTO_IP if p->vaddr is a fwmark */ ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + &cp->vaddr, p->vaddr); + cp->vport = p->vport; + ip_vs_addr_set(p->af, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; -- cgit v0.10.2 From 2a53bfb3e0fb6aa6b1ac93e5979a040a4b57ea8b Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 17 Jan 2014 02:28:45 +0100 Subject: netfilter: nft_ct: fix unconditional dump of 'dir' attr We want to make sure that the information that we get from the kernel can be reinjected without troubles. The kernel shouldn't return an attribute that is not required, or even prohibited. Dumping unconditionally NFTA_CT_DIRECTION could lead an application in userspace to interpret that the attribute was originally set, while it was not. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 917052e..feaf0f3 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -311,8 +311,19 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; - if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) - goto nla_put_failure; + + switch (priv->key) { + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + default: + break; + } + return 0; nla_put_failure: -- cgit v0.10.2 From 3dd7279fb6db05ec5a088cd0cae6ba22580a82bd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 25 Jan 2014 08:04:07 +0000 Subject: netfilter: nf_tables: fix oops when deleting a chain with references The following commands trigger an oops: # nft -i nft> add table filter nft> add chain filter input { type filter hook input priority 0; } nft> add chain filter test nft> add rule filter input jump test nft> delete chain filter test We need to check the chain use counter before allowing destruction since we might have references from sets or jump rules. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=69341 Reported-by: Matthew Ife Tested-by: Matthew Ife Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 117bbaad..9ce3053 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1045,7 +1045,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); - if (!list_empty(&chain->rules)) + if (!list_empty(&chain->rules) || chain->use > 0) return -EBUSY; list_del(&chain->list); -- cgit v0.10.2 From c6825c0976fa7893692e0e43b09740b419b23c09 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 29 Jan 2014 19:34:14 +0100 Subject: netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get Lets look at destroy_conntrack: hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); ... nf_conntrack_free(ct) kmem_cache_free(net->ct.nf_conntrack_cachep, ct); net->ct.nf_conntrack_cachep is created with SLAB_DESTROY_BY_RCU. The hash is protected by rcu, so readers look up conntracks without locks. A conntrack is removed from the hash, but in this moment a few readers still can use the conntrack. Then this conntrack is released and another thread creates conntrack with the same address and the equal tuple. After this a reader starts to validate the conntrack: * It's not dying, because a new conntrack was created * nf_ct_tuple_equal() returns true. But this conntrack is not initialized yet, so it can not be used by two threads concurrently. In this case BUG_ON may be triggered from nf_nat_setup_info(). Florian Westphal suggested to check the confirm bit too. I think it's right. task 1 task 2 task 3 nf_conntrack_find_get ____nf_conntrack_find destroy_conntrack hlist_nulls_del_rcu nf_conntrack_free kmem_cache_free __nf_conntrack_alloc kmem_cache_alloc memset(&ct->tuplehash[IP_CT_DIR_MAX], if (nf_ct_is_dying(ct)) if (!nf_ct_tuple_equal() I'm not sure, that I have ever seen this race condition in a real life. Currently we are investigating a bug, which is reproduced on a few nodes. In our case one conntrack is initialized from a few tasks concurrently, we don't have any other explanation for this. <2>[46267.083061] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:322! ... <4>[46267.083951] RIP: 0010:[] [] nf_nat_setup_info+0x564/0x590 [nf_nat] ... <4>[46267.085549] Call Trace: <4>[46267.085622] [] alloc_null_binding+0x5b/0xa0 [iptable_nat] <4>[46267.085697] [] nf_nat_rule_find+0x5c/0x80 [iptable_nat] <4>[46267.085770] [] nf_nat_fn+0x111/0x260 [iptable_nat] <4>[46267.085843] [] nf_nat_out+0x48/0xd0 [iptable_nat] <4>[46267.085919] [] nf_iterate+0x69/0xb0 <4>[46267.085991] [] ? ip_finish_output+0x0/0x2f0 <4>[46267.086063] [] nf_hook_slow+0x74/0x110 <4>[46267.086133] [] ? ip_finish_output+0x0/0x2f0 <4>[46267.086207] [] ? dst_output+0x0/0x20 <4>[46267.086277] [] ip_output+0xa4/0xc0 <4>[46267.086346] [] raw_sendmsg+0x8b4/0x910 <4>[46267.086419] [] inet_sendmsg+0x4a/0xb0 <4>[46267.086491] [] ? sock_update_classid+0x3a/0x50 <4>[46267.086562] [] sock_sendmsg+0x117/0x140 <4>[46267.086638] [] ? _spin_unlock_bh+0x1b/0x20 <4>[46267.086712] [] ? autoremove_wake_function+0x0/0x40 <4>[46267.086785] [] ? do_ip_setsockopt+0x90/0xd80 <4>[46267.086858] [] ? call_function_interrupt+0xe/0x20 <4>[46267.086936] [] ? ub_slab_ptr+0x20/0x90 <4>[46267.087006] [] ? ub_slab_ptr+0x20/0x90 <4>[46267.087081] [] ? kmem_cache_alloc+0xd8/0x1e0 <4>[46267.087151] [] sys_sendto+0x139/0x190 <4>[46267.087229] [] ? sock_setsockopt+0x16d/0x6f0 <4>[46267.087303] [] ? audit_syscall_entry+0x1d7/0x200 <4>[46267.087378] [] ? __audit_syscall_exit+0x265/0x290 <4>[46267.087454] [] ? compat_sys_setsockopt+0x75/0x210 <4>[46267.087531] [] compat_sys_socketcall+0x13f/0x210 <4>[46267.087607] [] ia32_sysret+0x0/0x5 <4>[46267.087676] Code: 91 20 e2 01 75 29 48 89 de 4c 89 f7 e8 56 fa ff ff 85 c0 0f 84 68 fc ff ff 0f b6 4d c6 41 8b 45 00 e9 4d fb ff ff e8 7c 19 e9 e0 <0f> 0b eb fe f6 05 17 91 20 e2 80 74 ce 80 3d 5f 2e 00 00 00 74 <1>[46267.088023] RIP [] nf_nat_setup_info+0x564/0x590 Cc: Eric Dumazet Cc: Florian Westphal Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: Jozsef Kadlecsik Cc: "David S. Miller" Cc: Cyrill Gorcunov Signed-off-by: Andrey Vagin Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8824ed0..4d1fb5d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -312,6 +312,21 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); } +static inline bool +nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, + const struct nf_conntrack_tuple *tuple, + u16 zone) +{ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + /* A conntrack can be recreated with the equal tuple, + * so we need to check that the conntrack is confirmed + */ + return nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone && + nf_ct_is_confirmed(ct); +} + /* * Warning : * - Caller must take a reference on returned object @@ -333,8 +348,7 @@ ____nf_conntrack_find(struct net *net, u16 zone, local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { + if (nf_ct_key_equal(h, tuple, zone)) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -372,8 +386,7 @@ begin: !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || - nf_ct_zone(ct) != zone)) { + if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { nf_ct_put(ct); goto begin; } -- cgit v0.10.2 From 829d9315c46a2be57a8fb40c89aeb7db61513d96 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Feb 2014 13:07:24 +0100 Subject: netfilter: nf_nat_h323: fix crash in nf_ct_unlink_expect_report() Similar bug fixed in SIP module in 3f509c6 ("netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP expectation"). BUG: unable to handle kernel paging request at 00100104 IP: [] nf_ct_unlink_expect_report+0x57/0xf0 [nf_conntrack] ... Call Trace: [] ? del_timer+0x48/0x70 [] nf_ct_remove_expectations+0x47/0x60 [nf_conntrack] [] nf_ct_delete_from_lists+0x59/0x90 [nf_conntrack] [] death_by_timeout+0x14e/0x1c0 [nf_conntrack] [] ? nf_conntrack_set_hashsize+0x190/0x190 [nf_conntrack] [] call_timer_fn+0x1d/0x80 [] run_timer_softirq+0x18e/0x1a0 [] ? nf_conntrack_set_hashsize+0x190/0x190 [nf_conntrack] [] __do_softirq+0xa3/0x170 [] ? __local_bh_enable+0x70/0x70 [] ? irq_exit+0x67/0xa0 [] ? do_IRQ+0x46/0xb0 [] ? clockevents_notify+0x35/0x110 [] ? common_interrupt+0x2c/0x40 [] ? cpuidle_enter_state+0x41/0xf0 [] ? cpuidle_idle_call+0x8b/0x100 [] ? arch_cpu_idle+0x8/0x30 [] ? cpu_idle_loop+0x4b/0x140 [] ? cpu_startup_entry+0x18/0x20 [] ? rest_init+0x5d/0x70 [] ? start_kernel+0x2ec/0x2f2 [] ? repair_env_string+0x5b/0x5b [] ? i386_start_kernel+0x33/0x35 Signed-off-by: Alexey Dobriyan Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9eea059d..574f7eb 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); nated_port = 0; break; -- cgit v0.10.2 From e53376bef2cd97d3e3f61fdc677fb8da7d03d0da Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Feb 2014 20:01:53 +0100 Subject: netfilter: nf_conntrack: don't release a conntrack with non-zero refcnt With this patch, the conntrack refcount is initially set to zero and it is bumped once it is added to any of the list, so we fulfill Eric's golden rule which is that all released objects always have a refcount that equals zero. Andrey Vagin reports that nf_conntrack_free can't be called for a conntrack with non-zero ref-counter, because it can race with nf_conntrack_find_get(). A conntrack slab is created with SLAB_DESTROY_BY_RCU. Non-zero ref-counter says that this conntrack is used. So when we release a conntrack with non-zero counter, we break this assumption. CPU1 CPU2 ____nf_conntrack_find() nf_ct_put() destroy_conntrack() ... init_conntrack __nf_conntrack_alloc (set use = 1) atomic_inc_not_zero(&ct->use) (use = 2) if (!l4proto->new(ct, skb, dataoff, timeouts)) nf_conntrack_free(ct); (use = 2 !!!) ... __nf_conntrack_alloc (set use = 1) if (!nf_ct_key_equal(h, tuple, zone)) nf_ct_put(ct); (use = 0) destroy_conntrack() /* continue to work with CT */ After applying the path "[PATCH] netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get" another bug was triggered in destroy_conntrack(): <4>[67096.759334] ------------[ cut here ]------------ <2>[67096.759353] kernel BUG at net/netfilter/nf_conntrack_core.c:211! ... <4>[67096.759837] Pid: 498649, comm: atdd veid: 666 Tainted: G C --------------- 2.6.32-042stab084.18 #1 042stab084_18 /DQ45CB <4>[67096.759932] RIP: 0010:[] [] destroy_conntrack+0x15c/0x190 [nf_conntrack] <4>[67096.760255] Call Trace: <4>[67096.760255] [] nf_conntrack_destroy+0x17/0x30 <4>[67096.760255] [] nf_conntrack_find_get+0x85/0x130 [nf_conntrack] <4>[67096.760255] [] nf_conntrack_in+0x352/0xb60 [nf_conntrack] <4>[67096.760255] [] ipv4_conntrack_local+0x51/0x60 [nf_conntrack_ipv4] <4>[67096.760255] [] nf_iterate+0x69/0xb0 <4>[67096.760255] [] ? dst_output+0x0/0x20 <4>[67096.760255] [] nf_hook_slow+0x74/0x110 <4>[67096.760255] [] ? dst_output+0x0/0x20 <4>[67096.760255] [] raw_sendmsg+0x775/0x910 <4>[67096.760255] [] ? flush_tlb_others_ipi+0x128/0x130 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] inet_sendmsg+0x4a/0xb0 <4>[67096.760255] [] ? sock_sendmsg+0x13/0x140 <4>[67096.760255] [] sock_sendmsg+0x117/0x140 <4>[67096.760255] [] ? native_smp_send_reschedule+0x49/0x60 <4>[67096.760255] [] ? _spin_unlock_bh+0x1b/0x20 <4>[67096.760255] [] ? autoremove_wake_function+0x0/0x40 <4>[67096.760255] [] ? do_ip_setsockopt+0x90/0xd80 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] sys_sendto+0x139/0x190 <4>[67096.760255] [] ? audit_syscall_entry+0x1d7/0x200 <4>[67096.760255] [] ? __audit_syscall_exit+0x265/0x290 <4>[67096.760255] [] compat_sys_socketcall+0x13f/0x210 <4>[67096.760255] [] ia32_sysret+0x0/0x5 I have reused the original title for the RFC patch that Andrey posted and most of the original patch description. Cc: Eric Dumazet Cc: Andrew Vagin Cc: Florian Westphal Reported-by: Andrew Vagin Signed-off-by: Pablo Neira Ayuso Reviewed-by: Eric Dumazet Acked-by: Andrew Vagin diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 01ea6ee..b2ac624 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -284,6 +284,8 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); +void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4d1fb5d..356bef5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -448,7 +448,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) goto out; add_timer(&ct->timeout); - nf_conntrack_get(&ct->ct_general); + smp_wmb(); + /* The caller holds a reference to this object */ + atomic_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, repl_hash); NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); @@ -462,6 +464,21 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); +/* deletion from this larval template list happens via nf_ct_put() */ +void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) +{ + __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); + nf_conntrack_get(&tmpl->ct_general); + + spin_lock_bh(&nf_conntrack_lock); + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &net->ct.tmpl); + spin_unlock_bh(&nf_conntrack_lock); +} +EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -733,11 +750,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone, nf_ct_zone->id = zone; } #endif - /* - * changes to lookup keys must be done before setting refcnt to 1 + /* Because we use RCU lookups, we set ct_general.use to zero before + * this is inserted in any list. */ - smp_wmb(); - atomic_set(&ct->ct_general.use, 1); + atomic_set(&ct->ct_general.use, 0); return ct; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -761,6 +777,11 @@ void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + /* A freed object has refcnt == 0, that's + * the golden rule for SLAB_DESTROY_BY_RCU + */ + NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); + nf_ct_ext_destroy(ct); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); @@ -856,6 +877,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, NF_CT_STAT_INC(net, new); } + /* Now it is inserted into the unconfirmed list, bump refcount */ + nf_conntrack_get(&ct->ct_general); + /* Overload tuple linked list to put us in unconfirmed list. */ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &net->ct.unconfirmed); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 9858e3e..52e20c9 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) goto err2; if (!nfct_synproxy_ext_add(ct)) goto err2; - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_tmpl_insert(net, ct); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); @@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - nf_conntrack_free(snet->tmpl); + nf_ct_put(snet->tmpl); synproxy_proc_exit(net); free_percpu(snet->stats); } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 5929be6..75747ae 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err3; } - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); - - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &par->net->ct.tmpl); + nf_conntrack_tmpl_insert(par->net, ct); out: info->ct = ct; return 0; -- cgit v0.10.2 From 53b70287ddf487a38b7cbf0a10db28f40714b799 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Feb 2014 12:26:22 +0100 Subject: netfilter: nf_tables: fix overrun in nf_tables_set_alloc_name() The map that is used to allocate anonymous sets is indeed BITS_PER_BYTE * PAGE_SIZE long. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9ce3053..2a22a18 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1989,13 +1989,13 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (!sscanf(i->name, name, &tmp)) continue; - if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE) + if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE) continue; set_bit(tmp, inuse); } - n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); + n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE); free_page((unsigned long)inuse); } -- cgit v0.10.2 From ec2c9935688fbd5eaa7c975e3e21562c3da77363 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Feb 2014 15:03:35 +0000 Subject: netfilter: nf_tables: fix potential oops when dumping sets Commit c9c8e48597 (netfilter: nf_tables: dump sets in all existing families) changed nft_ctx_init_from_setattr() to only look up the address family if it is not NFPROTO_UNSPEC. However if it is NFPROTO_UNSPEC and a table attribute is given, nftables_afinfo_lookup() will dereference the NULL afi pointer. Fix by checking for non-NULL afi and also move a check added by that commit to the proper position. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2a22a18..3c5a219 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1943,6 +1943,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, } if (nla[NFTA_SET_TABLE] != NULL) { + if (afi == NULL) + return -EAFNOSUPPORT; + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -2428,6 +2431,8 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; @@ -2435,9 +2440,6 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; - if (nfmsg->nfgen_family == NFPROTO_UNSPEC) - return -EAFNOSUPPORT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); -- cgit v0.10.2 From 51292c0735eb2d9e29115cbf6264845e19a6c77d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Feb 2014 15:03:36 +0000 Subject: netfilter: nft_ct: fix missing NFT_CT_L3PROTOCOL key in validity checks The key was missing in the list of valid keys, add it. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index feaf0f3..46e2754 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -226,6 +226,7 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr, if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; break; + case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: case NFT_CT_SRC: case NFT_CT_DST: -- cgit v0.10.2 From 64d46806b6218c97f68742c5663a8ae3a5fbe838 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Feb 2014 15:03:37 +0000 Subject: netfilter: nf_tables: add AF specific expression support For the reject module, we need to add AF-specific implementations to get rid of incorrect module dependencies. Try to load an AF-specific module first and fall back to generic modules. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 57c8ff7..0f68e47 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -252,6 +252,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @owner: module reference * @policy: netlink attribute policy * @maxattr: highest netlink attribute number + * @family: address family for AF-specific types */ struct nft_expr_type { const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, @@ -262,6 +263,7 @@ struct nft_expr_type { struct module *owner; const struct nla_policy *policy; unsigned int maxattr; + u8 family; }; /** @@ -529,6 +531,9 @@ void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_CHAIN(family, name) \ MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) +#define MODULE_ALIAS_NFT_AF_EXPR(family, name) \ + MODULE_ALIAS("nft-expr-" __stringify(family) "-" name) + #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3c5a219..113c469 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1114,35 +1114,45 @@ void nft_unregister_expr(struct nft_expr_type *type) } EXPORT_SYMBOL_GPL(nft_unregister_expr); -static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla) +static const struct nft_expr_type *__nft_expr_type_get(u8 family, + struct nlattr *nla) { const struct nft_expr_type *type; list_for_each_entry(type, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, type->name)) + if (!nla_strcmp(nla, type->name) && + (!type->family || type->family == family)) return type; } return NULL; } -static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla) +static const struct nft_expr_type *nft_expr_type_get(u8 family, + struct nlattr *nla) { const struct nft_expr_type *type; if (nla == NULL) return ERR_PTR(-EINVAL); - type = __nft_expr_type_get(nla); + type = __nft_expr_type_get(family, nla); if (type != NULL && try_module_get(type->owner)) return type; #ifdef CONFIG_MODULES if (type == NULL) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-expr-%u-%.*s", family, + nla_len(nla), (char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_expr_type_get(family, nla)) + return ERR_PTR(-EAGAIN); + + nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-expr-%.*s", nla_len(nla), (char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (__nft_expr_type_get(nla)) + if (__nft_expr_type_get(family, nla)) return ERR_PTR(-EAGAIN); } #endif @@ -1193,7 +1203,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, if (err < 0) return err; - type = nft_expr_type_get(tb[NFTA_EXPR_NAME]); + type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]); if (IS_ERR(type)) return PTR_ERR(type); -- cgit v0.10.2 From cc4723ca316742891954efa346298e7c747c0d17 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Feb 2014 15:03:38 +0000 Subject: netfilter: nft_reject: split up reject module into IPv4 and IPv6 specifc parts Currently the nft_reject module depends on symbols from ipv6. This is wrong since no generic module should force IPv6 support to be loaded. Split up the module into AF-specific and a generic part. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h new file mode 100644 index 0000000..ecda759 --- /dev/null +++ b/include/net/netfilter/nft_reject.h @@ -0,0 +1,17 @@ +#ifndef _NFT_REJECT_H_ +#define _NFT_REJECT_H_ + +struct nft_reject { + enum nft_reject_types type:8; + u8 icmp_code; +}; + +extern const struct nla_policy nft_reject_policy[]; + +int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr); + +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 81c6910..a26ce03 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4 packet transformations such as the source, destination address and source and destination ports. +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + default NFT_REJECT + tristate + config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c16be9d..90b8240 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 0000000..e935d8d --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2013 Eric Leblond + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static struct nft_expr_type nft_reject_ipv4_type; +static const struct nft_expr_ops nft_reject_ipv4_ops = { + .type = &nft_reject_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv4_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "reject", + .ops = &nft_reject_ipv4_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv4_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv4_type); +} + +static void __exit nft_reject_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv4_type); +} + +module_init(nft_reject_ipv4_module_init); +module_exit(nft_reject_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 35750df..4bff1f2 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -50,6 +50,11 @@ config NFT_CHAIN_NAT_IPV6 packet transformations such as the source, destination address and source and destination ports. +config NFT_REJECT_IPV6 + depends on NF_TABLES_IPV6 + default NFT_REJECT + tristate + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d1b4928..70d3dd6 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o +obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c new file mode 100644 index 0000000..f732859 --- /dev/null +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2013 Eric Leblond + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static struct nft_expr_type nft_reject_ipv6_type; +static const struct nft_expr_ops nft_reject_ipv6_ops = { + .type = &nft_reject_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv6_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "reject", + .ops = &nft_reject_ipv6_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv6_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv6_type); +} + +static void __exit nft_reject_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv6_type); +} + +module_init(nft_reject_ipv6_module_init); +module_exit(nft_reject_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c374675..ed8b50e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -513,7 +513,6 @@ config NFT_QUEUE config NFT_REJECT depends on NF_TABLES - depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6 default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" help diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 5e204711..f3448c2 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -16,65 +16,23 @@ #include #include #include -#include -#include +#include -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) -#include -#endif - -struct nft_reject { - enum nft_reject_types type:8; - u8 icmp_code; - u8 family; -}; - -static void nft_reject_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - struct nft_reject *priv = nft_expr_priv(expr); -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); -#endif - switch (priv->type) { - case NFT_REJECT_ICMP_UNREACH: - if (priv->family == NFPROTO_IPV4) - nf_send_unreach(pkt->skb, priv->icmp_code); -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - else if (priv->family == NFPROTO_IPV6) - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); -#endif - break; - case NFT_REJECT_TCP_RST: - if (priv->family == NFPROTO_IPV4) - nf_send_reset(pkt->skb, pkt->ops->hooknum); -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - else if (priv->family == NFPROTO_IPV6) - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); -#endif - break; - } - - data[NFT_REG_VERDICT].verdict = NF_DROP; -} - -static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { +const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, }; +EXPORT_SYMBOL_GPL(nft_reject_policy); -static int nft_reject_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; - priv->family = ctx->afi->family; priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: @@ -89,8 +47,9 @@ static int nft_reject_init(const struct nft_ctx *ctx, return 0; } +EXPORT_SYMBOL_GPL(nft_reject_init); -static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_reject *priv = nft_expr_priv(expr); @@ -109,37 +68,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) nla_put_failure: return -1; } - -static struct nft_expr_type nft_reject_type; -static const struct nft_expr_ops nft_reject_ops = { - .type = &nft_reject_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), - .eval = nft_reject_eval, - .init = nft_reject_init, - .dump = nft_reject_dump, -}; - -static struct nft_expr_type nft_reject_type __read_mostly = { - .name = "reject", - .ops = &nft_reject_ops, - .policy = nft_reject_policy, - .maxattr = NFTA_REJECT_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_reject_module_init(void) -{ - return nft_register_expr(&nft_reject_type); -} - -static void __exit nft_reject_module_exit(void) -{ - nft_unregister_expr(&nft_reject_type); -} - -module_init(nft_reject_module_init); -module_exit(nft_reject_module_exit); +EXPORT_SYMBOL_GPL(nft_reject_dump); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("reject"); -- cgit v0.10.2 From 05513e9e33dbded8124567466a444d32173eecc6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Feb 2014 15:03:39 +0000 Subject: netfilter: nf_tables: add reject module for NFPROTO_INET Add a reject module for NFPROTO_INET. It does nothing but dispatch to the AF-specific modules based on the hook family. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index ecda759..36b0da2 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -14,4 +14,12 @@ int nft_reject_init(const struct nft_ctx *ctx, int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr); +void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + +void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + #endif diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index e935d8d..e79718a 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -20,9 +20,9 @@ #include #include -static void nft_reject_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -37,6 +37,7 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } +EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval); static struct nft_expr_type nft_reject_ipv4_type; static const struct nft_expr_ops nft_reject_ipv4_ops = { diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index f732859..0bc19fa 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -19,9 +19,9 @@ #include #include -static void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -38,6 +38,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } +EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); static struct nft_expr_type nft_reject_ipv6_type; static const struct nft_expr_ops nft_reject_ipv6_ops = { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ed8b50e..e9410d1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -520,6 +520,11 @@ config NFT_REJECT explicitly deny and notify via TCP reset/ICMP informational errors unallowed traffic. +config NFT_REJECT_INET + depends on NF_TABLES_INET + default NFT_REJECT + tristate + config NFT_COMPAT depends on NF_TABLES depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ee9c4de..bffdad7 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o +obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c new file mode 100644 index 0000000..8a310f2 --- /dev/null +++ b/net/netfilter/nft_reject_inet.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2014 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_reject_inet_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + switch (pkt->ops->pf) { + case NFPROTO_IPV4: + nft_reject_ipv4_eval(expr, data, pkt); + case NFPROTO_IPV6: + nft_reject_ipv6_eval(expr, data, pkt); + } +} + +static struct nft_expr_type nft_reject_inet_type; +static const struct nft_expr_ops nft_reject_inet_ops = { + .type = &nft_reject_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_inet_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "reject", + .ops = &nft_reject_inet_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_inet_module_init(void) +{ + return nft_register_expr(&nft_reject_inet_type); +} + +static void __exit nft_reject_inet_module_exit(void) +{ + nft_unregister_expr(&nft_reject_inet_type); +} + +module_init(nft_reject_inet_module_init); +module_exit(nft_reject_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_AF_EXPR(1, "reject"); -- cgit v0.10.2 From b8ecbee67c732ef9fc47fcf50aed6b7bb6231d98 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 6 Feb 2014 09:17:41 +0000 Subject: netfilter: nf_tables: fix log/queue expressions for NFPROTO_INET The log and queue expressions both store the family during ->init() and use it to deliver packets. This is wrong when used in NFPROTO_INET since they should both deliver to the actual AF of the packet, not the dummy NFPROTO_INET. Use the family from the hook ops to fix this. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 5af7901..26c5154 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -23,7 +23,6 @@ static const char *nft_log_null_prefix = ""; struct nft_log { struct nf_loginfo loginfo; char *prefix; - int family; }; static void nft_log_eval(const struct nft_expr *expr, @@ -33,7 +32,7 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_log *priv = nft_expr_priv(expr); struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } @@ -52,8 +51,6 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; - priv->family = ctx->afi->family; - nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index cbea473..e8ae2f6 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -25,7 +25,6 @@ struct nft_queue { u16 queuenum; u16 queues_total; u16 flags; - u8 family; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -43,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, priv->family, + priv->queues_total, pkt->ops->pf, jhash_initval); } } @@ -71,7 +70,6 @@ static int nft_queue_init(const struct nft_ctx *ctx, return -EINVAL; init_hashrandom(&jhash_initval); - priv->family = ctx->afi->family; priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); if (tb[NFTA_QUEUE_TOTAL] != NULL) -- cgit v0.10.2 From 0165d9325d6a3cf856e2cbbe64a0f4635ac75893 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Jan 2014 14:03:51 +0100 Subject: netfilter: nf_tables: fix racy rule deletion We may lost race if we flush the rule-set (which happens asynchronously via call_rcu) and we try to remove the table (that userspace assumes to be empty). Fix this by recovering synchronous rule and chain deletion. This was introduced time ago before we had no batch support, and synchronous rule deletion performance was not good. Now that we have the batch support, we can just postpone the purge of old rule in a second step in the commit phase. All object deletions are synchronous after this patch. As a side effect, we save memory as we don't need rcu_head per rule anymore. Cc: Patrick McHardy Reported-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f68e47..e7e14ff 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -322,7 +322,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * struct nft_rule - nf_tables rule * * @list: used internally - * @rcu_head: used internally for rcu * @handle: rule handle * @genmask: generation mask * @dlen: length of expression data @@ -330,7 +329,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) */ struct nft_rule { struct list_head list; - struct rcu_head rcu_head; u64 handle:46, genmask:2, dlen:16; @@ -391,7 +389,6 @@ enum nft_chain_flags { * * @rules: list of rules in the chain * @list: used internally - * @rcu_head: used internally * @net: net namespace that this chain belongs to * @table: table that this chain belongs to * @handle: chain handle @@ -403,7 +400,6 @@ enum nft_chain_flags { struct nft_chain { struct list_head rules; struct list_head list; - struct rcu_head rcu_head; struct net *net; struct nft_table *table; u64 handle; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 113c469..3a2e480 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1008,10 +1008,8 @@ notify: return 0; } -static void nf_tables_rcu_chain_destroy(struct rcu_head *head) +static void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); - BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { @@ -1059,7 +1057,9 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, family); /* Make sure all rule references are gone before this is released */ - call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy); + synchronize_rcu(); + + nf_tables_chain_destroy(chain); return 0; } @@ -1531,9 +1531,8 @@ err: return err; } -static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +static void nf_tables_rule_destroy(struct nft_rule *rule) { - struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); struct nft_expr *expr; /* @@ -1548,11 +1547,6 @@ static void nf_tables_rcu_rule_destroy(struct rcu_head *head) kfree(rule); } -static void nf_tables_rule_destroy(struct nft_rule *rule) -{ - call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); -} - #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -1819,9 +1813,6 @@ static int nf_tables_commit(struct sk_buff *skb) synchronize_rcu(); list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - /* Delete this rule from the dirty list */ - list_del(&rupd->list); - /* This rule was inactive in the past and just became active. * Clear the next bit of the genmask since its meaning has * changed, now it is the future. @@ -1832,6 +1823,7 @@ static int nf_tables_commit(struct sk_buff *skb) rupd->chain, rupd->rule, NFT_MSG_NEWRULE, 0, rupd->family); + list_del(&rupd->list); kfree(rupd); continue; } @@ -1841,7 +1833,15 @@ static int nf_tables_commit(struct sk_buff *skb) nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, rupd->rule, NFT_MSG_DELRULE, 0, rupd->family); + } + + /* Make sure we don't see any packet traversing old rules */ + synchronize_rcu(); + + /* Now we can safely release unused old rules */ + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { nf_tables_rule_destroy(rupd->rule); + list_del(&rupd->list); kfree(rupd); } @@ -1854,20 +1854,26 @@ static int nf_tables_abort(struct sk_buff *skb) struct nft_rule_trans *rupd, *tmp; list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - /* Delete all rules from the dirty list */ - list_del(&rupd->list); - if (!nft_rule_is_active_next(net, rupd->rule)) { nft_rule_clear(net, rupd->rule); + list_del(&rupd->list); kfree(rupd); continue; } /* This rule is inactive, get rid of it */ list_del_rcu(&rupd->rule->list); + } + + /* Make sure we don't see any packet accessing aborted rules */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { nf_tables_rule_destroy(rupd->rule); + list_del(&rupd->list); kfree(rupd); } + return 0; } -- cgit v0.10.2 From bd7fc645dabab423ef362186db2917f3919321d3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Feb 2014 12:53:07 +0100 Subject: netfilter: nf_tables: do not allow NFT_SET_ELEM_INTERVAL_END flag and data This combination is not allowed since end interval elements cannot contain data. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3a2e480..d0c790e3e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2741,6 +2741,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_DATA] == NULL && !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; + if (nla[NFTA_SET_ELEM_DATA] != NULL && + elem.flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; -- cgit v0.10.2 From 2fb91ddbf8e1dcb207e1e2085473aeaeff975102 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 6 Feb 2014 16:15:39 +0100 Subject: netfilter: nft_rbtree: fix data handling of end interval elements This patch fixes several things which related to the handling of end interval elements: * Chain use underflow with intervals and map: If you add a rule using intervals+map that introduces a loop, the error path of the rbtree set decrements the chain refcount for each side of the interval, leading to a chain use counter underflow. * Don't copy the data part of the end interval element since, this area is uninitialized and this confuses the loop detection code. * Don't allocate room for the data part of end interval elements since this is unused. So, after this patch the idea is that end interval elements don't have a data part. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index ca0c1b2..e21d69d 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -69,8 +69,10 @@ static void nft_rbtree_elem_destroy(const struct nft_set *set, struct nft_rbtree_elem *rbe) { nft_data_uninit(&rbe->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_uninit(rbe->data, set->dtype); + kfree(rbe); } @@ -108,7 +110,8 @@ static int nft_rbtree_insert(const struct nft_set *set, int err; size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) size += sizeof(rbe->data[0]); rbe = kzalloc(size, GFP_KERNEL); @@ -117,7 +120,8 @@ static int nft_rbtree_insert(const struct nft_set *set, rbe->flags = elem->flags; nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(rbe->data, &elem->data); err = __nft_rbtree_insert(set, rbe); @@ -153,7 +157,8 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) parent = parent->rb_right; else { elem->cookie = rbe; - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem->data, rbe->data); elem->flags = rbe->flags; return 0; @@ -177,7 +182,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem.data, rbe->data); elem.flags = rbe->flags; -- cgit v0.10.2 From 62f9c8b40d2db915f89a6aa47395412fb29f1cfc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Feb 2014 14:45:01 +0100 Subject: netfilter: nf_tables: fix loop checking with end interval elements Fix access to uninitialized data for end interval elements. The element data part is uninitialized in interval end elements. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d0c790e3e..adce01e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2998,6 +2998,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + return 0; + switch (elem->data.verdict) { case NFT_JUMP: case NFT_GOTO: -- cgit v0.10.2 From 6d8c00d58e9e484fdc41aaaf62e5d8364efe375a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Jan 2014 18:42:42 +0000 Subject: netfilter: nf_tables: unininline nft_trace_packet() It makes no sense to inline a rarely used function meant for debugging only that is called a total of five times in the main evaluation loop. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 0d879fc..90998a6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -103,9 +103,9 @@ static struct nf_loginfo trace_loginfo = { }, }; -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); -- cgit v0.10.2