summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig22
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/core.c51
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c229
-rw-r--r--net/netfilter/nf_conntrack_ecache.c22
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c44
-rw-r--r--net/netfilter/nf_conntrack_pptp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c81
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c39
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c89
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c131
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c53
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c3
-rw-r--r--net/netfilter/nf_conntrack_standalone.c3
-rw-r--r--net/netfilter/nf_log.c8
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c203
-rw-r--r--net/netfilter/nft_hash.c417
-rw-r--r--net/netfilter/nft_numgen.c192
-rw-r--r--net/netfilter/nft_quota.c121
-rw-r--r--net/netfilter/nft_set_hash.c404
-rw-r--r--net/netfilter/nft_set_rbtree.c (renamed from net/netfilter/nft_rbtree.c)12
-rw-r--r--net/netfilter/xt_conntrack.c4
-rw-r--r--net/netfilter/xt_physdev.c4
-rw-r--r--net/netfilter/xt_sctp.c2
28 files changed, 1233 insertions, 929 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9266cee..e8d56d9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -474,6 +474,12 @@ config NFT_META
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
+config NFT_NUMGEN
+ tristate "Netfilter nf_tables number generator module"
+ help
+ This option adds the number generator expression used to perform
+ incremental counting and random numbers bound to a upper limit.
+
config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
@@ -481,13 +487,13 @@ config NFT_CT
This option adds the "meta" expression that you can use to match
connection tracking information such as the flow state.
-config NFT_RBTREE
+config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
-config NFT_HASH
+config NFT_SET_HASH
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
@@ -542,6 +548,12 @@ config NFT_QUEUE
This is required if you intend to use the userspace queueing
infrastructure (also known as NFQUEUE) from nftables.
+config NFT_QUOTA
+ tristate "Netfilter nf_tables quota module"
+ help
+ This option adds the "quota" expression that you can use to match
+ enforce bytes quotas.
+
config NFT_REJECT
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
@@ -563,6 +575,12 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+config NFT_HASH
+ tristate "Netfilter nf_tables hash module"
+ help
+ This option adds the "hash" expression that you can use to perform
+ a hash operation on registers.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6913454..0c858110 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -80,18 +80,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
+obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
-obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
+obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index f39276d..2c5327e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -188,19 +188,17 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
static LIST_HEAD(nf_hook_list);
-int nf_register_hook(struct nf_hook_ops *reg)
+static int _nf_register_hook(struct nf_hook_ops *reg)
{
struct net *net, *last;
int ret;
- rtnl_lock();
for_each_net(net) {
ret = nf_register_net_hook(net, reg);
if (ret && ret != -ENOENT)
goto rollback;
}
list_add_tail(&reg->list, &nf_hook_list);
- rtnl_unlock();
return 0;
rollback:
@@ -210,19 +208,34 @@ rollback:
break;
nf_unregister_net_hook(net, reg);
}
+ return ret;
+}
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = _nf_register_hook(reg);
rtnl_unlock();
+
return ret;
}
EXPORT_SYMBOL(nf_register_hook);
-void nf_unregister_hook(struct nf_hook_ops *reg)
+static void _nf_unregister_hook(struct nf_hook_ops *reg)
{
struct net *net;
- rtnl_lock();
list_del(&reg->list);
for_each_net(net)
nf_unregister_net_hook(net, reg);
+}
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ rtnl_lock();
+ _nf_unregister_hook(reg);
rtnl_unlock();
}
EXPORT_SYMBOL(nf_unregister_hook);
@@ -246,6 +259,26 @@ err:
}
EXPORT_SYMBOL(nf_register_hooks);
+/* Caller MUST take rtnl_lock() */
+int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = _nf_register_hook(&reg[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ _nf_unregister_hooks(reg, i);
+ return err;
+}
+EXPORT_SYMBOL(_nf_register_hooks);
+
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
{
while (n-- > 0)
@@ -253,6 +286,14 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
}
EXPORT_SYMBOL(nf_unregister_hooks);
+/* Caller MUST take rtnl_lock */
+void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+ while (n-- > 0)
+ _nf_unregister_hook(&reg[n]);
+}
+EXPORT_SYMBOL(_nf_unregister_hooks);
+
unsigned int nf_iterate(struct list_head *head,
struct sk_buff *skb,
struct nf_hook_state *state,
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index f04fd8d..fc230d9 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
- /* Show what happens instead of calling nf_ct_kill() */
- if (del_timer(&ct->timeout)) {
- IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
+ if (nf_ct_kill(ct)) {
+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&tuple));
- if (ct->timeout.function)
- ct->timeout.function(ct->timeout.data);
} else {
IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
FMT_TUPLE "\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9934b0c..6570982 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 last_bucket;
+ bool exiting;
+};
+
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+#define GC_MAX_BUCKETS_DIV 64u
+#define GC_MAX_BUCKETS 8192u
+#define GC_INTERVAL (5 * HZ)
+#define GC_MAX_EVICTS 256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
spin_lock(lock);
@@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -372,7 +384,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
- NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -435,35 +446,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
+ if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ return false;
+
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_get_real_ns();
- if (nf_ct_is_dying(ct))
- goto delete;
-
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
- /* destroy event was not delivered */
+ /* destroy event was not delivered. nf_ct_put will
+ * be done by event cache worker on redelivery.
+ */
nf_ct_delete_from_lists(ct);
nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
nf_conntrack_ecache_work(nf_ct_net(ct));
- set_bit(IPS_DYING_BIT, &ct->status);
- delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
@@ -481,22 +487,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
{
- struct hlist_nulls_head *hptr;
- unsigned int sequence, hsz;
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return;
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hsz = nf_conntrack_htable_size;
- hptr = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ if (nf_ct_should_gc(ct))
+ nf_ct_kill(ct);
- *hash = hptr;
- *hsize = hsz;
+ nf_ct_put(ct);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
/*
* Warning :
@@ -510,16 +511,24 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket, sequence;
+ unsigned int bucket, hsize;
begin:
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- bucket = scale_hash(hash);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ bucket = reciprocal_scale(hash, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ struct nf_conn *ct;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
return h;
@@ -618,7 +627,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone, net))
goto out;
- add_timer(&ct->timeout);
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
@@ -771,8 +779,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
+ ct->timeout += nfct_time_stamp;
atomic_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
@@ -823,29 +830,41 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
- unsigned int hash, sequence;
+ unsigned int hash, hsize;
struct hlist_nulls_node *n;
struct nf_conn *ct;
zone = nf_ct_zone(ignored_conntrack);
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = hash_conntrack(net, tuple);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = __hash_conntrack(net, tuple, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (ct != ignored_conntrack &&
- nf_ct_key_equal(h, tuple, zone, net)) {
+
+ if (ct == ignored_conntrack)
+ continue;
+
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
}
NF_CT_STAT_INC_ATOMIC(net, searched);
}
+
+ if (get_nulls_value(n) != hash) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
rcu_read_unlock();
return 0;
@@ -867,6 +886,11 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+ }
+
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
@@ -884,7 +908,6 @@ static unsigned int early_drop_list(struct net *net,
*/
if (net_eq(nf_ct_net(tmp), net) &&
nf_ct_is_confirmed(tmp) &&
- del_timer(&tmp->timeout) &&
nf_ct_delete(tmp, 0, 0))
drops++;
@@ -900,14 +923,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned hash, sequence, drops;
+ unsigned int hash, hsize, drops;
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = scale_hash(_hash++);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = reciprocal_scale(_hash++, hsize);
drops = early_drop_list(net, &ct_hash[hash]);
rcu_read_unlock();
@@ -921,6 +941,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false;
}
+static void gc_worker(struct work_struct *work)
+{
+ unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned long next_run = GC_INTERVAL;
+ unsigned int ratio, scanned = 0;
+ struct conntrack_gc_work *gc_work;
+
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ i = gc_work->last_bucket;
+
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ unsigned int hashsz;
+ struct nf_conn *tmp;
+
+ i++;
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&ct_hash, &hashsz);
+ if (i >= hashsz)
+ i = 0;
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+ continue;
+ }
+ }
+
+ /* could check get_nulls_value() here and restart if ct
+ * was moved to another chain. But given gc is best-effort
+ * we will just continue with next hash slot.
+ */
+ rcu_read_unlock();
+ cond_resched_rcu_qs();
+ } while (++buckets < goal &&
+ expired_count < GC_MAX_EVICTS);
+
+ if (gc_work->exiting)
+ return;
+
+ ratio = scanned ? expired_count * 100 / scanned : 0;
+ if (ratio >= 90)
+ next_run = 0;
+
+ gc_work->last_bucket = i;
+ schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+}
+
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -957,8 +1040,6 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
- /* Don't set timer yet: wait for confirmation */
- setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
@@ -1332,7 +1413,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
/* Only update if this is not a fixed timeout */
@@ -1340,39 +1420,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
goto acct;
/* If not in hash table, timer will not be active yet */
- if (!nf_ct_is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- } else {
- unsigned long newtime = jiffies + extra_jiffies;
-
- /* Only update the timeout if the new timeout is at least
- HZ jiffies from the old timeout. Need del_timer for race
- avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ)
- mod_timer_pending(&ct->timeout, newtime);
- }
+ if (nf_ct_is_confirmed(ct))
+ extra_jiffies += nfct_time_stamp;
+ ct->timeout = extra_jiffies;
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
-bool __nf_ct_kill_acct(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
{
- if (do_acct)
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, ctinfo, skb->len);
- if (del_timer(&ct->timeout)) {
- ct->timeout.function((unsigned long)ct);
- return true;
- }
- return false;
+ return nf_ct_delete(ct, 0, 0);
}
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -1505,11 +1571,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, portid, report);
-
- /* ... else the timer will get him soon. */
+ nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
@@ -1545,6 +1608,7 @@ static int untrack_refs(void)
void nf_conntrack_cleanup_start(void)
{
+ conntrack_gc_work.exiting = true;
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
@@ -1554,6 +1618,7 @@ void nf_conntrack_cleanup_end(void)
while (untrack_refs() > 0)
schedule();
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
nf_conntrack_proto_fini();
@@ -1828,6 +1893,10 @@ int nf_conntrack_init_start(void)
}
/* - and look it like as a confirmed connection */
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+ conntrack_gc_work_init(&conntrack_gc_work);
+ schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
return 0;
err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d28011b..da9df2d 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ struct nf_conntrack_ecache *e;
- if (nf_ct_is_dying(ct))
+ if (!nf_ct_is_confirmed(ct))
+ continue;
+
+ e = nf_ct_ecache_find(ct);
+ if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
@@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
break;
}
- /* we've got the event delivered, now it's dying */
- set_bit(IPS_DYING_BIT, &ct->status);
+ e->state = NFCT_ECACHE_DESTROY_SENT;
refs[evicted] = ct;
if (++evicted >= ARRAY_SIZE(refs)) {
@@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
if (!e)
goto out_unlock;
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+ if (nf_ct_is_confirmed(ct)) {
struct nf_ct_event item = {
.ct = ct,
.portid = e->portid ? e->portid : portid,
@@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
* triggered by a process, we store the PORTID
* to include it in the retransmission.
*/
- if (eventmask & (1 << IPCT_DESTROY) &&
- e->portid == 0 && portid != 0)
- e->portid = portid;
- else
+ if (eventmask & (1 << IPCT_DESTROY)) {
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ e->state = NFCT_ECACHE_DESTROY_FAIL;
+ } else {
e->missed |= eventmask;
+ }
} else {
e->missed &= ~missed;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 4314700..b6934b5 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
}
delim = data[0];
if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
- pr_debug("try_eprt: invalid delimitter.\n");
+ pr_debug("try_eprt: invalid delimiter.\n");
return 0;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fdfc71f..c052b71 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -149,10 +149,7 @@ nla_put_failure:
static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
{
- long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
-
- if (timeout < 0)
- timeout = 0;
+ long timeout = nf_ct_expires(ct) / HZ;
if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
goto nla_put_failure;
@@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
+ struct nf_conn *nf_ct_evict[8];
+ int res, i;
spinlock_t *lockp;
last = (struct nf_conn *)cb->args[1];
+ i = 0;
local_bh_disable();
for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
if (cb->args[0] >= nf_conntrack_htable_size) {
@@ -837,6 +843,13 @@ restart:
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ if (i < ARRAY_SIZE(nf_ct_evict) &&
+ atomic_inc_not_zero(&ct->ct_general.use))
+ nf_ct_evict[i++] = ct;
+ continue;
+ }
+
if (!net_eq(net, nf_ct_net(ct)))
continue;
@@ -878,6 +891,13 @@ out:
if (last)
nf_ct_put(last);
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
return skb->len;
}
@@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
}
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
-
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
nf_ct_put(ct);
return 0;
@@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
{
u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- if (!del_timer(&ct->timeout))
- return -ETIME;
+ ct->timeout = nfct_time_stamp + timeout * HZ;
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
return 0;
}
@@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+ ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
rcu_read_lock();
if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5588c7a..f60a475 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
sibling->proto.gre.timeout = 0;
sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
+ nf_ct_kill(sibling);
nf_ct_put(sibling);
return 1;
} else {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b65d586..8d2c7d8 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
nf_ct_l3num(i) == l4proto->l3proto;
}
-static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- if (l3proto->l3proto == PF_INET)
- return &net->ct.nf_ct_proto;
- else
- return NULL;
-}
-
-static int nf_ct_l3proto_register_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- int err = 0;
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
- /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
- if (in == NULL)
- return 0;
-
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table != NULL) {
- err = nf_ct_register_sysctl(net,
- &in->ctl_table_header,
- l3proto->ctl_table_path,
- in->ctl_table);
- if (err < 0) {
- kfree(in->ctl_table);
- in->ctl_table = NULL;
- }
- }
-#endif
- return err;
-}
-
-static void nf_ct_l3proto_unregister_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-
- if (in == NULL)
- return;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table_header != NULL)
- nf_ct_unregister_sysctl(&in->ctl_table_header,
- &in->ctl_table,
- 0);
-#endif
-}
-
int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
@@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
int nf_ct_l3proto_pernet_register(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- int ret = 0;
+ int ret;
if (proto->init_net) {
ret = proto->init_net(net);
@@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net,
return ret;
}
- return nf_ct_l3proto_register_sysctl(net, proto);
+ return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
@@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
void nf_ct_l3proto_pernet_unregister(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- nf_ct_l3proto_unregister_sysctl(net, proto);
-
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
@@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
}
}
}
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
- if (err < 0) {
- nf_ct_kfree_compat_sysctl_table(pn);
- goto out;
- }
- err = nf_ct_register_sysctl(net,
- &pn->ctl_compat_header,
- "net/ipv4/netfilter",
- pn->ctl_compat_table);
- if (err == 0)
- goto out;
-
- nf_ct_kfree_compat_sysctl_table(pn);
- nf_ct_unregister_sysctl(&pn->ctl_table_header,
- &pn->ctl_table,
- pn->users);
- }
-out:
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
return err;
}
@@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
pn->users);
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
- nf_ct_unregister_sysctl(&pn->ctl_compat_header,
- &pn->ctl_compat_table,
- 0);
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 399a38f..a45bee5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
{
struct dccp_hdr _hdr, *dh;
- dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (dh == NULL)
return false;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 86dc752..d5868ba 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table generic_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_generic_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_generic_net *gn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table,
- sizeof(generic_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &gn->timeout;
-#endif
-#endif
- return 0;
-}
-
static int generic_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_generic_net *gn = generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
gn->timeout = nf_ct_generic_timeout;
- ret = generic_kmemdup_compat_sysctl_table(pn, gn);
- if (ret < 0)
- return ret;
-
- ret = generic_kmemdup_sysctl_table(pn, gn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
-
- return ret;
+ return generic_kmemdup_sysctl_table(pn, gn);
}
static struct nf_proto_net *generic_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1d7ab96..982ea62 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
const struct sctphdr *hp;
struct sctphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = {
},
{ }
};
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table sctp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_sctp_timeout_closed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_cookie_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif
static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct sctp_net *sn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
- sizeof(sctp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
- pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
- pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
- pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
- pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
- pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
- pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
-#endif
-#endif
- return 0;
-}
-
static int sctp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
@@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
sn->timeouts[i] = sctp_timeouts[i];
}
- if (proto == AF_INET) {
- ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
- if (ret < 0)
- return ret;
-
- ret = sctp_kmemdup_sysctl_table(pn, sn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = sctp_kmemdup_sysctl_table(pn, sn);
-
- return ret;
+ return sctp_kmemdup_sysctl_table(pn, sn);
}
static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70c8381..69f6877 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
const struct tcphdr *hp;
struct tcphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = {
},
{ }
};
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table tcp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_tcp_timeout_syn_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_syn_sent2",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_syn_recv",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_fin_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_close_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_last_ack",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_time_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_close",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_loose",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_tcp_be_liberal",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_tcp_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_tcp_net *tn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
- sizeof(tcp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
- pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
- pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
- pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
- pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
- pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
- pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
- pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
- pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
- pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
- pn->ctl_compat_table[10].data = &tn->tcp_loose;
- pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
- pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
-#endif
-#endif
- return 0;
-}
-
static int tcp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_proto_net *pn = &tn->pn;
@@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- if (proto == AF_INET) {
- ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
- if (ret < 0)
- return ret;
-
- ret = tcp_kmemdup_sysctl_table(pn, tn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = tcp_kmemdup_sysctl_table(pn, tn);
-
- return ret;
+ return tcp_kmemdup_sysctl_table(pn, tn);
}
static struct nf_proto_net *tcp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 4fd0405..20f35ed 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb,
const struct udphdr *hp;
struct udphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table udp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_udp_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_udp_timeout_stream",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_udp_net *un)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table,
- sizeof(udp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
- pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
-#endif
- return 0;
-}
-
static int udp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_udp_net *un = udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
@@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto)
un->timeouts[i] = udp_timeouts[i];
}
- if (proto == AF_INET) {
- ret = udp_kmemdup_compat_sysctl_table(pn, un);
- if (ret < 0)
- return ret;
-
- ret = udp_kmemdup_sysctl_table(pn, un);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = udp_kmemdup_sysctl_table(pn, un);
-
- return ret;
+ return udp_kmemdup_sysctl_table(pn, un);
}
static struct nf_proto_net *udp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 9d692f5..029206e 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
const struct udphdr *hp;
struct udphdr _hdr;
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9f267c3..3d9a316 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -228,8 +228,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
seq_printf(s, "%-8s %u %-8s %u %ld ",
l3proto->name, nf_ct_l3num(ct),
l4proto->name, nf_ct_protonum(ct),
- timer_pending(&ct->timeout)
- ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+ nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
l4proto->print_conntrack(s, ct);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a..30a17d6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
return NULL;
}
-void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
{
const struct nf_logger *log;
- if (pf == NFPROTO_UNSPEC)
- return;
+ if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+ return -EOPNOTSUPP;
mutex_lock(&nf_log_mutex);
log = nft_log_dereference(net->nf.nf_loggers[pf]);
@@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
mutex_unlock(&nf_log_mutex);
+
+ return 0;
}
EXPORT_SYMBOL(nf_log_set);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ecee105..bbb8f3d 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -566,16 +566,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- if (!del_timer(&ct->timeout))
- return 1;
-
ct->status &= ~IPS_NAT_DONE_MASK;
-
rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
nf_nat_bysource_params);
- add_timer(&ct->timeout);
-
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
*/
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7e1c876..bd9715e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
+struct nft_chain_hook {
+ u32 num;
+ u32 priority;
+ const struct nf_chain_type *type;
+ struct net_device *dev;
+};
+
+static int nft_chain_parse_hook(struct net *net,
+ const struct nlattr * const nla[],
+ struct nft_af_info *afi,
+ struct nft_chain_hook *hook, bool create)
+{
+ struct nlattr *ha[NFTA_HOOK_MAX + 1];
+ const struct nf_chain_type *type;
+ struct net_device *dev;
+ int err;
+
+ err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+ nft_hook_policy);
+ if (err < 0)
+ return err;
+
+ if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+ ha[NFTA_HOOK_PRIORITY] == NULL)
+ return -EINVAL;
+
+ hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+ if (hook->num >= afi->nhooks)
+ return -EINVAL;
+
+ hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+
+ type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+ if (nla[NFTA_CHAIN_TYPE]) {
+ type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
+ create);
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+ }
+ if (!(type->hook_mask & (1 << hook->num)))
+ return -EOPNOTSUPP;
+ if (!try_module_get(type->owner))
+ return -ENOENT;
+
+ hook->type = type;
+
+ hook->dev = NULL;
+ if (afi->flags & NFT_AF_NEEDS_DEV) {
+ char ifname[IFNAMSIZ];
+
+ if (!ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+ dev = dev_get_by_name(net, ifname);
+ if (!dev) {
+ module_put(type->owner);
+ return -ENOENT;
+ }
+ hook->dev = dev;
+ } else if (ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void nft_chain_release_hook(struct nft_chain_hook *hook)
+{
+ module_put(hook->type->owner);
+ if (hook->dev != NULL)
+ dev_put(hook->dev);
+}
+
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
- struct nlattr *ha[NFTA_HOOK_MAX + 1];
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
@@ -1273,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_base_chain *basechain;
+ struct nft_chain_hook hook;
+ struct nf_hook_ops *ops;
+
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(net, nla, afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
if (nla[NFTA_CHAIN_HANDLE] && name) {
struct nft_chain *chain2;
@@ -1320,102 +1426,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOVERFLOW;
if (nla[NFTA_CHAIN_HOOK]) {
- const struct nf_chain_type *type;
+ struct nft_chain_hook hook;
struct nf_hook_ops *ops;
nf_hookfn *hookfn;
- u32 hooknum, priority;
-
- type = chain_type[family][NFT_CHAIN_T_DEFAULT];
- if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(afi,
- nla[NFTA_CHAIN_TYPE],
- create);
- if (IS_ERR(type))
- return PTR_ERR(type);
- }
- err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
- nft_hook_policy);
+ err = nft_chain_parse_hook(net, nla, afi, &hook, create);
if (err < 0)
return err;
- if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
- ha[NFTA_HOOK_PRIORITY] == NULL)
- return -EINVAL;
-
- hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
- if (hooknum >= afi->nhooks)
- return -EINVAL;
- priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
-
- if (!(type->hook_mask & (1 << hooknum)))
- return -EOPNOTSUPP;
- if (!try_module_get(type->owner))
- return -ENOENT;
- hookfn = type->hooks[hooknum];
-
- if (afi->flags & NFT_AF_NEEDS_DEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = dev_get_by_name(net, ifname);
- if (!dev) {
- module_put(type->owner);
- return -ENOENT;
- }
- } else if (ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
- module_put(type->owner);
- if (dev != NULL)
- dev_put(dev);
+ nft_chain_release_hook(&hook);
return -ENOMEM;
}
- if (dev != NULL)
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+ if (hook.dev != NULL)
+ strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
- module_put(type->owner);
+ nft_chain_release_hook(&hook);
kfree(basechain);
- if (dev != NULL)
- dev_put(dev);
return PTR_ERR(stats);
}
basechain->stats = stats;
} else {
stats = netdev_alloc_pcpu_stats(struct nft_stats);
if (stats == NULL) {
- module_put(type->owner);
+ nft_chain_release_hook(&hook);
kfree(basechain);
- if (dev != NULL)
- dev_put(dev);
return -ENOMEM;
}
rcu_assign_pointer(basechain->stats, stats);
}
- basechain->type = type;
+ hookfn = hook.type->hooks[hook.num];
+ basechain->type = hook.type;
chain = &basechain->chain;
for (i = 0; i < afi->nops; i++) {
ops = &basechain->ops[i];
ops->pf = family;
- ops->hooknum = hooknum;
- ops->priority = priority;
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
ops->priv = chain;
ops->hook = afi->hooks[ops->hooknum];
- ops->dev = dev;
+ ops->dev = hook.dev;
if (hookfn)
ops->hook = hookfn;
if (afi->hook_ops_init)
@@ -3426,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
}
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
- const struct nlattr *attr)
+ const struct nlattr *attr, u32 nlmsg_flags)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc d1, d2;
struct nft_set_ext_tmpl tmpl;
- struct nft_set_ext *ext;
+ struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
struct nft_userdata *udata;
@@ -3558,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err4;
ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
- err = set->ops->insert(ctx->net, set, &elem);
- if (err < 0)
+ err = set->ops->insert(ctx->net, set, &elem, &ext2);
+ if (err) {
+ if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+ memcmp(nft_set_ext_data(ext),
+ nft_set_ext_data(ext2), set->dlen) != 0)
+ err = -EBUSY;
+ else if (!(nlmsg_flags & NLM_F_EXCL))
+ err = 0;
+ }
goto err5;
+ }
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -3616,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
!atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
return -ENFILE;
- err = nft_add_set_elem(&ctx, set, attr);
+ err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
if (err < 0) {
atomic_dec(&set->nelems);
break;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 564fa79..764251d 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,395 +1,136 @@
/*
- * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/log2.h>
-#include <linux/jhash.h>
#include <linux/netlink.h>
-#include <linux/workqueue.h>
-#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-
-/* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
struct nft_hash {
- struct rhashtable ht;
- struct delayed_work gc_work;
-};
-
-struct nft_hash_elem {
- struct rhash_head node;
- struct nft_set_ext ext;
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ u8 len;
+ u32 modulus;
+ u32 seed;
};
-struct nft_hash_cmp_arg {
- const struct nft_set *set;
- const u32 *key;
- u8 genmask;
-};
-
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
-{
- const struct nft_hash_cmp_arg *arg = data;
-
- return jhash(arg->key, len, seed);
-}
-
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
-{
- const struct nft_hash_elem *he = data;
-
- return jhash(nft_set_ext_key(&he->ext), len, seed);
-}
-
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
- const void *ptr)
-{
- const struct nft_hash_cmp_arg *x = arg->key;
- const struct nft_hash_elem *he = ptr;
-
- if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
- return 1;
- if (nft_set_elem_expired(&he->ext))
- return 1;
- if (!nft_set_elem_active(&he->ext, x->genmask))
- return 1;
- return 0;
-}
-
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
-{
- struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_cur(net),
- .set = set,
- .key = key,
- };
-
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- *ext = &he->ext;
-
- return !!he;
-}
-
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
- void *(*new)(struct nft_set *,
- const struct nft_expr *,
- struct nft_regs *regs),
- const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_set_ext **ext)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = NFT_GENMASK_ANY,
- .set = set,
- .key = key,
- };
-
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- goto out;
-
- he = new(set, expr, regs);
- if (he == NULL)
- goto err1;
- if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params))
- goto err2;
-out:
- *ext = &he->ext;
- return true;
-
-err2:
- nft_set_elem_destroy(set, he);
-err1:
- return false;
-}
-
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(net),
- .set = set,
- .key = elem->key.val.data,
- };
-
- return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params);
-}
-
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_hash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash *priv = nft_expr_priv(expr);
+ const void *data = &regs->data[priv->sreg];
- nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
+ regs->data[priv->dreg] =
+ reciprocal_scale(jhash(data, priv->len, priv->seed),
+ priv->modulus);
}
-static void *nft_hash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(net),
- .set = set,
- .key = elem->key.val.data,
- };
-
- rcu_read_lock();
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL) {
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext))
- nft_set_elem_change_active(net, set, &he->ext);
- else
- he = NULL;
- }
- rcu_read_unlock();
-
- return he;
-}
-
-static void nft_hash_remove(const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
-
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-}
-
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
- struct nft_set_iter *iter)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct rhashtable_iter hti;
- struct nft_set_elem elem;
- int err;
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- iter->err = err;
- if (err)
- return;
-
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
-
- while ((he = rhashtable_walk_next(&hti))) {
- if (IS_ERR(he)) {
- err = PTR_ERR(he);
- if (err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
-
- continue;
- }
-
- if (iter->count < iter->skip)
- goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
-
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
- goto out;
-
-cont:
- iter->count++;
- }
-
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-}
-
-static void nft_hash_gc(struct work_struct *work)
-{
- struct nft_set *set;
- struct nft_hash_elem *he;
- struct nft_hash *priv;
- struct nft_set_gc_batch *gcb = NULL;
- struct rhashtable_iter hti;
- int err;
-
- priv = container_of(work, struct nft_hash, gc_work.work);
- set = nft_set_container_of(priv);
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- if (err)
- goto schedule;
-
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN)
- goto out;
-
- while ((he = rhashtable_walk_next(&hti))) {
- if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- goto out;
- continue;
- }
-
- if (!nft_set_elem_expired(&he->ext))
- continue;
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
-
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- goto out;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
- }
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-
- nft_set_gc_batch_complete(gcb);
-schedule:
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
-}
-
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
-{
- return sizeof(struct nft_hash);
-}
-
-static const struct rhashtable_params nft_hash_params = {
- .head_offset = offsetof(struct nft_hash_elem, node),
- .hashfn = nft_hash_key,
- .obj_hashfn = nft_hash_obj,
- .obj_cmpfn = nft_hash_cmp,
- .automatic_shrinking = true,
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+ [NFTA_HASH_SREG] = { .type = NLA_U32 },
+ [NFTA_HASH_DREG] = { .type = NLA_U32 },
+ [NFTA_HASH_LEN] = { .type = NLA_U32 },
+ [NFTA_HASH_MODULUS] = { .type = NLA_U32 },
+ [NFTA_HASH_SEED] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_set *set,
- const struct nft_set_desc *desc,
+static int nft_hash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_set_priv(set);
- struct rhashtable_params params = nft_hash_params;
- int err;
+ struct nft_hash *priv = nft_expr_priv(expr);
+ u32 len;
- params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
- params.key_len = set->klen;
+ if (!tb[NFTA_HASH_SREG] ||
+ !tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_LEN] ||
+ !tb[NFTA_HASH_SEED] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
- err = rhashtable_init(&priv->ht, &params);
- if (err < 0)
- return err;
+ priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
- return 0;
-}
+ len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
+ if (len == 0 || len > U8_MAX)
+ return -ERANGE;
-static void nft_hash_elem_destroy(void *ptr, void *arg)
-{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr);
-}
+ priv->len = len;
-static void nft_hash_destroy(const struct nft_set *set)
-{
- struct nft_hash *priv = nft_set_priv(set);
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
- cancel_delayed_work_sync(&priv->gc_work);
- rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
- (void *)set);
+ priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+
+ return nft_validate_register_load(priv->sreg, len) &&
+ nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
}
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
- struct nft_set_estimate *est)
+static int nft_hash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
- unsigned int esize;
+ const struct nft_hash *priv = nft_expr_priv(expr);
- esize = sizeof(struct nft_hash_elem);
- if (desc->size) {
- est->size = sizeof(struct nft_hash) +
- roundup_pow_of_two(desc->size * 4 / 3) *
- sizeof(struct nft_hash_elem *) +
- desc->size * esize;
- } else {
- /* Resizing happens when the load drops below 30% or goes
- * above 75%. The average of 52.5% load (approximated by 50%)
- * is used for the size estimation of the hash buckets,
- * meaning we calculate two buckets per element.
- */
- est->size = esize + 2 * sizeof(struct nft_hash_elem *);
- }
+ if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+ goto nla_put_failure;
- est->class = NFT_SET_CLASS_O_1;
+ return 0;
- return true;
+nla_put_failure:
+ return -1;
}
-static struct nft_set_ops nft_hash_ops __read_mostly = {
- .privsize = nft_hash_privsize,
- .elemsize = offsetof(struct nft_hash_elem, ext),
- .estimate = nft_hash_estimate,
+static struct nft_expr_type nft_hash_type;
+static const struct nft_expr_ops nft_hash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+ .eval = nft_hash_eval,
.init = nft_hash_init,
- .destroy = nft_hash_destroy,
- .insert = nft_hash_insert,
- .activate = nft_hash_activate,
- .deactivate = nft_hash_deactivate,
- .remove = nft_hash_remove,
- .lookup = nft_hash_lookup,
- .update = nft_hash_update,
- .walk = nft_hash_walk,
- .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .dump = nft_hash_dump,
+};
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+ .name = "hash",
+ .ops = &nft_hash_ops,
+ .policy = nft_hash_policy,
+ .maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
};
static int __init nft_hash_module_init(void)
{
- return nft_register_set(&nft_hash_ops);
+ return nft_register_expr(&nft_hash_type);
}
static void __exit nft_hash_module_exit(void)
{
- nft_unregister_set(&nft_hash_ops);
+ nft_unregister_expr(&nft_hash_type);
}
module_init(nft_hash_module_init);
module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644
index 0000000..294745e
--- /dev/null
+++ b/net/netfilter/nft_numgen.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+
+struct nft_ng_inc {
+ enum nft_registers dreg:8;
+ u32 until;
+ atomic_t counter;
+};
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_ng_inc *priv = nft_expr_priv(expr);
+ u32 nval, oval;
+
+ do {
+ oval = atomic_read(&priv->counter);
+ nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+ } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+
+ memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32));
+}
+
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+ [NFTA_NG_DREG] = { .type = NLA_U32 },
+ [NFTA_NG_UNTIL] = { .type = NLA_U32 },
+ [NFTA_NG_TYPE] = { .type = NLA_U32 },
+};
+
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+ if (priv->until == 0)
+ return -ERANGE;
+
+ priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+ atomic_set(&priv->counter, 0);
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+ u32 until, enum nft_ng_types type)
+{
+ if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+}
+
+struct nft_ng_random {
+ enum nft_registers dreg:8;
+ u32 until;
+};
+
+static void nft_ng_random_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_ng_random *priv = nft_expr_priv(expr);
+ struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+
+ regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
+ priv->until);
+}
+
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+ if (priv->until == 0)
+ return -ERANGE;
+
+ prandom_init_once(&nft_numgen_prandom_state);
+
+ priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+}
+
+static struct nft_expr_type nft_ng_type;
+static const struct nft_expr_ops nft_ng_inc_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+ .eval = nft_ng_inc_eval,
+ .init = nft_ng_inc_init,
+ .dump = nft_ng_inc_dump,
+};
+
+static const struct nft_expr_ops nft_ng_random_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+ .eval = nft_ng_random_eval,
+ .init = nft_ng_random_init,
+ .dump = nft_ng_random_dump,
+};
+
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_NG_DREG] ||
+ !tb[NFTA_NG_UNTIL] ||
+ !tb[NFTA_NG_TYPE])
+ return ERR_PTR(-EINVAL);
+
+ type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE]));
+
+ switch (type) {
+ case NFT_NG_INCREMENTAL:
+ return &nft_ng_inc_ops;
+ case NFT_NG_RANDOM:
+ return &nft_ng_random_ops;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ng_type __read_mostly = {
+ .name = "numgen",
+ .select_ops = &nft_ng_select_ops,
+ .policy = nft_ng_policy,
+ .maxattr = NFTA_NG_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_ng_module_init(void)
+{
+ return nft_register_expr(&nft_ng_type);
+}
+
+static void __exit nft_ng_module_exit(void)
+{
+ nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644
index 0000000..6eafbf9
--- /dev/null
+++ b/net/netfilter/nft_quota.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_quota {
+ u64 quota;
+ bool invert;
+ atomic64_t remain;
+};
+
+static inline long nft_quota(struct nft_quota *priv,
+ const struct nft_pktinfo *pkt)
+{
+ return atomic64_sub_return(pkt->skb->len, &priv->remain);
+}
+
+static void nft_quota_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ if (nft_quota(priv, pkt) < 0 && !priv->invert)
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+ [NFTA_QUOTA_BYTES] = { .type = NLA_U64 },
+ [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+ u32 flags = 0;
+ u64 quota;
+
+ if (!tb[NFTA_QUOTA_BYTES])
+ return -EINVAL;
+
+ quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+ if (quota > S64_MAX)
+ return -EOVERFLOW;
+
+ if (tb[NFTA_QUOTA_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+ if (flags & ~NFT_QUOTA_F_INV)
+ return -EINVAL;
+ }
+
+ priv->quota = quota;
+ priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
+ atomic64_set(&priv->remain, quota);
+
+ return 0;
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_quota *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+
+ if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+ NFTA_QUOTA_PAD) ||
+ nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_quota_type;
+static const struct nft_expr_ops nft_quota_ops = {
+ .type = &nft_quota_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+ .eval = nft_quota_eval,
+ .init = nft_quota_init,
+ .dump = nft_quota_dump,
+};
+
+static struct nft_expr_type nft_quota_type __read_mostly = {
+ .name = "quota",
+ .ops = &nft_quota_ops,
+ .policy = nft_quota_policy,
+ .maxattr = NFTA_QUOTA_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_quota_module_init(void)
+{
+ return nft_register_expr(&nft_quota_type);
+}
+
+static void __exit nft_quota_module_exit(void)
+{
+ nft_unregister_expr(&nft_quota_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
new file mode 100644
index 0000000..3794cb2
--- /dev/null
+++ b/net/netfilter/nft_set_hash.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/workqueue.h>
+#include <linux/rhashtable.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_HASH_ELEMENT_HINT 3
+
+struct nft_hash {
+ struct rhashtable ht;
+ struct delayed_work gc_work;
+};
+
+struct nft_hash_elem {
+ struct rhash_head node;
+ struct nft_set_ext ext;
+};
+
+struct nft_hash_cmp_arg {
+ const struct nft_set *set;
+ const u32 *key;
+ u8 genmask;
+};
+
+static const struct rhashtable_params nft_hash_params;
+
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_cmp_arg *arg = data;
+
+ return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_elem *he = data;
+
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_hash_cmp_arg *x = arg->key;
+ const struct nft_hash_elem *he = ptr;
+
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+ return 1;
+ return 0;
+}
+
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ *ext = &he->ext;
+
+ return !!he;
+}
+
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ goto out;
+
+ he = new(set, expr, regs);
+ if (he == NULL)
+ goto err1;
+ if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params))
+ goto err2;
+out:
+ *ext = &he->ext;
+ return true;
+
+err2:
+ nft_set_elem_destroy(set, he);
+err1:
+ return false;
+}
+
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+ struct nft_hash_elem *prev;
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
+ if (IS_ERR(prev))
+ return PTR_ERR(prev);
+ if (prev) {
+ *ext = &prev->ext;
+ return -EEXIST;
+ }
+ return 0;
+}
+
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash_elem *he = elem->priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
+}
+
+static void *nft_hash_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ rcu_read_lock();
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL) {
+ if (!nft_set_elem_mark_busy(&he->ext) ||
+ !nft_is_active(net, &he->ext))
+ nft_set_elem_change_active(net, set, &he->ext);
+ else
+ he = NULL;
+ }
+ rcu_read_unlock();
+
+ return he;
+}
+
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct rhashtable_iter hti;
+ struct nft_set_elem elem;
+ int err;
+
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+ iter->err = err;
+ if (err)
+ return;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ err = PTR_ERR(he);
+ if (err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+
+ continue;
+ }
+
+ if (iter->count < iter->skip)
+ goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = he;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ goto out;
+
+cont:
+ iter->count++;
+ }
+
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+}
+
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_hash);
+}
+
+static const struct rhashtable_params nft_hash_params = {
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .hashfn = nft_hash_key,
+ .obj_hashfn = nft_hash_obj,
+ .obj_cmpfn = nft_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const tb[])
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct rhashtable_params params = nft_hash_params;
+ int err;
+
+ params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+ params.key_len = set->klen;
+
+ err = rhashtable_init(&priv->ht, &params);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ return 0;
+}
+
+static void nft_hash_elem_destroy(void *ptr, void *arg)
+{
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+
+ cancel_delayed_work_sync(&priv->gc_work);
+ rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+ (void *)set);
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int esize;
+
+ esize = sizeof(struct nft_hash_elem);
+ if (desc->size) {
+ est->size = sizeof(struct nft_hash) +
+ roundup_pow_of_two(desc->size * 4 / 3) *
+ sizeof(struct nft_hash_elem *) +
+ desc->size * esize;
+ } else {
+ /* Resizing happens when the load drops below 30% or goes
+ * above 75%. The average of 52.5% load (approximated by 50%)
+ * is used for the size estimation of the hash buckets,
+ * meaning we calculate two buckets per element.
+ */
+ est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+ }
+
+ est->class = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+ .privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
+ .estimate = nft_hash_estimate,
+ .init = nft_hash_init,
+ .destroy = nft_hash_destroy,
+ .insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
+ .remove = nft_hash_remove,
+ .lookup = nft_hash_lookup,
+ .update = nft_hash_update,
+ .walk = nft_hash_walk,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+ return nft_register_set(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+ nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c
index ffe9ae0..38b5bda 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -96,7 +96,8 @@ out:
}
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
- struct nft_rbtree_elem *new)
+ struct nft_rbtree_elem *new,
+ struct nft_set_ext **ext)
{
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
@@ -124,8 +125,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
else if (!nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_end(new))
p = &parent->rb_right;
- else
+ else {
+ *ext = &rbe->ext;
return -EEXIST;
+ }
}
}
}
@@ -135,13 +138,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
{
struct nft_rbtree_elem *rbe = elem->priv;
int err;
spin_lock_bh(&nft_rbtree_lock);
- err = __nft_rbtree_insert(net, set, rbe);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
spin_unlock_bh(&nft_rbtree_lock);
return err;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 188404b9..a3b8f69 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
return false;
if (info->match_flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires = 0;
+ unsigned long expires = nf_ct_expires(ct) / HZ;
- if (timer_pending(&ct->timeout))
- expires = (ct->timeout.expires - jiffies) / HZ;
if ((expires >= info->expires_min &&
expires <= info->expires_max) ^
!(info->invert_flags & XT_CONNTRACK_EXPIRES))
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index e5f1898..bb33598 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
- pr_info("using --physdev-out and --physdev-is-out are only"
- "supported in the FORWARD and POSTROUTING chains with"
+ pr_info("using --physdev-out and --physdev-is-out are only "
+ "supported in the FORWARD and POSTROUTING chains with "
"bridged traffic.\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index ef36a56..4dedb96 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb,
++i, offset, sch->type, htons(sch->length),
sch->flags);
#endif
- offset += WORD_ROUND(ntohs(sch->length));
+ offset += SCTP_PAD4(ntohs(sch->length));
pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);