From 4c552a64dfc2063ef060cf45788cd4250eea3596 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Sun, 1 Jun 2014 19:42:58 -0600 Subject: netfilter: nfnetlink_acct: Fix memory leak Allocation of memory need only to happen once, that is after the proper checks on the NFACCT_FLAGS have been done. Otherwise the code can return without freeing already allocated memory. Signed-off-by: Mathieu Poirier Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 70e86bb..54af985 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -83,7 +83,6 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EBUSY; } - nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); if (tb[NFACCT_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); if (flags & ~NFACCT_F_QUOTA) -- cgit v0.10.2 From 46bbafceb201df635b7c0a9d7a0e526cb2f8cb75 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 22 May 2014 12:36:03 +0200 Subject: netfilter: nf_tables: fix wrong transaction ordering in set elements The transaction needs to be placed at the end of the commit list, otherwise event notifications are reordered and we may crash when releasing object via call_rcu. This problem was introduced in 60319eb ("netfilter: nf_tables: use new transaction infrastructure to handle elements"). Reported-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0478847..9365531 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3077,7 +3077,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; nft_trans_elem(trans) = elem; - list_add(&trans->list, &ctx->net->nft.commit_list); + list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; err4: @@ -3161,7 +3161,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, goto err2; nft_trans_elem(trans) = elem; - list_add(&trans->list, &ctx->net->nft.commit_list); + list_add_tail(&trans->list, &ctx->net->nft.commit_list); nft_data_uninit(&elem.key, NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) -- cgit v0.10.2 From a1cee076f4d4774504c62e0f1846a11a6fcb6be3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 May 2014 11:09:42 +0200 Subject: netfilter: nf_tables: release objects in reverse order in the abort path The patch c7c32e7 ("netfilter: nf_tables: defer all object release via rcu") indicates that we always release deleted objects in the reverse order, but that is only needed in the abort path. These are the two possible scenarios when releasing objects: 1) Deletion scenario in the commit path: no need to release objects in the reverse order since userspace already ensures that dependencies are fulfilled), ie. userspace tells us to delete rule -> ... -> rule -> chain -> table. In this case, we have to release the objects in the *same order* as userspace provided. 2) Deletion scenario in the abort path: we have to iterate in the reverse order to undo what it cannot be added, ie. userspace sent us a batch that includes: table -> chain -> rule -> ... -> rule, and that needs to be partially undone. In this case, we have to release objects in the reverse order to ensure that the set and chain objects point to valid rule and table objects. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9365531..4fffa36 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3527,7 +3527,8 @@ static int nf_tables_abort(struct sk_buff *skb) } } - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe_reverse(trans, next, + &net->nft.commit_list, list) { list_del(&trans->list); trans->ctx.nla = NULL; call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); -- cgit v0.10.2 From 7632667d26a99d3b33ec8dd522c4086653ff9388 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 28 May 2014 15:27:18 +0200 Subject: netfilter: nft_rbtree: introduce locking There's no rbtree rcu version yet, so let's fall back on the spinlock to protect the concurrent access of this structure both from user (to update the set content) and kernel-space (in the packet path). Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 072e611..e1836ff 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -18,6 +18,8 @@ #include #include +static DEFINE_SPINLOCK(nft_rbtree_lock); + struct nft_rbtree { struct rb_root root; }; @@ -38,6 +40,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const struct rb_node *parent = priv->root.rb_node; int d; + spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -53,6 +56,8 @@ found: goto out; if (set->flags & NFT_SET_MAP) nft_data_copy(data, rbe->data); + + spin_unlock_bh(&nft_rbtree_lock); return true; } } @@ -62,6 +67,7 @@ found: goto found; } out: + spin_unlock_bh(&nft_rbtree_lock); return false; } @@ -124,9 +130,12 @@ static int nft_rbtree_insert(const struct nft_set *set, !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(rbe->data, &elem->data); + spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); if (err < 0) kfree(rbe); + + spin_unlock_bh(&nft_rbtree_lock); return err; } @@ -136,7 +145,9 @@ static void nft_rbtree_remove(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->cookie; + spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); + spin_unlock_bh(&nft_rbtree_lock); kfree(rbe); } @@ -147,6 +158,7 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) struct nft_rbtree_elem *rbe; int d; + spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -161,9 +173,11 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem->data, rbe->data); elem->flags = rbe->flags; + spin_unlock_bh(&nft_rbtree_lock); return 0; } } + spin_unlock_bh(&nft_rbtree_lock); return -ENOENT; } @@ -176,6 +190,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_elem elem; struct rb_node *node; + spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { if (iter->count < iter->skip) goto cont; @@ -188,11 +203,14 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, elem.flags = rbe->flags; iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) + if (iter->err < 0) { + spin_unlock_bh(&nft_rbtree_lock); return; + } cont: iter->count++; } + spin_unlock_bh(&nft_rbtree_lock); } static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) @@ -216,11 +234,13 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; + spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } + spin_unlock_bh(&nft_rbtree_lock); } static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, -- cgit v0.10.2 From 4fefee570d8e35d950e6b7294618e2035e669308 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 May 2014 12:39:26 +0200 Subject: netfilter: nf_tables: allow to delete several objects from a batch Three changes to allow the deletion of several objects with dependencies in one transaction, they are: 1) Introduce speculative counter increment/decrement that is undone in the abort path if required, thus we avoid hitting -EBUSY when deleting the chain. The counter updates are reverted in the abort path. 2) Increment/decrement table/chain use counter for each set/rule. We need this to fully rely on the use counters instead of the list content, eg. !list_empty(&chain->rules) which evaluate true in the middle of the transaction. 3) Decrement table use counter when an anonymous set is bound to the rule in the commit path. This avoids hitting -EBUSY when deleting the table that contains anonymous sets. The anonymous sets are released in the nf_tables_rule_destroy path. This should not be a problem since the rule already bumped the use counter of the chain, so the bound anonymous set reflects dependencies through the rule object, which already increases the chain use counter. So the general assumption after this patch is that the use counters are bumped by direct object dependencies. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4fffa36..dbf8236 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -538,8 +538,7 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(table); if (table->flags & NFT_TABLE_INACTIVE) return -ENOENT; - - if (!list_empty(&table->chains) || !list_empty(&table->sets)) + if (table->use > 0) return -EBUSY; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); @@ -553,6 +552,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, static void nf_tables_table_destroy(struct nft_ctx *ctx) { + BUG_ON(ctx->table->use > 0); + kfree(ctx->table); module_put(ctx->afi->owner); } @@ -1128,6 +1129,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (err < 0) goto err2; + table->use++; list_add_tail(&chain->list, &table->chains); return 0; err2: @@ -1169,7 +1171,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(chain); if (chain->flags & NFT_CHAIN_INACTIVE) return -ENOENT; - if (!list_empty(&chain->rules) || chain->use > 0) + if (chain->use > 0) return -EBUSY; nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); @@ -1177,6 +1179,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; + table->use--; list_del(&chain->list); return 0; } @@ -1814,6 +1817,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err = -ENOMEM; goto err3; } + chain->use++; return 0; err3: @@ -1841,6 +1845,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL) return -ENOMEM; nft_rule_disactivate_next(ctx->net, rule); + ctx->chain->use--; return 0; } return -ENOENT; @@ -2588,6 +2593,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, goto err2; list_add_tail(&set->list, &table->sets); + table->use++; return 0; err2: @@ -2642,6 +2648,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, return err; list_del(&set->list); + ctx.table->use--; return 0; } @@ -3122,6 +3129,8 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, err = nft_add_set_elem(&ctx, set, attr); if (err < 0) break; + + set->nelems++; } return err; } @@ -3196,6 +3205,8 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, err = nft_del_setelem(&ctx, set, attr); if (err < 0) break; + + set->nelems--; } return err; } @@ -3361,15 +3372,13 @@ static int nf_tables_commit(struct sk_buff *skb) case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_chain_commit_update(trans); - else { + else trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; - trans->ctx.table->use++; - } + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); nft_trans_destroy(trans); break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use--; nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && trans->ctx.chain->flags & NFT_BASE_CHAIN) { @@ -3392,6 +3401,13 @@ static int nf_tables_commit(struct sk_buff *skb) break; case NFT_MSG_NEWSET: nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + /* This avoids hitting -EBUSY when deleting the table + * from the transaction. + */ + if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS && + !list_empty(&nft_trans_set(trans)->bindings)) + trans->ctx.table->use--; + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET); nft_trans_destroy(trans); @@ -3401,7 +3417,6 @@ static int nf_tables_commit(struct sk_buff *skb) NFT_MSG_DELSET); break; case NFT_MSG_NEWSETELEM: - nft_trans_elem_set(trans)->nelems++; nf_tables_setelem_notify(&trans->ctx, nft_trans_elem_set(trans), &nft_trans_elem(trans), @@ -3409,7 +3424,6 @@ static int nf_tables_commit(struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: - nft_trans_elem_set(trans)->nelems--; nf_tables_setelem_notify(&trans->ctx, nft_trans_elem_set(trans), &nft_trans_elem(trans), @@ -3487,6 +3501,7 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_destroy(trans); } else { + trans->ctx.table->use--; list_del(&trans->ctx.chain->list); if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && trans->ctx.chain->flags & NFT_BASE_CHAIN) { @@ -3496,32 +3511,39 @@ static int nf_tables_abort(struct sk_buff *skb) } break; case NFT_MSG_DELCHAIN: + trans->ctx.table->use++; list_add_tail(&trans->ctx.chain->list, &trans->ctx.table->chains); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: + trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); break; case NFT_MSG_DELRULE: + trans->ctx.chain->use++; nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + trans->ctx.table->use--; list_del(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: + trans->ctx.table->use++; list_add_tail(&nft_trans_set(trans)->list, &trans->ctx.table->sets); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: + nft_trans_elem_set(trans)->nelems--; set = nft_trans_elem_set(trans); set->ops->get(set, &nft_trans_elem(trans)); set->ops->remove(set, &nft_trans_elem(trans)); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: + nft_trans_elem_set(trans)->nelems++; nft_trans_destroy(trans); break; } -- cgit v0.10.2 From 31f8441c328b1c038c3e44bec740fb29393a56ad Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 29 May 2014 10:29:58 +0200 Subject: netfilter: nf_tables: atomic allocation in set notifications from rcu callback Use GFP_ATOMIC allocations when sending removal notifications of anonymous sets from rcu callback context. Sleeping in that context is illegal. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dbf8236..624e083 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2191,7 +2191,7 @@ nla_put_failure: static int nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, - int event) + int event, gfp_t gfp_flags) { struct sk_buff *skb; u32 portid = ctx->portid; @@ -2202,7 +2202,7 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, return 0; err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; @@ -2213,7 +2213,7 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, } err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + ctx->report, gfp_flags); err: if (err < 0) nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); @@ -2613,7 +2613,7 @@ static void nft_set_destroy(struct nft_set *set) static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { list_del(&set->list); - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); nft_set_destroy(set); } @@ -3409,12 +3409,12 @@ static int nf_tables_commit(struct sk_buff *skb) trans->ctx.table->use--; nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), - NFT_MSG_NEWSET); + NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; case NFT_MSG_DELSET: nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), - NFT_MSG_DELSET); + NFT_MSG_DELSET, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: nf_tables_setelem_notify(&trans->ctx, -- cgit v0.10.2