summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-10 01:38:18 (GMT)
committerDavid S. Miller <davem@davemloft.net>2016-11-10 01:38:18 (GMT)
commit9fa684ec86f8b011bbad09a53cf91258e176d4c1 (patch)
tree3cbeed802a00d930492ec4e7049734b85d8fe887 /net
parentf567e950bf51290755a2539ff2aaef4c26f735d3 (diff)
parent58c78e104d937c1f560fb10ed9bb2dcde0db4fcf (diff)
downloadlinux-9fa684ec86f8b011bbad09a53cf91258e176d4c1.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says: ==================== Netfilter fixes for net The following patchset contains a larger than usual batch of Netfilter fixes for your net tree. This series contains a mixture of old bugs and recently introduced bugs, they are: 1) Fix a crash when using nft_dynset with nft_set_rbtree, which doesn't support the set element updates from the packet path. From Liping Zhang. 2) Fix leak when nft_expr_clone() fails, from Liping Zhang. 3) Fix a race when inserting new elements to the set hash from the packet path, also from Liping. 4) Handle segmented TCP SIP packets properly, basically avoid that the INVITE in the allow header create bogus expectations by performing stricter SIP message parsing, from Ulrich Weber. 5) nft_parse_u32_check() should return signed integer for errors, from John Linville. 6) Fix wrong allocation instead of connlabels, allocate 16 instead of 32 bytes, from Florian Westphal. 7) Fix compilation breakage when building the ip_vs_sync code with CONFIG_OPTIMIZE_INLINING on x86, from Arnd Bergmann. 8) Destroy the new set if the transaction object cannot be allocated, also from Liping Zhang. 9) Use device to route duplicated packets via nft_dup only when set by the user, otherwise packets may not follow the right route, again from Liping. 10) Fix wrong maximum genetlink attribute definition in IPVS, from WANG Cong. 11) Ignore untracked conntrack objects from xt_connmark, from Florian Westphal. 12) Allow to use conntrack helpers that are registered NFPROTO_UNSPEC via CT target, otherwise we cannot use the h.245 helper, from Florian. 13) Revisit garbage collection heuristic in the new workqueue-based timer approach for conntrack to evict objects earlier, again from Florian. 14) Fix crash in nf_tables when inserting an element into a verdict map, from Liping Zhang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c6
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c49
-rw-r--r--net/netfilter/nf_conntrack_helper.c11
-rw-r--r--net/netfilter/nf_conntrack_sip.c5
-rw-r--r--net/netfilter/nf_tables_api.c18
-rw-r--r--net/netfilter/nft_dynset.c19
-rw-r--r--net/netfilter/nft_set_hash.c19
-rw-r--r--net/netfilter/nft_set_rbtree.c2
-rw-r--r--net/netfilter/xt_connmark.c4
12 files changed, 108 insertions, 40 deletions
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index bf855e6..0c01a270 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
struct in_addr gw = {
.s_addr = (__force __be32)regs->data[priv->sreg_addr],
};
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
}
@@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 8bfd470..831f86e 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
- int oif = regs->data[priv->sreg_dev];
+ int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
}
@@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
- if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+ goto nla_put_failure;
+ if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c3c809b..a6e44ef 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = {
.hdrsize = 0,
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
- .maxattr = IPVS_CMD_MAX,
+ .maxattr = IPVS_CMD_ATTR_MAX,
.netnsok = true, /* Make ipvsadm to work on netns */
};
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 1b07578..9350530 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -283,6 +283,7 @@ struct ip_vs_sync_buff {
*/
static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
{
+ memset(ho, 0, sizeof(*ho));
ho->init_seq = get_unaligned_be32(&no->init_seq);
ho->delta = get_unaligned_be32(&no->delta);
ho->previous_delta = get_unaligned_be32(&no->previous_delta);
@@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa
kfree(param->pe_data);
}
- if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
+ if (opt) {
+ cp->in_seq = opt->in_seq;
+ cp->out_seq = opt->out_seq;
+ }
atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df2f5a3..0f87e5d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
bool exiting;
+ long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV 64u
-#define GC_MAX_BUCKETS 8192u
-#define GC_INTERVAL (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX (2 * HZ)
+/* maximum conntracks to evict per gc run */
#define GC_MAX_EVICTS 256u
static struct conntrack_gc_work conntrack_gc_work;
@@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned long next_run = GC_INTERVAL;
- unsigned int ratio, scanned = 0;
struct conntrack_gc_work *gc_work;
+ unsigned int ratio, scanned = 0;
+ unsigned long next_run;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
do {
@@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
if (gc_work->exiting)
return;
+ /*
+ * Eviction will normally happen from the packet path, and not
+ * from this gc worker.
+ *
+ * This worker is only here to reap expired entries when system went
+ * idle after a busy period.
+ *
+ * The heuristics below are supposed to balance conflicting goals:
+ *
+ * 1. Minimize time until we notice a stale entry
+ * 2. Maximize scan intervals to not waste cycles
+ *
+ * Normally, expired_count will be 0, this increases the next_run time
+ * to priorize 2) above.
+ *
+ * As soon as a timed-out entry is found, move towards 1) and increase
+ * the scan frequency.
+ * In case we have lots of evictions next scan is done immediately.
+ */
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
+ if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+ gc_work->next_gc_run = 0;
next_run = 0;
+ } else if (expired_count) {
+ gc_work->next_gc_run /= 2U;
+ next_run = msecs_to_jiffies(1);
+ } else {
+ if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+ gc_work->next_gc_run += msecs_to_jiffies(1);
+
+ next_run = gc_work->next_gc_run;
+ }
gc_work->last_bucket = i;
- schedule_delayed_work(&gc_work->dwork, next_run);
+ queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->next_gc_run = GC_INTERVAL_MAX;
gc_work->exiting = false;
}
@@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 336e215..7341adf 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
- if (!strcmp(h->name, name) &&
- h->tuple.src.l3num == l3num &&
- h->tuple.dst.protonum == protonum)
+ if (strcmp(h->name, name))
+ continue;
+
+ if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
+ h->tuple.src.l3num != l3num)
+ continue;
+
+ if (h->tuple.dst.protonum == protonum)
return h;
}
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 621b81c..c3fc14e 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
handler = &sip_handlers[i];
if (handler->request == NULL)
continue;
- if (*datalen < handler->len ||
+ if (*datalen < handler->len + 2 ||
strncasecmp(*dptr, handler->method, handler->len))
continue;
+ if ((*dptr)[handler->len] != ' ' ||
+ !isalpha((*dptr)[handler->len+1]))
+ continue;
if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
&matchoff, &matchlen) <= 0) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 24db222..026581b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
+err3:
+ ops->destroy(set);
err2:
kfree(set);
err1:
@@ -3452,14 +3454,15 @@ void *nft_set_elem_init(const struct nft_set *set,
return elem;
}
-void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
kfree(elem);
@@ -3565,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
+ .net = ctx->net,
.afi = ctx->afi,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
@@ -3812,7 +3816,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)
gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
@@ -4030,7 +4034,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
@@ -4171,7 +4175,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
@@ -4421,7 +4425,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
* Otherwise a 0 is returned and the attribute value is stored in the
* destination variable.
*/
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
{
u32 val;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 517f087..31ca947 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
&regs->data[priv->sreg_key],
&regs->data[priv->sreg_data],
timeout, GFP_ATOMIC);
- if (elem == NULL) {
- if (set->size)
- atomic_dec(&set->nelems);
- return NULL;
- }
+ if (elem == NULL)
+ goto err1;
ext = nft_set_elem_ext(set, elem);
if (priv->expr != NULL &&
nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
- return NULL;
+ goto err2;
return elem;
+
+err2:
+ nft_set_elem_destroy(set, elem, false);
+err1:
+ if (set->size)
+ atomic_dec(&set->nelems);
+ return NULL;
}
static void nft_dynset_eval(const struct nft_expr *expr,
@@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
+ if (set->ops->update == NULL)
+ return -EOPNOTSUPP;
+
if (set->flags & NFT_SET_CONSTANT)
return -EBUSY;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3794cb2..a3dface 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
+ struct nft_hash_elem *he, *prev;
struct nft_hash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
@@ -112,15 +112,24 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
he = new(set, expr, regs);
if (he == NULL)
goto err1;
- if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params))
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
+ if (IS_ERR(prev))
goto err2;
+
+ /* Another cpu may race to insert the element with the same key */
+ if (prev) {
+ nft_set_elem_destroy(set, he, true);
+ he = prev;
+ }
+
out:
*ext = &he->ext;
return true;
err2:
- nft_set_elem_destroy(set, he);
+ nft_set_elem_destroy(set, he, true);
err1:
return false;
}
@@ -332,7 +341,7 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr, true);
}
static void nft_hash_destroy(const struct nft_set *set)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 38b5bda..36493a7 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe);
+ nft_set_elem_destroy(set, rbe, true);
}
}
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 69f78e9..b83e158 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int32_t newmark;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_untracked(ct))
return XT_CONTINUE;
switch (info->mode) {
@@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (ct == NULL || nf_ct_is_untracked(ct))
return false;
return ((ct->mark & info->mask) == info->mark) ^ info->invert;