summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/netfilter.h42
-rw-r--r--include/linux/netfilter/x_tables.h8
-rw-r--r--include/linux/netfilter_bridge.h12
-rw-r--r--include/linux/skbuff.h19
-rw-r--r--include/net/netns/netfilter.h1
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_sctp.h2
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_cttimeout.h2
-rw-r--r--net/bridge/br_netfilter_hooks.c20
-rw-r--r--net/bridge/br_netfilter_ipv6.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c32
-rw-r--r--net/ipv4/netfilter/ip_tables.c68
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c7
-rw-r--r--net/ipv6/netfilter/ip6_tables.c52
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c5
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c7
-rw-r--r--net/netfilter/core.c225
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c101
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_queue.c12
-rw-r--r--net/netfilter/nf_tables_api.c8
-rw-r--r--net/netfilter/nf_tables_core.c5
-rw-r--r--net/netfilter/x_tables.c29
-rw-r--r--net/netfilter/xt_TEE.c15
24 files changed, 461 insertions, 217 deletions
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 00050df..d788ce6 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -11,6 +11,8 @@
#include <linux/list.h>
#include <linux/static_key.h>
#include <linux/netfilter_defs.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
#ifdef CONFIG_NETFILTER
static inline int NF_DROP_GETERR(int verdict)
@@ -118,6 +120,13 @@ struct nf_sockopt_ops {
};
/* Function to register/unregister hook points. */
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops);
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops);
+int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n);
+void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n);
+
int nf_register_hook(struct nf_hook_ops *reg);
void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
@@ -128,33 +137,26 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
int nf_register_sockopt(struct nf_sockopt_ops *reg);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
-extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
-
#ifdef HAVE_JUMP_LABEL
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
-static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
+static inline bool nf_hook_list_active(struct list_head *hook_list,
u_int8_t pf, unsigned int hook)
{
if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook))
return static_key_false(&nf_hooks_needed[pf][hook]);
- return !list_empty(nf_hook_list);
+ return !list_empty(hook_list);
}
#else
-static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
+static inline bool nf_hook_list_active(struct list_head *hook_list,
u_int8_t pf, unsigned int hook)
{
- return !list_empty(nf_hook_list);
+ return !list_empty(hook_list);
}
#endif
-static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
-{
- return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook);
-}
-
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
/**
@@ -172,10 +174,13 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
int (*okfn)(struct sock *, struct sk_buff *),
int thresh)
{
- if (nf_hooks_active(pf, hook)) {
+ struct net *net = dev_net(indev ? indev : outdev);
+ struct list_head *hook_list = &net->nf.hooks[pf][hook];
+
+ if (nf_hook_list_active(hook_list, pf, hook)) {
struct nf_hook_state state;
- nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
+ nf_hook_state_init(&state, hook_list, hook, thresh,
pf, indev, outdev, sk, okfn);
return nf_hook_slow(skb, &state);
}
@@ -385,4 +390,15 @@ extern struct nfq_ct_hook __rcu *nfq_ct_hook;
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
+/**
+ * nf_skb_duplicated - TEE target has sent a packet
+ *
+ * When a xtables target sends a packet, the OUTPUT and POSTROUTING
+ * hooks are traversed again, i.e. nft and xtables are invoked recursively.
+ *
+ * This is used by xtables TEE target to prevent the duplicated skb from
+ * being duplicated again.
+ */
+DECLARE_PER_CPU(bool, nf_skb_duplicated);
+
#endif /*__LINUX_NETFILTER_H*/
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 286098a..b006b71 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -3,6 +3,7 @@
#include <linux/netdevice.h>
+#include <linux/static_key.h>
#include <uapi/linux/netfilter/x_tables.h>
/**
@@ -222,7 +223,6 @@ struct xt_table_info {
* @stacksize jumps (number of user chains) can possibly be made.
*/
unsigned int stacksize;
- unsigned int __percpu *stackptr;
void ***jumpstack;
unsigned char entries[0] __aligned(8);
@@ -281,6 +281,12 @@ void xt_free_table_info(struct xt_table_info *info);
*/
DECLARE_PER_CPU(seqcount_t, xt_recseq);
+/* xt_tee_enabled - true if x_tables needs to handle reentrancy
+ *
+ * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
+ */
+extern struct static_key xt_tee_enabled;
+
/**
* xt_write_recseq_begin - start of a write section
*
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 6d80fc6..2437b8a 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -17,9 +17,6 @@ enum nf_br_hook_priorities {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-#define BRNF_BRIDGED_DNAT 0x02
-#define BRNF_NF_BRIDGE_PREROUTING 0x08
-
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
static inline void br_drop_fake_rtable(struct sk_buff *skb)
@@ -63,8 +60,17 @@ nf_bridge_get_physoutdev(const struct sk_buff *skb)
{
return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
}
+
+static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
+{
+ return skb->nf_bridge && skb->nf_bridge->in_prerouting;
+}
#else
#define br_drop_fake_rtable(skb) do { } while (0)
+static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
+{
+ return false;
+}
#endif /* CONFIG_BRIDGE_NETFILTER */
#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b7c1286..df9fdf5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -174,17 +174,24 @@ struct nf_bridge_info {
BRNF_PROTO_8021Q,
BRNF_PROTO_PPPOE
} orig_proto:8;
- bool pkt_otherhost;
+ u8 pkt_otherhost:1;
+ u8 in_prerouting:1;
+ u8 bridged_dnat:1;
__u16 frag_max_size;
- unsigned int mask;
struct net_device *physindev;
union {
- struct net_device *physoutdev;
- char neigh_header[8];
- };
- union {
+ /* prerouting: detect dnat in orig/reply direction */
__be32 ipv4_daddr;
struct in6_addr ipv6_daddr;
+
+ /* after prerouting + nat detected: store original source
+ * mac since neigh resolution overwrites it, only used while
+ * skb is out in neigh layer.
+ */
+ char neigh_header[8];
+
+ /* always valid & non-NULL from FORWARD on, for physdev match */
+ struct net_device *physoutdev;
};
};
#endif
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 532e4ba..38aa498 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -14,5 +14,6 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
+ struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
};
#endif
diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
index ceeefe6..ed4e776 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
@@ -13,6 +13,8 @@ enum sctp_conntrack {
SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
+ SCTP_CONNTRACK_HEARTBEAT_SENT,
+ SCTP_CONNTRACK_HEARTBEAT_ACKED,
SCTP_CONNTRACK_MAX
};
diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
index 1ab0b97..f2c10dc 100644
--- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
+++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
@@ -92,6 +92,8 @@ enum ctattr_timeout_sctp {
CTA_TIMEOUT_SCTP_SHUTDOWN_SENT,
CTA_TIMEOUT_SCTP_SHUTDOWN_RECD,
CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+ CTA_TIMEOUT_SCTP_HEARTBEAT_SENT,
+ CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED,
__CTA_TIMEOUT_SCTP_MAX
};
#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c8b9bcf..0a6f095 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -49,9 +49,9 @@ static struct ctl_table_header *brnf_sysctl_header;
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
-static int brnf_filter_vlan_tagged __read_mostly = 0;
-static int brnf_filter_pppoe_tagged __read_mostly = 0;
-static int brnf_pass_vlan_indev __read_mostly = 0;
+static int brnf_filter_vlan_tagged __read_mostly;
+static int brnf_filter_pppoe_tagged __read_mostly;
+static int brnf_pass_vlan_indev __read_mostly;
#else
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
@@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
- nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+ nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
ret = neigh->output(neigh, skb);
}
@@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
nf_bridge->pkt_otherhost = true;
}
- nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 1;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
@@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (skb->nf_bridge &&
- !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
return NF_STOP;
- }
return NF_ACCEPT;
}
@@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
- nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+ nf_bridge->bridged_dnat = 0;
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
@@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
static int br_nf_dev_xmit(struct sk_buff *skb)
{
- if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+ if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 13b7d1e..77383bf 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 0;
if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
skb_dst_drop(skb);
v6ops->route_input(skb);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 92305a1..c416cb3 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset);
}
-static inline __pure
+static inline
struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
+ /* No TEE support for arptables, so no need to switch to alternate
+ * stack. All targets that reenter must return absolute verdicts.
+ */
e = get_entry(table_base, private->hook_entry[hook]);
acpar.in = state->in;
@@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
if (table_base + v
!= arpt_next_entry(e)) {
-
- if (stackidx >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
jumpstack[stackidx++] = e;
}
@@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
continue;
}
- /* Targets which reenter must return
- * abs. verdicts
- */
acpar.target = t->u.kernel.target;
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
@@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
-static int mark_source_chains(const struct xt_table_info *newinfo,
+static int mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
return 0;
}
+ if (entry0 + newpos != arpt_next_entry(e) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
break;
++i;
- if (strcmp(arpt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
@@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name,
break;
}
++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6c72fbb..787f99e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb,
}
#endif
-static inline __pure
+static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb,
const char *indev, *outdev;
const void *table_base;
struct ipt_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
ip = ip_hdr(skb);
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
@@ -331,13 +332,21 @@ ipt_do_table(struct sk_buff *skb,
smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
- pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
- table->name, hook, origptr,
+ pr_debug("Entering %s(hook %u), UF %p\n",
+ table->name, hook,
get_entry(table_base, private->underflow[hook]));
do {
@@ -383,28 +392,24 @@ ipt_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr) {
+ if (stackidx == 0) {
e = get_entry(table_base,
private->underflow[hook]);
pr_debug("Underflow (this is normal) "
"to %p\n", e);
} else {
- e = jumpstack[--*stackptr];
+ e = jumpstack[--stackidx];
pr_debug("Pulled %p out from pos %u\n",
- e, *stackptr);
+ e, stackidx);
e = ipt_next_entry(e);
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
pr_debug("Pushed %p into pos %u\n",
- e, *stackptr - 1);
+ e, stackidx - 1);
}
e = get_entry(table_base, v);
@@ -423,9 +428,8 @@ ipt_do_table(struct sk_buff *skb,
/* Verdict */
break;
} while (!acpar.hotdrop);
- pr_debug("Exiting %s; resetting sp from %u to %u\n",
- __func__, *stackptr, origptr);
- *stackptr = origptr;
+ pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
+
xt_write_recseq_end(addend);
local_bh_enable();
@@ -439,11 +443,15 @@ ipt_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -457,6 +465,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -518,6 +527,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ WARN_ON_ONCE(calldepth == 0);
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -531,9 +543,14 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos);
return 0;
}
+ if (entry0 + newpos != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
+ duprintf("Jump rule %u -> %u, calldepth %d\n",
+ pos, newpos, calldepth);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -547,6 +564,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -826,9 +844,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
- if (strcmp(ipt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1744,9 +1759,6 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index c88b7d4..b69e82b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ if (nf_bridge_in_prerouting(skb))
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
if (hooknum == NF_INET_PRE_ROUTING)
return IP_DEFRAG_CONNTRACK_IN + zone;
else
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 3c35ced..4e21f80 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -305,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb,
}
#endif
-static inline __pure struct ip6t_entry *
+static inline struct ip6t_entry *
ip6t_next_entry(const struct ip6t_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb,
const char *indev, *outdev;
const void *table_base;
struct ip6t_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -357,8 +358,16 @@ ip6t_do_table(struct sk_buff *skb,
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
@@ -406,20 +415,16 @@ ip6t_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr)
+ if (stackidx == 0)
e = get_entry(table_base,
private->underflow[hook]);
else
- e = ip6t_next_entry(jumpstack[--*stackptr]);
+ e = ip6t_next_entry(jumpstack[--stackidx]);
continue;
}
if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
}
e = get_entry(table_base, v);
@@ -437,8 +442,6 @@ ip6t_do_table(struct sk_buff *skb,
break;
} while (!acpar.hotdrop);
- *stackptr = origptr;
-
xt_write_recseq_end(addend);
local_bh_enable();
@@ -452,11 +455,15 @@ ip6t_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -470,6 +477,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -531,6 +539,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -544,6 +554,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos);
return 0;
}
+ if (entry0 + newpos != ip6t_next_entry(e) &&
+ !(e->ipv6.flags & IP6T_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -560,6 +575,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -839,9 +855,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
- if (strcmp(ip6t_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1754,9 +1767,6 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 12331ef..567367a 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -35,14 +35,12 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
-
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_reject_info *reject = par->targinfo;
struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
- pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
@@ -65,9 +63,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum);
break;
- default:
- net_info_ratelimited("case %u not handled yet\n", reject->with);
- break;
}
return NF_DROP;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index a45db0b..267fb8d 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -39,12 +39,9 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ if (nf_bridge_in_prerouting(skb))
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
if (hooknum == NF_INET_PRE_ROUTING)
return IP6_DEFRAG_CONNTRACK_IN + zone;
else
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a0e5497..2a5a070 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -34,6 +34,9 @@ EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
+DEFINE_PER_CPU(bool, nf_skb_duplicated);
+EXPORT_SYMBOL_GPL(nf_skb_duplicated);
+
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
mutex_lock(&afinfo_mutex);
@@ -52,9 +55,6 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
-EXPORT_SYMBOL(nf_hooks);
-
#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
@@ -62,63 +62,166 @@ EXPORT_SYMBOL(nf_hooks_needed);
static DEFINE_MUTEX(nf_hook_mutex);
-int nf_register_hook(struct nf_hook_ops *reg)
+static struct list_head *nf_find_hook_list(struct net *net,
+ const struct nf_hook_ops *reg)
{
- struct list_head *nf_hook_list;
- struct nf_hook_ops *elem;
+ struct list_head *hook_list = NULL;
- mutex_lock(&nf_hook_mutex);
- switch (reg->pf) {
- case NFPROTO_NETDEV:
+ if (reg->pf != NFPROTO_NETDEV)
+ hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+ else if (reg->hooknum == NF_NETDEV_INGRESS) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- BUG_ON(reg->dev == NULL);
- nf_hook_list = &reg->dev->nf_hooks_ingress;
- net_inc_ingress_queue();
- break;
- }
+ if (reg->dev && dev_net(reg->dev) == net)
+ hook_list = &reg->dev->nf_hooks_ingress;
#endif
- /* Fall through. */
- default:
- nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
- break;
+ }
+ return hook_list;
+}
+
+struct nf_hook_entry {
+ const struct nf_hook_ops *orig_ops;
+ struct nf_hook_ops ops;
+};
+
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ struct list_head *hook_list;
+ struct nf_hook_entry *entry;
+ struct nf_hook_ops *elem;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->orig_ops = reg;
+ entry->ops = *reg;
+
+ hook_list = nf_find_hook_list(net, reg);
+ if (!hook_list) {
+ kfree(entry);
+ return -ENOENT;
}
- list_for_each_entry(elem, nf_hook_list, list) {
+ mutex_lock(&nf_hook_mutex);
+ list_for_each_entry(elem, hook_list, list) {
if (reg->priority < elem->priority)
break;
}
- list_add_rcu(&reg->list, elem->list.prev);
+ list_add_rcu(&entry->ops.list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_inc_ingress_queue();
+#endif
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
return 0;
}
-EXPORT_SYMBOL(nf_register_hook);
+EXPORT_SYMBOL(nf_register_net_hook);
-void nf_unregister_hook(struct nf_hook_ops *reg)
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
+ struct list_head *hook_list;
+ struct nf_hook_entry *entry;
+ struct nf_hook_ops *elem;
+
+ hook_list = nf_find_hook_list(net, reg);
+ if (!hook_list)
+ return;
+
mutex_lock(&nf_hook_mutex);
- list_del_rcu(&reg->list);
- mutex_unlock(&nf_hook_mutex);
- switch (reg->pf) {
- case NFPROTO_NETDEV:
-#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- net_dec_ingress_queue();
+ list_for_each_entry(elem, hook_list, list) {
+ entry = container_of(elem, struct nf_hook_entry, ops);
+ if (entry->orig_ops == reg) {
+ list_del_rcu(&entry->ops.list);
break;
}
- break;
-#endif
- default:
- break;
}
+ mutex_unlock(&nf_hook_mutex);
+ if (&elem->list == hook_list) {
+ WARN(1, "nf_unregister_net_hook: hook not found!\n");
+ return;
+ }
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
+#endif
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(reg);
+ nf_queue_nf_hook_drop(net, &entry->ops);
+ kfree(entry);
+}
+EXPORT_SYMBOL(nf_unregister_net_hook);
+
+int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = nf_register_net_hook(net, &reg[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ nf_unregister_net_hooks(net, reg, i);
+ return err;
+}
+EXPORT_SYMBOL(nf_register_net_hooks);
+
+void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n)
+{
+ while (n-- > 0)
+ nf_unregister_net_hook(net, &reg[n]);
+}
+EXPORT_SYMBOL(nf_unregister_net_hooks);
+
+static LIST_HEAD(nf_hook_list);
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ struct net *net, *last;
+ int ret;
+
+ rtnl_lock();
+ for_each_net(net) {
+ ret = nf_register_net_hook(net, reg);
+ if (ret && ret != -ENOENT)
+ goto rollback;
+ }
+ list_add_tail(&reg->list, &nf_hook_list);
+ rtnl_unlock();
+
+ return 0;
+rollback:
+ last = net;
+ for_each_net(net) {
+ if (net == last)
+ break;
+ nf_unregister_net_hook(net, reg);
+ }
+ rtnl_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(nf_register_hook);
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_del(&reg->list);
+ for_each_net(net)
+ nf_unregister_net_hook(net, reg);
+ rtnl_unlock();
}
EXPORT_SYMBOL(nf_unregister_hook);
@@ -295,8 +398,46 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
+static int nf_register_hook_list(struct net *net)
+{
+ struct nf_hook_ops *elem;
+ int ret;
+
+ rtnl_lock();
+ list_for_each_entry(elem, &nf_hook_list, list) {
+ ret = nf_register_net_hook(net, elem);
+ if (ret && ret != -ENOENT)
+ goto out_undo;
+ }
+ rtnl_unlock();
+ return 0;
+
+out_undo:
+ list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
+ nf_unregister_net_hook(net, elem);
+ rtnl_unlock();
+ return ret;
+}
+
+static void nf_unregister_hook_list(struct net *net)
+{
+ struct nf_hook_ops *elem;
+
+ rtnl_lock();
+ list_for_each_entry(elem, &nf_hook_list, list)
+ nf_unregister_net_hook(net, elem);
+ rtnl_unlock();
+}
+
static int __net_init netfilter_net_init(struct net *net)
{
+ int i, h, ret;
+
+ for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+ INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+ }
+
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net);
@@ -307,11 +448,16 @@ static int __net_init netfilter_net_init(struct net *net)
return -ENOMEM;
}
#endif
- return 0;
+ ret = nf_register_hook_list(net);
+ if (ret)
+ remove_proc_entry("netfilter", net->proc_net);
+
+ return ret;
}
static void __net_exit netfilter_net_exit(struct net *net)
{
+ nf_unregister_hook_list(net);
remove_proc_entry("netfilter", net->proc_net);
}
@@ -322,12 +468,7 @@ static struct pernet_operations netfilter_net_ops = {
int __init netfilter_init(void)
{
- int i, h, ret;
-
- for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&nf_hooks[i][h]);
- }
+ int ret;
ret = register_pernet_subsys(&netfilter_net_ops);
if (ret < 0)
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 7e81416..a2ff7d7 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -137,7 +137,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{
- if (scheduler && scheduler->module)
+ if (scheduler)
module_put(scheduler->module);
}
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b45da90..6719773 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -42,6 +42,8 @@ static const char *const sctp_conntrack_names[] = {
"SHUTDOWN_SENT",
"SHUTDOWN_RECD",
"SHUTDOWN_ACK_SENT",
+ "HEARTBEAT_SENT",
+ "HEARTBEAT_ACKED",
};
#define SECS * HZ
@@ -57,6 +59,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
};
#define sNO SCTP_CONNTRACK_NONE
@@ -67,6 +71,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
+#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -88,6 +94,10 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
to that of the SHUTDOWN chunk.
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
+HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
+HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to
+ that of the HEARTBEAT chunk. Secondary connection is
+ established.
*/
/* TODO
@@ -97,36 +107,40 @@ CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
- Check the error type in the reply dir before transitioning from
cookie echoed to closed.
- Sec 5.2.4 of RFC 2960
- - Multi Homing support.
+ - Full Multi Homing support.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA}
}
};
@@ -278,9 +292,16 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8;
break;
+ case SCTP_CID_HEARTBEAT:
+ pr_debug("SCTP_CID_HEARTBEAT");
+ i = 9;
+ break;
+ case SCTP_CID_HEARTBEAT_ACK:
+ pr_debug("SCTP_CID_HEARTBEAT_ACK");
+ i = 10;
+ break;
default:
- /* Other chunks like DATA, SACK, HEARTBEAT and
- its ACK do not cause a change in state */
+ /* Other chunks like DATA or SACK do not change the state */
pr_debug("Unknown chunk type, Will stay in %s\n",
sctp_conntrack_names[cur_state]);
return cur_state;
@@ -329,6 +350,8 @@ static int sctp_packet(struct nf_conn *ct,
!test_bit(SCTP_CID_COOKIE_ECHO, map) &&
!test_bit(SCTP_CID_ABORT, map) &&
!test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
sh->vtag != ct->proto.sctp.vtag[dir]) {
pr_debug("Verification tag check failed\n");
goto out;
@@ -357,6 +380,16 @@ static int sctp_packet(struct nf_conn *ct,
/* Sec 8.5.1 (D) */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
+ } else if (sch->type == SCTP_CID_HEARTBEAT ||
+ sch->type == SCTP_CID_HEARTBEAT_ACK) {
+ if (ct->proto.sctp.vtag[dir] == 0) {
+ pr_debug("Setting vtag %x for dir %d\n",
+ sh->vtag, dir);
+ ct->proto.sctp.vtag[dir] = sh->vtag;
+ } else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out_unlock;
+ }
}
old_state = ct->proto.sctp.state;
@@ -466,6 +499,10 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Sec 8.5.1 (A) */
return false;
}
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+ pr_debug("Setting vtag %x for secondary conntrack\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
}
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
@@ -610,6 +647,8 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
@@ -658,6 +697,18 @@ static struct ctl_table sctp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{ }
};
@@ -730,6 +781,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
+ pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
+ pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
#endif
return 0;
}
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 3992106..0655225 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -19,7 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct nf_hook_ops *ops);
+void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8a8b2ab..96777f9 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -105,21 +105,15 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-void nf_queue_nf_hook_drop(struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
{
const struct nf_queue_handler *qh;
- struct net *net;
- rtnl_lock();
rcu_read_lock();
qh = rcu_dereference(queue_handler);
- if (qh) {
- for_each_net(net) {
- qh->nf_hook_drop(net, ops);
- }
- }
+ if (qh)
+ qh->nf_hook_drop(net, ops);
rcu_read_unlock();
- rtnl_unlock();
}
/*
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index cfe6368..4a41eb9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -130,20 +130,24 @@ static void nft_trans_destroy(struct nft_trans *trans)
int nft_register_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops)
{
+ struct net *net = read_pnet(&basechain->pnet);
+
if (basechain->flags & NFT_BASECHAIN_DISABLED)
return 0;
- return nf_register_hooks(basechain->ops, hook_nops);
+ return nf_register_net_hooks(net, basechain->ops, hook_nops);
}
EXPORT_SYMBOL_GPL(nft_register_basechain);
void nft_unregister_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops)
{
+ struct net *net = read_pnet(&basechain->pnet);
+
if (basechain->flags & NFT_BASECHAIN_DISABLED)
return;
- nf_unregister_hooks(basechain->ops, hook_nops);
+ nf_unregister_net_hooks(net, basechain->ops, hook_nops);
}
EXPORT_SYMBOL_GPL(nft_unregister_basechain);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f77bad4..05d0b03 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,7 +114,6 @@ unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
- const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
@@ -125,10 +124,6 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
- /* Ignore chains that are not for the current network namespace */
- if (!net_eq(net, chain_net))
- return NF_ACCEPT;
-
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d324fe7..9b42b5e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
[NFPROTO_IPV6] = "ip6",
};
-/* Allow this many total (re)entries. */
-static const unsigned int xt_jumpstack_multiplier = 2;
-
/* Registration hooks for targets. */
int xt_register_target(struct xt_target *target)
{
@@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info)
kvfree(info->jumpstack);
}
- free_percpu(info->stackptr);
-
kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);
@@ -732,15 +727,14 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+struct static_key xt_tee_enabled __read_mostly;
+EXPORT_SYMBOL_GPL(xt_tee_enabled);
+
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
int cpu;
- i->stackptr = alloc_percpu(unsigned int);
- if (i->stackptr == NULL)
- return -ENOMEM;
-
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
i->jumpstack = vzalloc(size);
@@ -749,8 +743,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
if (i->jumpstack == NULL)
return -ENOMEM;
- i->stacksize *= xt_jumpstack_multiplier;
- size = sizeof(void *) * i->stacksize;
+ /* ruleset without jumps -- no stack needed */
+ if (i->stacksize == 0)
+ return 0;
+
+ /* Jumpstack needs to be able to record two full callchains, one
+ * from the first rule set traversal, plus one table reentrancy
+ * via -j TEE without clobbering the callchain that brought us to
+ * TEE target.
+ *
+ * This is done by allocating two jumpstacks per cpu, on reentry
+ * the upper half of the stack is used.
+ *
+ * see the jumpstack setup in ipt_do_table() for more details.
+ */
+ size = sizeof(void *) * i->stacksize * 2u;
for_each_possible_cpu(cpu) {
if (size > PAGE_SIZE)
i->jumpstack[cpu] = vmalloc_node(size,
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index a747eb4..c5d6556 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -37,7 +37,6 @@ struct xt_tee_priv {
};
static const union nf_inet_addr tee_zero_address;
-static DEFINE_PER_CPU(bool, tee_active);
static struct net *pick_net(struct sk_buff *skb)
{
@@ -88,7 +87,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_tee_tginfo *info = par->targinfo;
struct iphdr *iph;
- if (__this_cpu_read(tee_active))
+ if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
@@ -125,9 +124,9 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
ip_send_check(iph);
if (tee_tg_route4(skb, info)) {
- __this_cpu_write(tee_active, true);
+ __this_cpu_write(nf_skb_duplicated, true);
ip_local_out(skb);
- __this_cpu_write(tee_active, false);
+ __this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
@@ -170,7 +169,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- if (__this_cpu_read(tee_active))
+ if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
@@ -188,9 +187,9 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
--iph->hop_limit;
}
if (tee_tg_route6(skb, info)) {
- __this_cpu_write(tee_active, true);
+ __this_cpu_write(nf_skb_duplicated, true);
ip6_local_out(skb);
- __this_cpu_write(tee_active, false);
+ __this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
@@ -252,6 +251,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
} else
info->priv = NULL;
+ static_key_slow_inc(&xt_tee_enabled);
return 0;
}
@@ -263,6 +263,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
unregister_netdevice_notifier(&info->priv->notifier);
kfree(info->priv);
}
+ static_key_slow_dec(&xt_tee_enabled);
}
static struct xt_target tee_tg_reg[] __read_mostly = {