summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/core.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c94
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c962
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c26
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_expect.c25
-rw-r--r--net/netfilter/nf_conntrack_extend.c11
-rw-r--r--net/netfilter/nf_conntrack_helper.c10
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1
-rw-r--r--net/netfilter/nf_conntrack_proto.c24
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c14
-rw-r--r--net/netfilter/nf_conntrack_standalone.c9
-rw-r--r--net/netfilter/nf_log.c6
-rw-r--r--net/netfilter/nf_queue.c18
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/xt_CLASSIFY.c36
-rw-r--r--net/netfilter/xt_NFQUEUE.c6
27 files changed, 1095 insertions, 292 deletions
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 32fcbe2..e69d537 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -214,7 +214,7 @@ EXPORT_SYMBOL(skb_make_writable);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +231,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
}
EXPORT_SYMBOL(nf_ct_attach);
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);
void nf_conntrack_destroy(struct nf_conntrack *nfct)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecd..66e4662 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,18 +48,18 @@
/*
* Connection hash size. Default is what was selected at compile time.
*/
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
/* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
/*
* Connection hash table: for input and output packets lookups of IPVS
*/
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
/* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
@@ -71,7 +71,7 @@ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
/* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
/*
* Fine locking granularity for big connection hash table
@@ -176,8 +176,8 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
NULL, 0, &p);
- if (cp->dest && cp->dest->svc->pe) {
- p.pe = cp->dest->svc->pe;
+ if (cp->pe) {
+ p.pe = cp->pe;
p.pe_data = cp->pe_data;
p.pe_data_len = cp->pe_data_len;
}
@@ -354,7 +354,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (p->pe_data && p->pe->ct_match) {
- if (p->pe->ct_match(p, cp))
+ if (p->pe == cp->pe && p->pe->ct_match(p, cp))
goto out;
continue;
}
@@ -613,7 +613,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
&cp->vaddr, cp->vport,
- cp->protocol);
+ cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
return dest;
} else
@@ -765,6 +765,7 @@ static void ip_vs_conn_expire(unsigned long data)
if (cp->flags & IP_VS_CONN_F_NFCT)
ip_vs_conn_drop_conntrack(cp);
+ ip_vs_pe_put(cp->pe);
kfree(cp->pe_data);
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
@@ -802,7 +803,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
- struct ip_vs_dest *dest)
+ struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
@@ -826,7 +827,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
&cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
- if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+ cp->fwmark = fwmark;
+ if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+ ip_vs_pe_get(p->pe);
+ cp->pe = p->pe;
cp->pe_data = p->pe_data;
cp->pe_data_len = p->pe_data_len;
}
@@ -958,15 +962,13 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
- if (cp->dest && cp->pe_data &&
- cp->dest->svc->pe->show_pe_data) {
+ if (cp->pe_data) {
pe_data[0] = ' ';
- len = strlen(cp->dest->svc->pe->name);
- memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+ len = strlen(cp->pe->name);
+ memcpy(pe_data + 1, cp->pe->name, len);
pe_data[len + 1] = ' ';
len += 2;
- len += cp->dest->svc->pe->show_pe_data(cp,
- pe_data + len);
+ len += cp->pe->show_pe_data(cp, pe_data + len);
}
pe_data[len] = '\0';
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9..5287771 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -177,7 +177,7 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
return pp->state_transition(cp, direction, skb, pp);
}
-static inline void
+static inline int
ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
struct sk_buff *skb, int protocol,
const union nf_inet_addr *caddr, __be16 cport,
@@ -187,7 +187,9 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
- p->pe->fill_param(p, skb);
+ return p->pe->fill_param(p, skb);
+
+ return 0;
}
/*
@@ -200,7 +202,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
struct sk_buff *skb,
- __be16 ports[2])
+ __be16 src_port, __be16 dst_port, int *ignored)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@@ -224,8 +226,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
- IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -247,14 +249,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
__be16 vport = 0;
- if (ports[1] == svc->port) {
+ if (dst_port == svc->port) {
/* non-FTP template:
* <protocol, caddr, 0, vaddr, vport, daddr, dport>
* FTP template:
* <protocol, caddr, 0, vaddr, 0, daddr, 0>
*/
if (svc->port != FTPPORT)
- vport = ports[1];
+ vport = dst_port;
} else {
/* Note: persistent fwmark-based services and
* persistent port zero service are handled here.
@@ -268,24 +270,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
vaddr = &fwmark;
}
}
- ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
- vaddr, vport, &param);
+ /* return *ignored = -1 so NF_DROP can be used */
+ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+ vaddr, vport, &param) < 0) {
+ *ignored = -1;
+ return NULL;
+ }
}
/* Check if a template already exists */
ct = ip_vs_ct_in_get(&param);
if (!ct || !ip_vs_check_template(ct)) {
- /* No template found or the dest of the connection
+ /*
+ * No template found or the dest of the connection
* template is not available.
+ * return *ignored=0 i.e. ICMP and NF_DROP
*/
dest = svc->scheduler->schedule(svc, skb);
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
+ *ignored = 0;
return NULL;
}
- if (ports[1] == svc->port && svc->port != FTPPORT)
+ if (dst_port == svc->port && svc->port != FTPPORT)
dport = dest->port;
/* Create a template
@@ -293,9 +302,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* and thus param.pe_data will be destroyed
* when the template expires */
ct = ip_vs_conn_new(&param, &dest->addr, dport,
- IP_VS_CONN_F_TEMPLATE, dest);
+ IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
+ *ignored = -1;
return NULL;
}
@@ -306,7 +316,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
kfree(param.pe_data);
}
- dport = ports[1];
+ dport = dst_port;
if (dport == svc->port && dest->port)
dport = dest->port;
@@ -317,11 +327,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
- &iph.daddr, ports[1], &param);
- cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+ ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port,
+ &iph.daddr, dst_port, &param);
+
+ cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
+ *ignored = -1;
return NULL;
}
@@ -341,6 +353,21 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* It selects a server according to the virtual service, and
* creates a connection entry.
* Protocols supported: TCP, UDP
+ *
+ * Usage of *ignored
+ *
+ * 1 : protocol tried to schedule (eg. on SYN), found svc but the
+ * svc/scheduler decides that this packet should be accepted with
+ * NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 : scheduler can not find destination, so try bypass or
+ * return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 : scheduler tried to schedule but fatal error occurred, eg.
+ * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ * failure such as missing Call-ID, ENOMEM on skb_linearize
+ * or pe_data. In this case we should return NF_DROP without
+ * any attempts to send ICMP with ip_vs_leave.
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
@@ -371,11 +398,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
/*
- * Do not schedule replies from local real server. It is risky
- * for fwmark services but mostly for persistent services.
+ * Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
(cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
@@ -386,10 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Persistent service
*/
- if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
- *ignored = 0;
- return ip_vs_sched_persist(svc, skb, pptr);
- }
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+ *ignored = 0;
/*
* Non-persistent service
@@ -402,8 +427,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL;
}
- *ignored = 0;
-
dest = svc->scheduler->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -423,9 +446,11 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
pptr[0], &iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
- flags, dest);
- if (!cp)
+ flags, dest, skb->mark);
+ if (!cp) {
+ *ignored = -1;
return NULL;
+ }
}
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -489,7 +514,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
- NULL);
+ NULL, skb->mark);
if (!cp)
return NF_DROP;
}
@@ -1535,9 +1560,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*
* Sync connection if it is about to close to
* encorage the standby servers to update the connections timeout
+ *
+ * For ONE_PKT let ip_vs_sync_conn() do the filter work.
*/
- pkts = atomic_add_return(1, &cp->in_pkts);
- if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ pkts = sysctl_ip_vs_sync_threshold[0];
+ else
+ pkts = atomic_add_return(1, &cp->in_pkts);
+
+ if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1552,8 +1583,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
- else if (af == AF_INET &&
- (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_ip_vs_sync_threshold[1]
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5..ca49e92 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -92,7 +92,7 @@ int sysctl_ip_vs_nat_icmp_send = 0;
int sysctl_ip_vs_conntrack;
#endif
int sysctl_ip_vs_snat_reroute = 1;
-
+int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -655,12 +655,12 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
- __be16 vport, __u16 protocol)
+ __be16 vport, __u16 protocol, __u32 fwmark)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
- svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+ svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -1137,7 +1137,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
}
if (u->pe_name && *u->pe_name) {
- pe = ip_vs_pe_get(u->pe_name);
+ pe = ip_vs_pe_getbyname(u->pe_name);
if (pe == NULL) {
pr_info("persistence engine module ip_vs_pe_%s "
"not found\n", u->pe_name);
@@ -1248,7 +1248,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = sched;
if (u->pe_name && *u->pe_name) {
- pe = ip_vs_pe_get(u->pe_name);
+ pe = ip_vs_pe_getbyname(u->pe_name);
if (pe == NULL) {
pr_info("persistence engine module ip_vs_pe_%s "
"not found\n", u->pe_name);
@@ -1534,6 +1534,25 @@ proc_do_sync_threshold(ctl_table *table, int write,
return rc;
}
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ if ((*valp < 0) || (*valp > 1)) {
+ /* Restore the correct value */
+ *valp = val;
+ } else {
+ ip_vs_sync_switch_mode(val);
+ }
+ }
+ return rc;
+}
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
@@ -1600,6 +1619,13 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .procname = "sync_version",
+ .data = &sysctl_ip_vs_sync_ver,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_do_sync_mode,
+ },
#if 0
{
.procname = "timeout_established",
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7545500..84aef65 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -208,7 +208,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
- cp->dest);
+ cp->dest, skb->mark);
if (!n_cp)
return 0;
@@ -365,7 +365,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr,
htons(ntohs(cp->dport)-1),
- IP_VS_CONN_F_NFCT, cp->dest);
+ IP_VS_CONN_F_NFCT, cp->dest,
+ skb->mark);
if (!n_cp)
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af7..5cf859c 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
}
/* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
{
struct ip_vs_pe *pe;
- IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+ IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name);
spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
}
/* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
{
struct ip_vs_pe *pe;
/* Search for the pe by name */
- pe = ip_vs_pe_getbyname(name);
+ pe = __ip_vs_pe_getbyname(name);
/* If pe not found, load the module and search again */
if (!pe) {
request_module("ip_vs_pe_%s", name);
- pe = ip_vs_pe_getbyname(name);
+ pe = __ip_vs_pe_getbyname(name);
}
return pe;
}
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
- if (pe && pe->module)
- module_put(pe->module);
-}
-
/* Register a pe in the pe list */
int register_ip_vs_pe(struct ip_vs_pe *pe)
{
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e96..0d83bc0 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
+ int retc;
ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
if (dataoff >= skb->len)
return -EINVAL;
+ if ((retc=skb_linearize(skb)) < 0)
+ return retc;
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd..a315159 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -47,13 +47,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pp);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
-
+ /* NF_ACCEPT */
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200..1cdab12 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -64,12 +64,18 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pp);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
+ /* NF_ACCEPT */
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a0..cd398de 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -63,12 +63,18 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pp);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
+ /* NF_ACCEPT */
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aed..c1c167a 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
* high-performance and highly available server based on a
* cluster of servers.
*
+ * Version 1, is capable of handling both version 0 and 1 messages.
+ * Version 0 is the plain old format.
+ * Note Version 0 receivers will just drop Ver 1 messages.
+ * Version 1 is capable of handle IPv6, Persistence data,
+ * time-outs, and firewall marks.
+ * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
+ * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions Message: is a complete datagram
+ * Sync_conn: is a part of a Message
+ * Param Data is an option to a Sync_conn.
+ *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* ip_vs_sync: sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
* Alexandre Cassen : Added SyncID support for incoming sync
* messages filtering.
* Justin Ossevoort : Fix endian problem on sync message size.
+ * Hans Schillstrom : Added Version 1: i.e. IPv6,
+ * Persistence support, fwmark and time-out.
*/
#define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
#include <linux/wait.h>
#include <linux/kernel.h>
+#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
+
#include <net/ip.h>
#include <net/sock.h>
@@ -43,11 +59,13 @@
#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
#define IP_VS_SYNC_PORT 8848 /* multicast port */
+#define SYNC_PROTO_VER 1 /* Protocol version in header */
/*
* IPVS sync connection entry
+ * Version 0, i.e. original version.
*/
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
__u8 reserved;
/* Protocol, addresses and port numbers */
@@ -71,41 +89,158 @@ struct ip_vs_sync_conn_options {
struct ip_vs_seq out_seq; /* outgoing seq. struct */
};
+/*
+ Sync Connection format (sync_conn)
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Protocol | Ver. | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | State | cport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | vport | dport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | fwmark |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | timeout (in sec.) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ... |
+ | IP-Addresses (v4 or v6) |
+ | ... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ Optional Parameters.
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param. Type | Param. Length | Param. data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+ | ... |
+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | Param Type | Param. Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param data |
+ | Last Param data should be padded for 32 bit alignment |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ * Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ struct in6_addr caddr; /* client address */
+ struct in6_addr vaddr; /* virtual address */
+ struct in6_addr daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+ struct ip_vs_sync_v4 v4;
+ struct ip_vs_sync_v6 v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6 0
+#define STYPE_F_INET6 (1 << STYPE_INET6)
+
+#define SVER_SHIFT 12 /* Shift to get version */
+#define SVER_MASK 0x0fff /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA 1
+#define IPVS_OPT_PE_DATA 2
+#define IPVS_OPT_PE_NAME 3
+#define IPVS_OPT_PARAM 7
+
+#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
+
struct ip_vs_sync_thread_data {
struct socket *sock;
char *buf;
};
-#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
/*
- The master mulitcasts messages to the backup load balancers in the
- following format.
+ The master mulitcasts messages (Datagrams) to the backup load balancers
+ in the following format.
+
+ Version 1:
+ Note, first byte should be Zero, so ver 0 receivers will drop the packet.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Count Conns | SyncID | Size |
+ | 0 | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | Version | Reserved, set to Zero |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (1) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| . |
- | . |
+ ~ . ~
| . |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (n) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | IPVS Sync Connection (1) |
*/
#define SYNC_MESG_HEADER_LEN 4
#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
__u8 nr_conns;
__u8 syncid;
__u16 size;
@@ -113,6 +248,17 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+ __u8 reserved; /* must be zero */
+ __u8 syncid;
+ __u16 size;
+ __u8 nr_conns;
+ __s8 version; /* SYNC_PROTO_VER */
+ __u16 spare;
+ /* ip_vs_sync_conn entries start here */
+};
+
/* the maximum length of sync (sending/receiving) message */
static int sync_send_mesg_maxlen;
static int sync_recv_mesg_maxlen;
@@ -156,6 +302,27 @@ static struct sockaddr_in mcast_addr = {
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+ ho->init_seq = get_unaligned_be32(&no->init_seq);
+ ho->delta = get_unaligned_be32(&no->delta);
+ ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+ put_unaligned_be32(ho->init_seq, &no->init_seq);
+ put_unaligned_be32(ho->delta, &no->delta);
+ put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
@@ -175,6 +342,9 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void)
return sb;
}
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
{
struct ip_vs_sync_buff *sb;
@@ -186,11 +356,15 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
kfree(sb);
return NULL;
}
- sb->mesg->nr_conns = 0;
+ sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
+ sb->mesg->version = SYNC_PROTO_VER;
sb->mesg->syncid = ip_vs_master_syncid;
- sb->mesg->size = 4;
- sb->head = (unsigned char *)sb->mesg + 4;
+ sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
+ sb->mesg->nr_conns = 0;
+ sb->mesg->spare = 0;
+ sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+
sb->firstuse = jiffies;
return sb;
}
@@ -231,20 +405,77 @@ get_curr_sync_buff(unsigned long time)
return sb;
}
+/*
+ * Switch mode from sending version 0 or 1
+ * - must handle sync_buf
+ */
+void ip_vs_sync_switch_mode(int mode) {
+
+ if (!ip_vs_sync_state & IP_VS_STATE_MASTER)
+ return;
+ if (mode == sysctl_ip_vs_sync_ver || !curr_sb)
+ return;
+
+ spin_lock_bh(&curr_sb_lock);
+ /* Buffer empty ? then let buf_create do the job */
+ if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+ kfree(curr_sb);
+ curr_sb = NULL;
+ } else {
+ spin_lock_bh(&ip_vs_sync_lock);
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&curr_sb->list, &ip_vs_sync_queue);
+ else
+ ip_vs_sync_buff_release(curr_sb);
+ spin_unlock_bh(&ip_vs_sync_lock);
+ }
+ spin_unlock_bh(&curr_sb_lock);
+}
/*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
+{
+ struct ip_vs_sync_buff *sb;
+ struct ip_vs_sync_mesg_v0 *mesg;
+
+ if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+ return NULL;
+
+ if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ kfree(sb);
+ return NULL;
+ }
+ mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+ mesg->nr_conns = 0;
+ mesg->syncid = ip_vs_master_syncid;
+ mesg->size = 4;
+ sb->head = (unsigned char *)mesg + 4;
+ sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen;
+ sb->firstuse = jiffies;
+ return sb;
+}
+
+/*
+ * Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
- * Called by ip_vs_in.
*/
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
{
- struct ip_vs_sync_mesg *m;
- struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_mesg_v0 *m;
+ struct ip_vs_sync_conn_v0 *s;
int len;
+ if (unlikely(cp->af != AF_INET))
+ return;
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
spin_lock(&curr_sb_lock);
if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create())) {
+ if (!(curr_sb=ip_vs_sync_buff_create_v0())) {
spin_unlock(&curr_sb_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
@@ -253,10 +484,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
- m = curr_sb->mesg;
- s = (struct ip_vs_sync_conn *)curr_sb->head;
+ m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg;
+ s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
/* copy members */
+ s->reserved = 0;
s->protocol = cp->protocol;
s->cport = cp->cport;
s->vport = cp->vport;
@@ -277,7 +509,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
curr_sb->head += len;
/* check if there is a space for next one */
- if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+ if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) {
sb_queue_tail(curr_sb);
curr_sb = NULL;
}
@@ -288,69 +520,343 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
ip_vs_sync_conn(cp->control);
}
+/*
+ * Add an ip_vs_conn information into the current sync_buff.
+ * Called by ip_vs_in.
+ * Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+ struct ip_vs_sync_mesg *m;
+ union ip_vs_sync_conn *s;
+ __u8 *p;
+ unsigned int len, pe_name_len, pad;
+
+ /* Handle old version of the protocol */
+ if (sysctl_ip_vs_sync_ver == 0) {
+ ip_vs_sync_conn_v0(cp);
+ return;
+ }
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ goto control;
+sloop:
+ /* Sanity checks */
+ pe_name_len = 0;
+ if (cp->pe_data_len) {
+ if (!cp->pe_data || !cp->dest) {
+ IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+ return;
+ }
+ pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+ }
+
+ spin_lock(&curr_sb_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ len = sizeof(struct ip_vs_sync_v6);
+ else
+#endif
+ len = sizeof(struct ip_vs_sync_v4);
+
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+ len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+ if (cp->pe_data_len)
+ len += cp->pe_data_len + 2; /* + Param hdr field */
+ if (pe_name_len)
+ len += pe_name_len + 2;
+
+ /* check if there is a space for this one */
+ pad = 0;
+ if (curr_sb) {
+ pad = (4 - (size_t)curr_sb->head) & 3;
+ if (curr_sb->head + len + pad > curr_sb->end) {
+ sb_queue_tail(curr_sb);
+ curr_sb = NULL;
+ pad = 0;
+ }
+ }
+
+ if (!curr_sb) {
+ if (!(curr_sb=ip_vs_sync_buff_create())) {
+ spin_unlock(&curr_sb_lock);
+ pr_err("ip_vs_sync_buff_create failed.\n");
+ return;
+ }
+ }
+
+ m = curr_sb->mesg;
+ p = curr_sb->head;
+ curr_sb->head += pad + len;
+ m->size += pad + len;
+ /* Add ev. padding from prev. sync_conn */
+ while (pad--)
+ *(p++) = 0;
+
+ s = (union ip_vs_sync_conn *)p;
+
+ /* Set message type & copy members */
+ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+ s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
+ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+ s->v4.state = htons(cp->state);
+ s->v4.protocol = cp->protocol;
+ s->v4.cport = cp->cport;
+ s->v4.vport = cp->vport;
+ s->v4.dport = cp->dport;
+ s->v4.fwmark = htonl(cp->fwmark);
+ s->v4.timeout = htonl(cp->timeout / HZ);
+ m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6) {
+ p += sizeof(struct ip_vs_sync_v6);
+ ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+ ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+ ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+ } else
+#endif
+ {
+ p += sizeof(struct ip_vs_sync_v4); /* options ptr */
+ s->v4.caddr = cp->caddr.ip;
+ s->v4.vaddr = cp->vaddr.ip;
+ s->v4.daddr = cp->daddr.ip;
+ }
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ *(p++) = IPVS_OPT_SEQ_DATA;
+ *(p++) = sizeof(struct ip_vs_sync_conn_options);
+ hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+ p += sizeof(struct ip_vs_seq);
+ hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+ p += sizeof(struct ip_vs_seq);
+ }
+ /* Handle pe data */
+ if (cp->pe_data_len && cp->pe_data) {
+ *(p++) = IPVS_OPT_PE_DATA;
+ *(p++) = cp->pe_data_len;
+ memcpy(p, cp->pe_data, cp->pe_data_len);
+ p += cp->pe_data_len;
+ if (pe_name_len) {
+ /* Add PE_NAME */
+ *(p++) = IPVS_OPT_PE_NAME;
+ *(p++) = pe_name_len;
+ memcpy(p, cp->pe->name, pe_name_len);
+ p += pe_name_len;
+ }
+ }
+
+ spin_unlock(&curr_sb_lock);
+
+control:
+ /* synchronize its controller if it has */
+ cp = cp->control;
+ if (!cp)
+ return;
+ /*
+ * Reduce sync rate for templates
+ * i.e only increment in_pkts for Templates.
+ */
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ int pkts = atomic_add_return(1, &cp->in_pkts);
+
+ if (pkts % sysctl_ip_vs_sync_threshold[1] != 1)
+ return;
+ }
+ goto sloop;
+}
+
+/*
+ * fill_param used by version 1
+ */
static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
- const union nf_inet_addr *caddr, __be16 cport,
- const union nf_inet_addr *vaddr, __be16 vport,
- struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
+ struct ip_vs_conn_param *p,
+ __u8 *pe_data, unsigned int pe_data_len,
+ __u8 *pe_name, unsigned int pe_name_len)
{
- /* XXX: Need to take into account persistence engine */
- ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_conn_fill_param(af, sc->v6.protocol,
+ (const union nf_inet_addr *)&sc->v6.caddr,
+ sc->v6.cport,
+ (const union nf_inet_addr *)&sc->v6.vaddr,
+ sc->v6.vport, p);
+ else
+#endif
+ ip_vs_conn_fill_param(af, sc->v4.protocol,
+ (const union nf_inet_addr *)&sc->v4.caddr,
+ sc->v4.cport,
+ (const union nf_inet_addr *)&sc->v4.vaddr,
+ sc->v4.vport, p);
+ /* Handle pe data */
+ if (pe_data_len) {
+ if (pe_name_len) {
+ char buff[IP_VS_PENAME_MAXLEN+1];
+
+ memcpy(buff, pe_name, pe_name_len);
+ buff[pe_name_len]=0;
+ p->pe = __ip_vs_pe_getbyname(buff);
+ if (!p->pe) {
+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff);
+ return 1;
+ }
+ } else {
+ IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+ return 1;
+ }
+
+ p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+ if (!p->pe_data) {
+ if (p->pe->module)
+ module_put(p->pe->module);
+ return -ENOMEM;
+ }
+ memcpy(p->pe_data, pe_data, pe_data_len);
+ p->pe_data_len = pe_data_len;
+ }
return 0;
}
/*
- * Process received multicast message and create the corresponding
- * ip_vs_conn entries.
+ * Connection Add / Update.
+ * Common for version 0 and 1 reception of backup sync_conns.
+ * Param: ...
+ * timeout is in sec.
*/
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
+ unsigned state, unsigned protocol, unsigned type,
+ const union nf_inet_addr *daddr, __be16 dport,
+ unsigned long timeout, __u32 fwmark,
+ struct ip_vs_sync_conn_options *opt,
+ struct ip_vs_protocol *pp)
{
- struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
- struct ip_vs_sync_conn *s;
- struct ip_vs_sync_conn_options *opt;
- struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp;
struct ip_vs_dest *dest;
- struct ip_vs_conn_param param;
- char *p;
- int i;
+ struct ip_vs_conn *cp;
- if (buflen < sizeof(struct ip_vs_sync_mesg)) {
- IP_VS_ERR_RL("sync message header too short\n");
- return;
- }
- /* Convert size back to host byte order */
- m->size = ntohs(m->size);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ cp = ip_vs_conn_in_get(param);
+ else
+ cp = ip_vs_ct_in_get(param);
- if (buflen != m->size) {
- IP_VS_ERR_RL("bogus sync message size\n");
- return;
- }
+ if (cp && param->pe_data) /* Free pe_data */
+ kfree(param->pe_data);
+ if (!cp) {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ dest = ip_vs_find_dest(type, daddr, dport, param->vaddr,
+ param->vport, protocol, fwmark);
- /* SyncID sanity check */
- if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
- IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
- m->syncid);
- return;
+ /* Set the approprite ativity flag */
+ if (protocol == IPPROTO_TCP) {
+ if (state != IP_VS_TCP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ } else if (protocol == IPPROTO_SCTP) {
+ if (state != IP_VS_SCTP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ if (!cp) {
+ if (param->pe_data)
+ kfree(param->pe_data);
+ IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+ return;
+ }
+ } else if (!cp->dest) {
+ dest = ip_vs_try_bind_dest(cp);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+ (cp->state != state)) {
+ /* update active/inactive flag for the connection */
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state == IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+ (cp->state != state)) {
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
}
- p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+ if (opt)
+ memcpy(&cp->in_seq, opt, sizeof(*opt));
+ atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ cp->state = state;
+ cp->old_state = cp->state;
+ /*
+ * For Ver 0 messages style
+ * - Not possible to recover the right timeout for templates
+ * - can not find the right fwmark
+ * virtual service. If needed, we can do it for
+ * non-fwmark persistent services.
+ * Ver 1 messages style.
+ * - No problem.
+ */
+ if (timeout) {
+ if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+ timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+ cp->timeout = timeout*HZ;
+ } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+ cp->timeout = pp->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
+ ip_vs_conn_put(cp);
+}
+
+/*
+ * Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
+{
+ struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+ struct ip_vs_sync_conn_v0 *s;
+ struct ip_vs_sync_conn_options *opt;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn_param param;
+ char *p;
+ int i;
+
+ p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
for (i=0; i<m->nr_conns; i++) {
unsigned flags, state;
if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
- IP_VS_ERR_RL("bogus conn in sync message\n");
+ IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
return;
}
- s = (struct ip_vs_sync_conn *) p;
+ s = (struct ip_vs_sync_conn_v0 *) p;
flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
flags &= ~IP_VS_CONN_F_HASHED;
if (flags & IP_VS_CONN_F_SEQ_MASK) {
opt = (struct ip_vs_sync_conn_options *)&s[1];
p += FULL_CONN_SIZE;
if (p > buffer+buflen) {
- IP_VS_ERR_RL("bogus conn options in sync message\n");
+ IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
return;
}
} else {
@@ -362,12 +868,12 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
pp = ip_vs_proto_get(s->protocol);
if (!pp) {
- IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
s->protocol);
continue;
}
if (state >= pp->num_states) {
- IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
pp->name, state);
continue;
}
@@ -375,105 +881,273 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
/* protocol in templates is not used for state/timeout */
pp = NULL;
if (state > 0) {
- IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
state = 0;
}
}
- {
- if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
- (union nf_inet_addr *)&s->caddr,
- s->cport,
- (union nf_inet_addr *)&s->vaddr,
- s->vport, &param)) {
- pr_err("ip_vs_conn_fill_param_sync failed");
- return;
+ ip_vs_conn_fill_param(AF_INET, s->protocol,
+ (const union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (const union nf_inet_addr *)&s->vaddr,
+ s->vport, &param);
+
+ /* Send timeout as Zero */
+ ip_vs_proc_conn(&param, flags, state, s->protocol, AF_INET,
+ (union nf_inet_addr *)&s->daddr, s->dport,
+ 0, 0, opt, pp);
+ }
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+ __u32 *opt_flags,
+ struct ip_vs_sync_conn_options *opt)
+{
+ struct ip_vs_sync_conn_options *topt;
+
+ topt = (struct ip_vs_sync_conn_options *)p;
+
+ if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+ IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+ return -EINVAL;
+ }
+ if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+ IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+ return -EINVAL;
+ }
+ ntoh_seq(&topt->in_seq, &opt->in_seq);
+ ntoh_seq(&topt->out_seq, &opt->out_seq);
+ *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+ return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+ __u8 **data, unsigned int maxlen,
+ __u32 *opt_flags, __u32 flag)
+{
+ if (plen > maxlen) {
+ IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+ return -EINVAL;
+ }
+ if (*opt_flags & flag) {
+ IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+ return -EINVAL;
+ }
+ *data_len = plen;
+ *data = p;
+ *opt_flags |= flag;
+ return 0;
+}
+/*
+ * Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
+{
+ struct ip_vs_sync_conn_options opt;
+ union ip_vs_sync_conn *s;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn_param param;
+ __u32 flags;
+ unsigned int af, state, pe_data_len=0, pe_name_len=0;
+ __u8 *pe_data=NULL, *pe_name=NULL;
+ __u32 opt_flags=0;
+ int retc=0;
+
+ s = (union ip_vs_sync_conn *) p;
+
+ if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ af = AF_INET6;
+ p += sizeof(struct ip_vs_sync_v6);
+#else
+ IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+ retc = 10;
+ goto out;
+#endif
+ } else if (!s->v4.type) {
+ af = AF_INET;
+ p += sizeof(struct ip_vs_sync_v4);
+ } else {
+ return -10;
+ }
+ if (p > msg_end)
+ return -20;
+
+ /* Process optional params check Type & Len. */
+ while (p < msg_end) {
+ int ptype;
+ int plen;
+
+ if (p+2 > msg_end)
+ return -30;
+ ptype = *(p++);
+ plen = *(p++);
+
+ if (!plen || ((p + plen) > msg_end))
+ return -40;
+ /* Handle seq option p = param data */
+ switch (ptype & ~IPVS_OPT_F_PARAM) {
+ case IPVS_OPT_SEQ_DATA:
+ if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+ return -50;
+ break;
+
+ case IPVS_OPT_PE_DATA:
+ if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+ IP_VS_PEDATA_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_DATA))
+ return -60;
+ break;
+
+ case IPVS_OPT_PE_NAME:
+ if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+ IP_VS_PENAME_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_NAME))
+ return -70;
+ break;
+
+ default:
+ /* Param data mandatory ? */
+ if (!(ptype & IPVS_OPT_F_PARAM)) {
+ IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+ ptype & ~IPVS_OPT_F_PARAM);
+ retc = 20;
+ goto out;
}
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
- cp = ip_vs_conn_in_get(&param);
- else
- cp = ip_vs_ct_in_get(&param);
}
- if (!cp) {
- /*
- * Find the appropriate destination for the connection.
- * If it is not found the connection will remain unbound
- * but still handled.
- */
- dest = ip_vs_find_dest(AF_INET,
- (union nf_inet_addr *)&s->daddr,
- s->dport,
- (union nf_inet_addr *)&s->vaddr,
- s->vport,
- s->protocol);
- /* Set the approprite ativity flag */
- if (s->protocol == IPPROTO_TCP) {
- if (state != IP_VS_TCP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
- } else if (s->protocol == IPPROTO_SCTP) {
- if (state != IP_VS_SCTP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
+ p += plen; /* Next option */
+ }
+
+ /* Get flags and Mask off unsupported */
+ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+ flags |= IP_VS_CONN_F_SYNC;
+ state = ntohs(s->v4.state);
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->v4.protocol);
+ if (!pp) {
+ IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+ s->v4.protocol);
+ retc = 30;
+ goto out;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+ pp->name, state);
+ retc = 40;
+ goto out;
+ }
+ } else {
+ /* protocol in templates is not used for state/timeout */
+ pp = NULL;
+ if (state > 0) {
+ IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+ state);
+ state = 0;
+ }
+ }
+ if (ip_vs_conn_fill_param_sync(af, s, &param,
+ pe_data, pe_data_len,
+ pe_name, pe_name_len)) {
+ retc = 50;
+ goto out;
+ }
+ /* If only IPv4, just silent skip IPv6 */
+ if (af == AF_INET)
+ ip_vs_proc_conn(&param, flags, state, s->v4.protocol, af,
+ (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+ ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
+ pp);
+#ifdef CONFIG_IP_VS_IPV6
+ else
+ ip_vs_proc_conn(&param, flags, state, s->v6.protocol, af,
+ (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+ ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
+ pp);
+#endif
+ return 0;
+ /* Error exit */
+out:
+ IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+ return retc;
+
+}
+/*
+ * Process received multicast message and create the corresponding
+ * ip_vs_conn entries.
+ * Handles Version 0 & 1
+ */
+static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
+{
+ struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+ __u8 *p, *msg_end;
+ int i, nr_conns;
+
+ if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+ IP_VS_DBG(2, "BACKUP, message header too short\n");
+ return;
+ }
+ /* Convert size back to host byte order */
+ m2->size = ntohs(m2->size);
+
+ if (buflen != m2->size) {
+ IP_VS_DBG(2, "BACKUP, bogus message size\n");
+ return;
+ }
+ /* SyncID sanity check */
+ if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) {
+ IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+ return;
+ }
+ /* Handle version 1 message */
+ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+ && (m2->spare == 0)) {
+
+ msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+ nr_conns = m2->nr_conns;
+
+ for (i=0; i<nr_conns; i++) {
+ union ip_vs_sync_conn *s;
+ unsigned size;
+ int retc;
+
+ p = msg_end;
+ if (p + sizeof(s->v4) > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+ return;
}
- cp = ip_vs_conn_new(&param,
- (union nf_inet_addr *)&s->daddr,
- s->dport, flags, dest);
- if (dest)
- atomic_dec(&dest->refcnt);
- if (!cp) {
- pr_err("ip_vs_conn_new failed\n");
+ s = (union ip_vs_sync_conn *)p;
+ size = ntohs(s->v4.ver_size) & SVER_MASK;
+ msg_end = p + size;
+ /* Basic sanity checks */
+ if (msg_end > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
return;
}
- } else if (!cp->dest) {
- dest = ip_vs_try_bind_dest(cp);
- if (dest)
- atomic_dec(&dest->refcnt);
- } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
- (cp->state != state)) {
- /* update active/inactive flag for the connection */
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_TCP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags |= IP_VS_CONN_F_INACTIVE;
- } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state == IP_VS_TCP_S_ESTABLISHED)) {
- atomic_inc(&dest->activeconns);
- atomic_dec(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+ ntohs(s->v4.ver_size) >> SVER_SHIFT);
+ return;
}
- } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
- (cp->state != state)) {
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_SCTP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ /* Process a single sync_conn */
+ if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+ retc);
+ return;
}
+ /* Make sure we have 32 bit alignment */
+ msg_end = p + ((size + 3) & ~3);
}
-
- if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
- cp->state = state;
- cp->old_state = cp->state;
- /*
- * We can not recover the right timeout for templates
- * in all cases, we can not find the right fwmark
- * virtual service. If needed, we can do it for
- * non-fwmark persistent services.
- */
- if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- cp->timeout = pp->timeout_table[state];
- else
- cp->timeout = (3*60*HZ);
- ip_vs_conn_put(cp);
+ } else {
+ /* Old type of message */
+ ip_vs_process_message_v0(buffer, buflen);
+ return;
}
}
@@ -851,7 +1525,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
+ sizeof(struct ip_vs_sync_conn_v0));
if (state == IP_VS_STATE_MASTER) {
if (sync_master_thread)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3fb..1f2a4e3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
.fl4_tos = RT_TOS(iph->tos),
.mark = skb->mark,
};
- struct rtable *rt;
if (ip_route_output_key(net, &rt, &fl))
return 0;
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+ !skb_is_gso(skb)) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+ !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
"ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
df |= (old_iph->frag_off & htons(IP_DF));
- if ((old_iph->frag_off & htons(IP_DF))
- && mtu < ntohs(old_iph->tot_len)) {
+ if ((old_iph->frag_off & htons(IP_DF) &&
+ mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
- if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+ if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+ !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+ !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+ !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e615119..e95ac42 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -655,7 +655,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
* and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
*/
memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
- sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+ offsetof(struct nf_conn, proto) -
+ offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
spin_lock_init(&ct->lock);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0b..4a9ed23 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
- atomic_inc(&exp->use);
+ /* two references : one for hash insert, one for the timer */
+ atomic_add(2, &exp->use);
if (master_help) {
hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
if (master_help) {
- p = &master_help->helper->expect_policy[exp->class];
+ p = &rcu_dereference_protected(
+ master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ )->expect_policy[exp->class];
exp->timeout.expires = jiffies + p->timeout * HZ;
}
add_timer(&exp->timeout);
- atomic_inc(&exp->use);
NF_CT_STAT_INC(net, expect_create);
}
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
if (!del_timer(&i->timeout))
return 0;
- p = &master_help->helper->expect_policy[i->class];
+ p = &rcu_dereference_protected(
+ master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ )->expect_policy[i->class];
i->timeout.expires = jiffies + p->timeout * HZ;
add_timer(&i->timeout);
return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
/* Will be over limit? */
if (master_help) {
- p = &master_help->helper->expect_policy[expect->class];
+ p = &rcu_dereference_protected(
+ master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ )->expect_policy[expect->class];
if (p->max_expected &&
master_help->expecting[expect->class] >= p->max_expected) {
evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
if (n)
return n;
}
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
}
return head;
}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450..80a23ed 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
/* This assumes that extended areas in conntrack for the types
whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
for (i = min; i <= max; i++) {
- t1 = nf_ct_ext_types[i];
+ t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+ lockdep_is_held(&nf_ct_ext_type_mutex));
if (!t1)
continue;
- t1->alloc_size = sizeof(struct nf_ct_ext)
- + ALIGN(sizeof(struct nf_ct_ext), t1->align)
- + t1->len;
+ t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+ t1->len;
for (j = 0; j < NF_CT_EXT_NUM; j++) {
- t2 = nf_ct_ext_types[j];
+ t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+ lockdep_is_held(&nf_ct_ext_type_mutex));
if (t2 == NULL || t2 == t1 ||
(t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
continue;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4c..767bbe9 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -158,7 +158,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
- if (help && help->helper == me) {
+ if (help && rcu_dereference_protected(
+ help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ ) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
rcu_assign_pointer(help->helper, NULL);
}
@@ -210,7 +213,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
hlist_for_each_entry_safe(exp, n, next,
&net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
- if ((help->helper == me || exp->helper == me) &&
+ if ((rcu_dereference_protected(
+ help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ ) == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0cdba50..9eabaa6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1378,6 +1378,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
}
#endif
+ memset(&ct->proto, 0, sizeof(ct->proto));
if (cda[CTA_PROTOINFO]) {
err = ctnetlink_change_protoinfo(ct, cda);
if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74..5701c8d 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
+ struct nf_conntrack_l3proto *old;
if (proto->l3proto >= AF_MAX)
return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
return -EINVAL;
mutex_lock(&nf_ct_proto_mutex);
- if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+ old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+ lockdep_is_held(&nf_ct_proto_mutex));
+ if (old != &nf_conntrack_l3proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
BUG_ON(proto->l3proto >= AF_MAX);
mutex_lock(&nf_ct_proto_mutex);
- BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+ BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+ lockdep_is_held(&nf_ct_proto_mutex)
+ ) != proto);
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
&nf_conntrack_l3proto_generic);
nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
mutex_lock(&nf_ct_proto_mutex);
if (!nf_ct_protos[l4proto->l3proto]) {
/* l3proto may be loaded latter. */
- struct nf_conntrack_l4proto **proto_array;
+ struct nf_conntrack_l4proto __rcu **proto_array;
int i;
proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
}
for (i = 0; i < MAX_NF_CT_PROTO; i++)
- proto_array[i] = &nf_conntrack_l4proto_generic;
+ RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
/* Before making proto_array visible to lockless readers,
* we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
smp_wmb();
nf_ct_protos[l4proto->l3proto] = proto_array;
- } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
- &nf_conntrack_l4proto_generic) {
+ } else if (rcu_dereference_protected(
+ nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ lockdep_is_held(&nf_ct_proto_mutex)
+ ) != &nf_conntrack_l4proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
BUG_ON(l4proto->l3proto >= PF_MAX);
mutex_lock(&nf_ct_proto_mutex);
- BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+ BUG_ON(rcu_dereference_protected(
+ nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ lockdep_is_held(&nf_ct_proto_mutex)
+ ) != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
&nf_conntrack_l4proto_generic);
nf_ct_l4proto_unregister_sysctl(l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560..9ae57c5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
ct->proto.dccp.state = CT_DCCP_NONE;
+ ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+ ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+ ct->proto.dccp.handshake_seq = 0;
return true;
out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2..6f4ee70 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
test_bit(SCTP_CID_COOKIE_ACK, map))
return false;
+ memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
new_state = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73..6f38d0e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
BUG_ON(th == NULL);
/* Don't need lock here: this conntrack not in circulation yet */
- new_state
- = tcp_conntracks[0][get_conntrack_index(th)]
- [TCP_CONNTRACK_NONE];
+ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
/* Invalid: delete conntrack */
if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
}
if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/* SYN packet */
ct->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.tcp.seen[0].td_end;
tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
- ct->proto.tcp.seen[1].flags = 0;
} else if (nf_ct_tcp_loose == 0) {
/* Don't try to pick up connections. */
return false;
} else {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/*
* We are in the middle of a connection,
* its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.tcp.seen[0].td_maxend =
ct->proto.tcp.seen[0].td_end +
ct->proto.tcp.seen[0].td_maxwin;
- ct->proto.tcp.seen[0].td_scale = 0;
/* We assume SACK and liberal window checking to handle
* window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
IP_CT_TCP_FLAG_BE_LIBERAL;
}
- ct->proto.tcp.seen[1].td_end = 0;
- ct->proto.tcp.seen[1].td_maxend = 0;
- ct->proto.tcp.seen[1].td_maxwin = 0;
- ct->proto.tcp.seen[1].td_scale = 0;
-
/* tcp_packet will set them */
- ct->proto.tcp.state = TCP_CONNTRACK_NONE;
ct->proto.tcp.last_index = TCP_NONE_SET;
pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f..8257bf6 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/rculist_nulls.h>
MODULE_LICENSE("GPL");
@@ -56,7 +57,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
for (st->bucket = 0;
st->bucket < net->ct.htable_size;
st->bucket++) {
- n = rcu_dereference(net->ct.hash[st->bucket].first);
+ n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -69,13 +70,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= net->ct.htable_size)
return NULL;
}
- head = rcu_dereference(net->ct.hash[st->bucket].first);
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(
+ &net->ct.hash[st->bucket]));
}
return head;
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393e..20c775c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v)
struct nf_logger *t;
int ret;
- logger = nf_loggers[*pos];
+ logger = rcu_dereference_protected(nf_loggers[*pos],
+ lockdep_is_held(&nf_log_mutex));
if (!logger)
ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
mutex_unlock(&nf_log_mutex);
} else {
mutex_lock(&nf_log_mutex);
- logger = nf_loggers[tindex];
+ logger = rcu_dereference_protected(nf_loggers[tindex],
+ lockdep_is_held(&nf_log_mutex));
if (!logger)
table->data = "NONE";
else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed..1876f74 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
{
int ret;
+ const struct nf_queue_handler *old;
if (pf >= ARRAY_SIZE(queue_handler))
return -EINVAL;
mutex_lock(&queue_handler_mutex);
- if (queue_handler[pf] == qh)
+ old = rcu_dereference_protected(queue_handler[pf],
+ lockdep_is_held(&queue_handler_mutex));
+ if (old == qh)
ret = -EEXIST;
- else if (queue_handler[pf])
+ else if (old)
ret = -EBUSY;
else {
rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
{
+ const struct nf_queue_handler *old;
+
if (pf >= ARRAY_SIZE(queue_handler))
return -EINVAL;
mutex_lock(&queue_handler_mutex);
- if (queue_handler[pf] && queue_handler[pf] != qh) {
+ old = rcu_dereference_protected(queue_handler[pf],
+ lockdep_is_held(&queue_handler_mutex));
+ if (old && old != qh) {
mutex_unlock(&queue_handler_mutex);
return -EINVAL;
}
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
mutex_lock(&queue_handler_mutex);
for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
- if (queue_handler[pf] == qh)
+ if (rcu_dereference_protected(
+ queue_handler[pf],
+ lockdep_is_held(&queue_handler_mutex)
+ ) == qh)
rcu_assign_pointer(queue_handler[pf], NULL);
}
mutex_unlock(&queue_handler_mutex);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b..91592da 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
if (!hlist_empty(&instance_table[st->bucket]))
- return rcu_dereference_bh(instance_table[st->bucket].first);
+ return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
}
return NULL;
}
static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
{
- h = rcu_dereference_bh(h->next);
+ h = rcu_dereference_bh(hlist_next_rcu(h));
while (!h) {
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
- h = rcu_dereference_bh(instance_table[st->bucket].first);
+ h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
}
return h;
}
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4a..af9c4da 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Xtables: Qdisc classification");
MODULE_ALIAS("ipt_CLASSIFY");
MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
static unsigned int
classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static struct xt_target classify_tg_reg __read_mostly = {
- .name = "CLASSIFY",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .table = "mangle",
- .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
- (1 << NF_INET_POST_ROUTING),
- .target = classify_tg,
- .targetsize = sizeof(struct xt_classify_target_info),
- .me = THIS_MODULE,
+static struct xt_target classify_tg_reg[] __read_mostly = {
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_POST_ROUTING),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_ARP,
+ .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
};
static int __init classify_tg_init(void)
{
- return xt_register_target(&classify_tg_reg);
+ return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
}
static void __exit classify_tg_exit(void)
{
- xt_unregister_target(&classify_tg_reg);
+ xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
}
module_init(classify_tg_init);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1..3962770 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,10 +72,12 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
if (info->queues_total > 1) {
if (par->family == NFPROTO_IPV4)
- queue = hash_v4(skb) % info->queues_total + queue;
+ queue = (((u64) hash_v4(skb) * info->queues_total) >>
+ 32) + queue;
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
else if (par->family == NFPROTO_IPV6)
- queue = hash_v6(skb) % info->queues_total + queue;
+ queue = (((u64) hash_v6(skb) * info->queues_total) >>
+ 32) + queue;
#endif
}
return NF_QUEUE_NR(queue);