From 9846ada138accc63994b57ebdfa76e3e137729e2 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 8 Mar 2011 15:37:27 +0100 Subject: netfilter: ipset: fix the compile warning in ip_set_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netfilter/ipset/ip_set_core.c:615: warning: ‘clash’ may be used uninitialized in this function Signed-off-by: Shan Wei Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8b1a54c..618a615 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -612,7 +612,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set *set, *clash; + struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; const char *name, *typename; -- cgit v0.10.2 From adb00ae2ea0ec65f9d3d06079950c0f0ade3b614 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Mar 2011 14:14:26 +0100 Subject: netfilter: x_tables: misuse of try_then_request_module Since xt_find_match() returns ERR_PTR(xx) on error not NULL, the macro try_then_request_module won't work correctly here. The macro expects its first argument will be zero if condition fails. But ERR_PTR(-ENOENT) is not zero. The correct solution is to propagate the error value back. Found by inspection, and compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0a77d2f..271eed3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -183,7 +183,7 @@ EXPORT_SYMBOL(xt_unregister_matches); /* * These are weird, but module loading must not be done with mutex * held (since they will register), and we have to have a single - * function to use try_then_request_module(). + * function to use. */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ @@ -221,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) { struct xt_match *match; - match = try_then_request_module(xt_find_match(nfproto, name, revision), - "%st_%s", xt_prefix[nfproto], name); - return (match != NULL) ? match : ERR_PTR(-ENOENT); + match = xt_find_match(nfproto, name, revision); + if (IS_ERR(match)) { + request_module("%st_%s", xt_prefix[nfproto], name); + match = xt_find_match(nfproto, name, revision); + } + + return match; } EXPORT_SYMBOL_GPL(xt_request_find_match); @@ -261,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; - target = try_then_request_module(xt_find_target(af, name, revision), - "%st_%s", xt_prefix[af], name); - return (target != NULL) ? target : ERR_PTR(-ENOENT); + target = xt_find_target(af, name, revision); + if (IS_ERR(target)) { + request_module("%st_%s", xt_prefix[af], name); + target = xt_find_target(af, name, revision); + } + + return target; } EXPORT_SYMBOL_GPL(xt_request_find_target); -- cgit v0.10.2 From 42046e2e45c109ba703993c510401a11f716c8df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Mar 2011 19:11:44 +0100 Subject: netfilter: x_tables: return -ENOENT for non-existant matches/targets As Stephen correctly points out, we need to return -ENOENT in xt_find_match()/xt_find_target() after the patch "netfilter: x_tables: misuse of try_then_request_module" in order to properly indicate a non-existant module to the caller. Signed-off-by: Patrick McHardy diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 271eed3..a9adf4c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -190,7 +190,7 @@ EXPORT_SYMBOL(xt_unregister_matches); struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; - int err = 0; + int err = -ENOENT; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); @@ -235,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match); struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; - int err = 0; + int err = -ENOENT; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); -- cgit v0.10.2 From fe8f661f2c2bb058822f13f6f232e121bde1338f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 14 Mar 2011 19:20:44 +0100 Subject: netfilter: nf_conntrack: fix sysctl memory leak Message in log because sysctl table was not empty at netns exit WARNING: at net/sysctl_net.c:84 sysctl_net_exit+0x2a/0x2c() Instrumenting showed that the nf_conntrack_timestamp was the entry that was being created but not cleared. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1909311..1181236 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1294,6 +1294,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_conntrack_ecache_fini(net); + nf_conntrack_tstamp_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); kmem_cache_destroy(net->ct.nf_conntrack_cachep); -- cgit v0.10.2 From 097fc76a0805bdca17baf12cad9d3bcb215716a9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:26:17 +0200 Subject: ipvs: avoid lookup for fwmark 0 Restore the previous behaviour to lookup for fwmark service only when fwmark is non-null. This saves only CPU. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c73b0c8..f0369d6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -411,9 +411,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - svc = __ip_vs_svc_fwm_find(net, af, fwmark); - if (fwmark && svc) - goto out; + if (fwmark) { + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (svc) + goto out; + } /* * Check the table hashed by -- cgit v0.10.2 From 4a569c0c0f833adace1e3aadaa38780ec2fcdf9e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:28:20 +0200 Subject: ipvs: remove _bh from percpu stats reading ip_vs_read_cpu_stats is called only from timer, so no need for _bh locks. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index f560a05..88bd716 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -69,10 +69,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inpkts += s->ustats.inpkts; sum->outpkts += s->ustats.outpkts; do { - start = u64_stats_fetch_begin_bh(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); inbytes = s->ustats.inbytes; outbytes = s->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); sum->inbytes += inbytes; sum->outbytes += outbytes; } else { @@ -80,10 +80,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inpkts = s->ustats.inpkts; sum->outpkts = s->ustats.outpkts; do { - start = u64_stats_fetch_begin_bh(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); sum->inbytes = s->ustats.inbytes; sum->outbytes = s->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); } } } -- cgit v0.10.2 From 6060c74a3de8ed142c78133e2829e74711f77387 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 7 Mar 2011 10:11:34 +0800 Subject: netfilter:ipvs: use kmemdup The semantic patch that makes this output is available in scripts/coccinelle/api/memdup.cocci. More information about semantic patching is available at http://coccinelle.lip6.fr/ Signed-off-by: Shan Wei Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0d83bc0..13d607a 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -92,14 +92,13 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) return -EINVAL; - p->pe_data = kmalloc(matchlen, GFP_ATOMIC); - if (!p->pe_data) - return -ENOMEM; - /* N.B: pe_data is only set on success, * this allows fallback to the default persistence logic on failure */ - memcpy(p->pe_data, dptr + matchoff, matchlen); + p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC); + if (!p->pe_data) + return -ENOMEM; + p->pe_data_len = matchlen; return 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index fecf24d..c5d13b0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -697,13 +697,12 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, return 1; } - p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); + p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); if (!p->pe_data) { if (p->pe->module) module_put(p->pe->module); return -ENOMEM; } - memcpy(p->pe_data, pe_data, pe_data_len); p->pe_data_len = pe_data_len; } return 0; -- cgit v0.10.2 From 06b69390a652bfe4fa7e18e27c938e75ffe86ba0 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Wed, 9 Mar 2011 22:55:05 +0100 Subject: IPVS: Fix variable assignment in ip_vs_notrack There's no sense to 'ct = ct = ' in ip_vs_notrack(). Just assign nf_ct_get()'s return value directly to the pointer variable 'ct' once. Signed-off-by: Jesper Juhl Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e74da41e..1dcb75d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1190,7 +1190,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) enum ip_conntrack_info ctinfo; - struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { nf_reset(skb); -- cgit v0.10.2 From 2553d064ff4bf999f369c8c3dfacaa797dbef1d9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:18:07 +0200 Subject: ipvs: move struct netns_ipvs Remove include/net/netns/ip_vs.h because it depends on structures from include/net/ip_vs.h. As ipvs is pointer in struct net it is better to move struct netns_ipvs into include/net/ip_vs.h, so that we can easily use other structures in struct netns_ipvs. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1dcb75d..091ca1f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -803,6 +803,128 @@ struct ip_vs_app { void (*timeout_change)(struct ip_vs_app *app, int flags); }; +/* IPVS in network namespace */ +struct netns_ipvs { + int gen; /* Generation */ + /* + * Hash table: for real service lookups + */ + #define IP_VS_RTAB_BITS 4 + #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) + #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) + + struct list_head rs_table[IP_VS_RTAB_SIZE]; + /* ip_vs_app */ + struct list_head app_list; + struct mutex app_mutex; + struct lock_class_key app_key; /* mutex debuging */ + + /* ip_vs_proto */ + #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ + struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; + /* ip_vs_proto_tcp */ +#ifdef CONFIG_IP_VS_PROTO_TCP + #define TCP_APP_TAB_BITS 4 + #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) + #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) + struct list_head tcp_apps[TCP_APP_TAB_SIZE]; + spinlock_t tcp_app_lock; +#endif + /* ip_vs_proto_udp */ +#ifdef CONFIG_IP_VS_PROTO_UDP + #define UDP_APP_TAB_BITS 4 + #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) + #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) + struct list_head udp_apps[UDP_APP_TAB_SIZE]; + spinlock_t udp_app_lock; +#endif + /* ip_vs_proto_sctp */ +#ifdef CONFIG_IP_VS_PROTO_SCTP + #define SCTP_APP_TAB_BITS 4 + #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) + #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) + /* Hash table for SCTP application incarnations */ + struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; + spinlock_t sctp_app_lock; +#endif + /* ip_vs_conn */ + atomic_t conn_count; /* connection counter */ + + /* ip_vs_ctl */ + struct ip_vs_stats *tot_stats; /* Statistics & est. */ + struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ + seqcount_t *ustats_seq; /* u64 read retry */ + + int num_services; /* no of virtual services */ + /* 1/rate drop and drop-entry variables */ + struct delayed_work defense_work; /* Work handler */ + int drop_rate; + int drop_counter; + atomic_t dropentry; + /* locks in ctl.c */ + spinlock_t dropentry_lock; /* drop entry handling */ + spinlock_t droppacket_lock; /* drop packet handling */ + spinlock_t securetcp_lock; /* state and timeout tables */ + rwlock_t rs_lock; /* real services table */ + /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ + struct lock_class_key ctl_key; /* ctl_mutex debuging */ + /* Trash for destinations */ + struct list_head dest_trash; + /* Service counters */ + atomic_t ftpsvc_counter; + atomic_t nullsvc_counter; + + /* sys-ctl struct */ + struct ctl_table_header *sysctl_hdr; + struct ctl_table *sysctl_tbl; + /* sysctl variables */ + int sysctl_amemthresh; + int sysctl_am_droprate; + int sysctl_drop_entry; + int sysctl_drop_packet; + int sysctl_secure_tcp; +#ifdef CONFIG_IP_VS_NFCT + int sysctl_conntrack; +#endif + int sysctl_snat_reroute; + int sysctl_sync_ver; + int sysctl_cache_bypass; + int sysctl_expire_nodest_conn; + int sysctl_expire_quiescent_template; + int sysctl_sync_threshold[2]; + int sysctl_nat_icmp_send; + + /* ip_vs_lblc */ + int sysctl_lblc_expiration; + struct ctl_table_header *lblc_ctl_header; + struct ctl_table *lblc_ctl_table; + /* ip_vs_lblcr */ + int sysctl_lblcr_expiration; + struct ctl_table_header *lblcr_ctl_header; + struct ctl_table *lblcr_ctl_table; + /* ip_vs_est */ + struct list_head est_list; /* estimator list */ + spinlock_t est_lock; + struct timer_list est_timer; /* Estimation timer */ + /* ip_vs_sync */ + struct list_head sync_queue; + spinlock_t sync_lock; + struct ip_vs_sync_buff *sync_buff; + spinlock_t sync_buff_lock; + struct sockaddr_in sync_mcast_addr; + struct task_struct *master_thread; + struct task_struct *backup_thread; + int send_mesg_maxlen; + int recv_mesg_maxlen; + volatile int sync_state; + volatile int master_syncid; + volatile int backup_syncid; + /* multicast interface name */ + char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; + char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; + /* net name space ptr */ + struct net *net; /* Needed by timer routines */ +}; /* * IPVS core functions diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b3b4a34..3ae4919 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -20,7 +20,6 @@ #include #endif #include -#include struct proc_dir_entry; struct net_device; @@ -28,6 +27,7 @@ struct sock; struct ctl_table_header; struct net_generic; struct sock; +struct netns_ipvs; #define NETDEV_HASHBITS 8 diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h deleted file mode 100644 index 259ebac..0000000 --- a/include/net/netns/ip_vs.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * IP Virtual Server - * Data structure for network namspace - * - */ - -#ifndef IP_VS_H_ -#define IP_VS_H_ - -#include -#include -#include -#include -#include -#include - -struct ip_vs_stats; -struct ip_vs_sync_buff; -struct ctl_table_header; - -struct netns_ipvs { - int gen; /* Generation */ - /* - * Hash table: for real service lookups - */ - #define IP_VS_RTAB_BITS 4 - #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) - #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - - struct list_head rs_table[IP_VS_RTAB_SIZE]; - /* ip_vs_app */ - struct list_head app_list; - struct mutex app_mutex; - struct lock_class_key app_key; /* mutex debuging */ - - /* ip_vs_proto */ - #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ - struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; - /* ip_vs_proto_tcp */ -#ifdef CONFIG_IP_VS_PROTO_TCP - #define TCP_APP_TAB_BITS 4 - #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) - #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) - struct list_head tcp_apps[TCP_APP_TAB_SIZE]; - spinlock_t tcp_app_lock; -#endif - /* ip_vs_proto_udp */ -#ifdef CONFIG_IP_VS_PROTO_UDP - #define UDP_APP_TAB_BITS 4 - #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) - #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) - struct list_head udp_apps[UDP_APP_TAB_SIZE]; - spinlock_t udp_app_lock; -#endif - /* ip_vs_proto_sctp */ -#ifdef CONFIG_IP_VS_PROTO_SCTP - #define SCTP_APP_TAB_BITS 4 - #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) - #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) - /* Hash table for SCTP application incarnations */ - struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; - spinlock_t sctp_app_lock; -#endif - /* ip_vs_conn */ - atomic_t conn_count; /* connection counter */ - - /* ip_vs_ctl */ - struct ip_vs_stats *tot_stats; /* Statistics & est. */ - struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ - seqcount_t *ustats_seq; /* u64 read retry */ - - int num_services; /* no of virtual services */ - /* 1/rate drop and drop-entry variables */ - struct delayed_work defense_work; /* Work handler */ - int drop_rate; - int drop_counter; - atomic_t dropentry; - /* locks in ctl.c */ - spinlock_t dropentry_lock; /* drop entry handling */ - spinlock_t droppacket_lock; /* drop packet handling */ - spinlock_t securetcp_lock; /* state and timeout tables */ - rwlock_t rs_lock; /* real services table */ - /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ - struct lock_class_key ctl_key; /* ctl_mutex debuging */ - /* Trash for destinations */ - struct list_head dest_trash; - /* Service counters */ - atomic_t ftpsvc_counter; - atomic_t nullsvc_counter; - - /* sys-ctl struct */ - struct ctl_table_header *sysctl_hdr; - struct ctl_table *sysctl_tbl; - /* sysctl variables */ - int sysctl_amemthresh; - int sysctl_am_droprate; - int sysctl_drop_entry; - int sysctl_drop_packet; - int sysctl_secure_tcp; -#ifdef CONFIG_IP_VS_NFCT - int sysctl_conntrack; -#endif - int sysctl_snat_reroute; - int sysctl_sync_ver; - int sysctl_cache_bypass; - int sysctl_expire_nodest_conn; - int sysctl_expire_quiescent_template; - int sysctl_sync_threshold[2]; - int sysctl_nat_icmp_send; - - /* ip_vs_lblc */ - int sysctl_lblc_expiration; - struct ctl_table_header *lblc_ctl_header; - struct ctl_table *lblc_ctl_table; - /* ip_vs_lblcr */ - int sysctl_lblcr_expiration; - struct ctl_table_header *lblcr_ctl_header; - struct ctl_table *lblcr_ctl_table; - /* ip_vs_est */ - struct list_head est_list; /* estimator list */ - spinlock_t est_lock; - struct timer_list est_timer; /* Estimation timer */ - /* ip_vs_sync */ - struct list_head sync_queue; - spinlock_t sync_lock; - struct ip_vs_sync_buff *sync_buff; - spinlock_t sync_buff_lock; - struct sockaddr_in sync_mcast_addr; - struct task_struct *master_thread; - struct task_struct *backup_thread; - int send_mesg_maxlen; - int recv_mesg_maxlen; - volatile int sync_state; - volatile int master_syncid; - volatile int backup_syncid; - /* multicast interface name */ - char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - /* net name space ptr */ - struct net *net; /* Needed by timer routines */ -}; - -#endif /* IP_VS_H_ */ -- cgit v0.10.2 From 2a0751af09c3099cf2837c623ca5d0436317d02d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:20:35 +0200 Subject: ipvs: reorganize tot_stats The global tot_stats contains cpustats field just like the stats for dest and svc, so better use it to simplify the usage in estimation_timer. As tot_stats is registered as estimator we can remove the special ip_vs_read_cpu_stats call for tot_stats. Fix ip_vs_read_cpu_stats to be called under stats lock because it is still used as synchronization between estimation timer and user context (the stats readers). Also, make sure ip_vs_stats_percpu_show reads properly the u64 stats from user context. Signed-off-by: Julian Anastasov Eric Dumazet Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 091ca1f..9db750d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -851,8 +851,7 @@ struct netns_ipvs { atomic_t conn_count; /* connection counter */ /* ip_vs_ctl */ - struct ip_vs_stats *tot_stats; /* Statistics & est. */ - struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ + struct ip_vs_stats tot_stats; /* Statistics & est. */ seqcount_t *ustats_seq; /* u64 read retry */ int num_services; /* no of virtual services */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2d1f932..6f4940e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); s->ustats.inbytes += skb->len; @@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); s->ustats.outbytes += skb->len; @@ -183,7 +183,7 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) s = this_cpu_ptr(svc->stats.cpustats); s->ustats.conns++; - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.conns++; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f0369d6..a2a67ad 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1481,7 +1481,7 @@ static int ip_vs_zero_all(struct net *net) } } - ip_vs_zero_stats(net_ipvs(net)->tot_stats); + ip_vs_zero_stats(&net_ipvs(net)->tot_stats); return 0; } @@ -1963,7 +1963,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2007,7 +2007,8 @@ static const struct file_operations ip_vs_stats_fops = { static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; + struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2017,11 +2018,20 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) "CPU Conns Packets Packets Bytes Bytes\n"); for_each_possible_cpu(i) { - struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); + unsigned int start; + __u64 inbytes, outbytes; + + do { + start = u64_stats_fetch_begin_bh(&u->syncp); + inbytes = u->ustats.inbytes; + outbytes = u->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&u->syncp, start)); + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", - i, u->ustats.conns, u->ustats.inpkts, - u->ustats.outpkts, (__u64)u->ustats.inbytes, - (__u64)u->ustats.outbytes); + i, u->ustats.conns, u->ustats.inpkts, + u->ustats.outpkts, (__u64)inbytes, + (__u64)outbytes); } spin_lock_bh(&tot_stats->lock); @@ -3505,17 +3515,12 @@ int __net_init __ip_vs_control_init(struct net *net) atomic_set(&ipvs->nullsvc_counter, 0); /* procfs stats */ - ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); - if (ipvs->tot_stats == NULL) { - pr_err("%s(): no memory.\n", __func__); - return -ENOMEM; - } - ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->cpustats) { + ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!ipvs->tot_stats.cpustats) { pr_err("%s() alloc_percpu failed\n", __func__); goto err_alloc; } - spin_lock_init(&ipvs->tot_stats->lock); + spin_lock_init(&ipvs->tot_stats.lock); proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); @@ -3563,7 +3568,7 @@ int __net_init __ip_vs_control_init(struct net *net) goto err_dup; } #endif - ip_vs_new_estimator(net, ipvs->tot_stats); + ip_vs_new_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3571,9 +3576,8 @@ int __net_init __ip_vs_control_init(struct net *net) return 0; err_dup: - free_percpu(ipvs->cpustats); + free_percpu(ipvs->tot_stats.cpustats); err_alloc: - kfree(ipvs->tot_stats); return -ENOMEM; } @@ -3582,7 +3586,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); - ip_vs_kill_estimator(net, ipvs->tot_stats); + ip_vs_kill_estimator(net, &ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); #ifdef CONFIG_SYSCTL @@ -3591,8 +3595,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); - free_percpu(ipvs->cpustats); - kfree(ipvs->tot_stats); + free_percpu(ipvs->tot_stats.cpustats); } static struct pernet_operations ipvs_control_ops = { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 88bd716..b3751cf 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg) struct netns_ipvs *ipvs; ipvs = net_ipvs(net); - ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); - ip_vs_read_cpu_stats(&s->ustats, s->cpustats); spin_lock(&s->lock); + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); n_conns = s->ustats.conns; n_inpkts = s->ustats.inpkts; n_outpkts = s->ustats.outpkts; -- cgit v0.10.2 From 55a3d4e15c7c953ecc55b96b83d2679abf8a7899 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:37:49 +0200 Subject: ipvs: properly zero stats and rates Currently, the new percpu counters are not zeroed and the zero commands do not work as expected, we still show the old sum of percpu values. OTOH, we can not reset the percpu counters from user context without causing the incrementing to use old and bogus values. So, as Eric Dumazet suggested fix that by moving all overhead to stats reading in user context. Do not introduce overhead in timer context (estimator) and incrementing (packet handling in softirqs). The new ustats0 field holds the zero point for all counter values, the rates always use 0 as base value as before. When showing the values to user space just give the difference between counters and the base values. The only drawback is that percpu stats are not zeroed, they are accessible only from /proc and are new interface, so it should not be a compatibility problem as long as the sum stats are correct after zeroing. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9db750d..06f5af4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -374,6 +374,7 @@ struct ip_vs_stats { struct ip_vs_estimator est; /* estimator */ struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ spinlock_t lock; /* spin lock */ + struct ip_vs_stats_user ustats0; /* reset values */ }; /* diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a2a67ad..804fee7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -711,13 +711,51 @@ static void ip_vs_trash_cleanup(struct net *net) } } +static void +ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) +{ +#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c +#define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r + + spin_lock_bh(&src->lock); + + IP_VS_SHOW_STATS_COUNTER(conns); + IP_VS_SHOW_STATS_COUNTER(inpkts); + IP_VS_SHOW_STATS_COUNTER(outpkts); + IP_VS_SHOW_STATS_COUNTER(inbytes); + IP_VS_SHOW_STATS_COUNTER(outbytes); + + IP_VS_SHOW_STATS_RATE(cps); + IP_VS_SHOW_STATS_RATE(inpps); + IP_VS_SHOW_STATS_RATE(outpps); + IP_VS_SHOW_STATS_RATE(inbps); + IP_VS_SHOW_STATS_RATE(outbps); + + spin_unlock_bh(&src->lock); +} static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(&stats->ustats, 0, sizeof(stats->ustats)); + /* get current counters as zero point, rates are zeroed */ + +#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c +#define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0 + + IP_VS_ZERO_STATS_COUNTER(conns); + IP_VS_ZERO_STATS_COUNTER(inpkts); + IP_VS_ZERO_STATS_COUNTER(outpkts); + IP_VS_ZERO_STATS_COUNTER(inbytes); + IP_VS_ZERO_STATS_COUNTER(outbytes); + + IP_VS_ZERO_STATS_RATE(cps); + IP_VS_ZERO_STATS_RATE(inpps); + IP_VS_ZERO_STATS_RATE(outpps); + IP_VS_ZERO_STATS_RATE(inbps); + IP_VS_ZERO_STATS_RATE(outbps); + ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -1963,7 +2001,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; + struct ip_vs_stats_user show; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -1971,22 +2009,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_printf(seq, " Conns Packets Packets Bytes Bytes\n"); - spin_lock_bh(&tot_stats->lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, - tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, - (unsigned long long) tot_stats->ustats.inbytes, - (unsigned long long) tot_stats->ustats.outbytes); + ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, + show.inpkts, show.outpkts, + (unsigned long long) show.inbytes, + (unsigned long long) show.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq,"%8X %8X %8X %16X %16X\n", - tot_stats->ustats.cps, - tot_stats->ustats.inpps, - tot_stats->ustats.outpps, - tot_stats->ustats.inbps, - tot_stats->ustats.outbps); - spin_unlock_bh(&tot_stats->lock); + seq_printf(seq, "%8X %8X %8X %16X %16X\n", + show.cps, show.inpps, show.outpps, + show.inbps, show.outbps); return 0; } @@ -2298,14 +2332,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void -ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) -{ - spin_lock_bh(&src->lock); - memcpy(dst, &src->ustats, sizeof(*dst)); - spin_unlock_bh(&src->lock); -} - -static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; @@ -2691,31 +2717,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, struct ip_vs_stats *stats) { + struct ip_vs_stats_user ustats; struct nlattr *nl_stats = nla_nest_start(skb, container_type); if (!nl_stats) return -EMSGSIZE; - spin_lock_bh(&stats->lock); - - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); + ip_vs_copy_stats(&ustats, stats); - spin_unlock_bh(&stats->lock); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps); nla_nest_end(skb, nl_stats); return 0; nla_put_failure: - spin_unlock_bh(&stats->lock); nla_nest_cancel(skb, nl_stats); return -EMSGSIZE; } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index b3751cf..a850087 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -184,13 +184,14 @@ void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; - - /* set counters zero, caller must hold the stats->lock lock */ - est->last_inbytes = 0; - est->last_outbytes = 0; - est->last_conns = 0; - est->last_inpkts = 0; - est->last_outpkts = 0; + struct ip_vs_stats_user *u = &stats->ustats; + + /* reset counters, caller must hold the stats->lock lock */ + est->last_inbytes = u->inbytes; + est->last_outbytes = u->outbytes; + est->last_conns = u->conns; + est->last_inpkts = u->inpkts; + est->last_outpkts = u->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; -- cgit v0.10.2 From 87d68a15e2d5a6bd08e59ec80c7a5073bcabb7c3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:39:18 +0200 Subject: ipvs: remove unused seqcount stats Remove ustats_seq, IPVS_STAT_INC and IPVS_STAT_ADD because they are not used. They were replaced with u64_stats. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 06f5af4..cf014ab 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -377,22 +377,6 @@ struct ip_vs_stats { struct ip_vs_stats_user ustats0; /* reset values */ }; -/* - * Helper Macros for per cpu - * ipvs->tot_stats->ustats.count - */ -#define IPVS_STAT_INC(ipvs, count) \ - __this_cpu_inc((ipvs)->ustats->count) - -#define IPVS_STAT_ADD(ipvs, count, value) \ - do {\ - write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \ - raw_smp_processor_id())); \ - __this_cpu_add((ipvs)->ustats->count, value); \ - write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \ - raw_smp_processor_id())); \ - } while (0) - struct dst_entry; struct iphdr; struct ip_vs_conn; @@ -853,7 +837,6 @@ struct netns_ipvs { /* ip_vs_ctl */ struct ip_vs_stats tot_stats; /* Statistics & est. */ - seqcount_t *ustats_seq; /* u64 read retry */ int num_services; /* no of virtual services */ /* 1/rate drop and drop-entry variables */ -- cgit v0.10.2 From ea9f22cce9c2530d659f9122819940b69506b2d9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:41:54 +0200 Subject: ipvs: optimize rates reading Move the estimator reading from estimation_timer to user context. ip_vs_read_estimator() will be used to decode the rate values. As the decoded rates are not set by estimation timer there is no need to reset them in ip_vs_zero_stats. There is no need ip_vs_new_estimator() to encode stats to rates, if the destination is in trash both the stats and the rates are inactive. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cf014ab..e4a39c4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1179,6 +1179,8 @@ extern void ip_vs_estimator_cleanup(void); extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); +extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst, + struct ip_vs_stats *stats); /* * Various IPVS packet transmitters (from ip_vs_xmit.c) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 804fee7..c93d806 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -715,7 +715,6 @@ static void ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) { #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c -#define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r spin_lock_bh(&src->lock); @@ -725,11 +724,7 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) IP_VS_SHOW_STATS_COUNTER(inbytes); IP_VS_SHOW_STATS_COUNTER(outbytes); - IP_VS_SHOW_STATS_RATE(cps); - IP_VS_SHOW_STATS_RATE(inpps); - IP_VS_SHOW_STATS_RATE(outpps); - IP_VS_SHOW_STATS_RATE(inbps); - IP_VS_SHOW_STATS_RATE(outbps); + ip_vs_read_estimator(dst, src); spin_unlock_bh(&src->lock); } @@ -742,7 +737,6 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) /* get current counters as zero point, rates are zeroed */ #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c -#define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0 IP_VS_ZERO_STATS_COUNTER(conns); IP_VS_ZERO_STATS_COUNTER(inpkts); @@ -750,12 +744,6 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) IP_VS_ZERO_STATS_COUNTER(inbytes); IP_VS_ZERO_STATS_COUNTER(outbytes); - IP_VS_ZERO_STATS_RATE(cps); - IP_VS_ZERO_STATS_RATE(inpps); - IP_VS_ZERO_STATS_RATE(outpps); - IP_VS_ZERO_STATS_RATE(inbps); - IP_VS_ZERO_STATS_RATE(outbps); - ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -2043,6 +2031,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) struct net *net = seq_file_single_net(seq); struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats; + struct ip_vs_stats_user rates; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2069,22 +2058,26 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) } spin_lock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, (unsigned long long) tot_stats->ustats.inbytes, (unsigned long long) tot_stats->ustats.outbytes); + ip_vs_read_estimator(&rates, tot_stats); + + spin_unlock_bh(&tot_stats->lock); + /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq, " %8X %8X %8X %16X %16X\n", - tot_stats->ustats.cps, - tot_stats->ustats.inpps, - tot_stats->ustats.outpps, - tot_stats->ustats.inbps, - tot_stats->ustats.outbps); - spin_unlock_bh(&tot_stats->lock); + rates.cps, + rates.inpps, + rates.outpps, + rates.inbps, + rates.outbps); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index a850087..fda75be 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -117,27 +117,22 @@ static void estimation_timer(unsigned long arg) rate = (n_conns - e->last_conns) << 9; e->last_conns = n_conns; e->cps += ((long)rate - (long)e->cps) >> 2; - s->ustats.cps = (e->cps + 0x1FF) >> 10; rate = (n_inpkts - e->last_inpkts) << 9; e->last_inpkts = n_inpkts; e->inpps += ((long)rate - (long)e->inpps) >> 2; - s->ustats.inpps = (e->inpps + 0x1FF) >> 10; rate = (n_outpkts - e->last_outpkts) << 9; e->last_outpkts = n_outpkts; e->outpps += ((long)rate - (long)e->outpps) >> 2; - s->ustats.outpps = (e->outpps + 0x1FF) >> 10; rate = (n_inbytes - e->last_inbytes) << 4; e->last_inbytes = n_inbytes; e->inbps += ((long)rate - (long)e->inbps) >> 2; - s->ustats.inbps = (e->inbps + 0xF) >> 5; rate = (n_outbytes - e->last_outbytes) << 4; e->last_outbytes = n_outbytes; e->outbps += ((long)rate - (long)e->outbps) >> 2; - s->ustats.outbps = (e->outbps + 0xF) >> 5; spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); @@ -151,21 +146,6 @@ void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) INIT_LIST_HEAD(&est->list); - est->last_conns = stats->ustats.conns; - est->cps = stats->ustats.cps<<10; - - est->last_inpkts = stats->ustats.inpkts; - est->inpps = stats->ustats.inpps<<10; - - est->last_outpkts = stats->ustats.outpkts; - est->outpps = stats->ustats.outpps<<10; - - est->last_inbytes = stats->ustats.inbytes; - est->inbps = stats->ustats.inbps<<5; - - est->last_outbytes = stats->ustats.outbytes; - est->outbps = stats->ustats.outbps<<5; - spin_lock_bh(&ipvs->est_lock); list_add(&est->list, &ipvs->est_list); spin_unlock_bh(&ipvs->est_lock); @@ -199,6 +179,19 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->outbps = 0; } +/* Get decoded rates */ +void ip_vs_read_estimator(struct ip_vs_stats_user *dst, + struct ip_vs_stats *stats) +{ + struct ip_vs_estimator *e = &stats->est; + + dst->cps = (e->cps + 0x1FF) >> 10; + dst->inpps = (e->inpps + 0x1FF) >> 10; + dst->outpps = (e->outpps + 0x1FF) >> 10; + dst->inbps = (e->inbps + 0xF) >> 5; + dst->outbps = (e->outbps + 0xF) >> 5; +} + static int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -- cgit v0.10.2 From 6ef757f965c9133e82116475eab7f30df391c7fa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:44:28 +0200 Subject: ipvs: rename estimator functions Rename ip_vs_new_estimator to ip_vs_start_estimator and ip_vs_kill_estimator to ip_vs_stop_estimator to better match their logic. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e4a39c4..7ca5be2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1176,8 +1176,8 @@ extern void ip_vs_sync_cleanup(void); */ extern int ip_vs_estimator_init(void); extern void ip_vs_estimator_cleanup(void); -extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); -extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); +extern void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats); +extern void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats); extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst, struct ip_vs_stats *stats); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c93d806..c5b1234 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -802,7 +802,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); if (add) - ip_vs_new_estimator(svc->net, &dest->stats); + ip_vs_start_estimator(svc->net, &dest->stats); write_lock_bh(&__ip_vs_svc_lock); @@ -1008,7 +1008,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_kill_estimator(net, &dest->stats); + ip_vs_stop_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1201,7 +1201,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - ip_vs_new_estimator(net, &svc->stats); + ip_vs_start_estimator(net, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1353,7 +1353,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) if (svc->af == AF_INET) ipvs->num_services--; - ip_vs_kill_estimator(svc->net, &svc->stats); + ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ old_sched = svc->scheduler; @@ -3585,7 +3585,7 @@ int __net_init __ip_vs_control_init(struct net *net) goto err_dup; } #endif - ip_vs_new_estimator(net, &ipvs->tot_stats); + ip_vs_start_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3603,7 +3603,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); - ip_vs_kill_estimator(net, &ipvs->tot_stats); + ip_vs_stop_estimator(net, &ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index fda75be..8c8766c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -139,7 +139,7 @@ static void estimation_timer(unsigned long arg) mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; @@ -151,7 +151,7 @@ void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; -- cgit v0.10.2 From ba4fd7e966fa837557a3ec846c5fd15328b212af Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add ip_vs_route_me_harder() Add ip_vs_route_me_harder() to avoid repeating the same code twice. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6f4940e..299c7f3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -631,6 +631,24 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) } #endif +static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +{ + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) + return 1; + } else +#endif + if ((ipvs->sysctl_snat_reroute || + skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + ip_route_me_harder(skb, RTN_LOCAL) != 0) + return 1; + + return 0; +} + /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in @@ -737,7 +755,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { - struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -759,8 +776,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; - ipvs = net_ipvs(skb_net(skb)); - #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -768,16 +783,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) - goto out; - } else -#endif - if ((ipvs->sysctl_snat_reroute || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto out; + if (ip_vs_route_me_harder(af, skb)) + goto out; /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -985,7 +992,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { struct ip_vs_protocol *pp = pd->pp; - struct netns_ipvs *ipvs; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); @@ -1021,18 +1027,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - ipvs = net_ipvs(skb_net(skb)); - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) - goto drop; - } else -#endif - if ((ipvs->sysctl_snat_reroute || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto drop; + if (ip_vs_route_me_harder(af, skb)) + goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); -- cgit v0.10.2 From 84b3cee39ff1ffc97f4f6fba8ad26786c1f6d8f5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_snat_reroute() In preparation for not including sysctl_snat_reroute in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 299c7f3..1d8a2a2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -599,6 +599,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } +#ifdef CONFIG_SYSCTL + +static int sysctl_snat_reroute(struct sk_buff *skb) +{ + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + return ipvs->sysctl_snat_reroute; +} + +#else + +static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } + +#endif + __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -633,15 +647,13 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) static int ip_vs_route_me_harder(int af, struct sk_buff *skb) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); - #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) + if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) return 1; } else #endif - if ((ipvs->sysctl_snat_reroute || + if ((sysctl_snat_reroute(skb) || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) return 1; -- cgit v0.10.2 From 0cfa558e2c21644a0dd6c21cfadd8bbeaf9fe1a0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_nat_icmp_send() In preparation for not including sysctl_nat_icmp_send in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1d8a2a2..c9b8372 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -607,9 +607,16 @@ static int sysctl_snat_reroute(struct sk_buff *skb) return ipvs->sysctl_snat_reroute; } +static int sysctl_nat_icmp_send(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + return ipvs->sysctl_nat_icmp_send; +} + #else static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } +static int sysctl_nat_icmp_send(struct net *net) { return 0; } #endif @@ -1074,7 +1081,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; - struct netns_ipvs *ipvs; EnterFunction(11); @@ -1149,11 +1155,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); - ipvs = net_ipvs(net); if (likely(cp)) return handle_response(af, skb, pd, cp, iph.len); - if (ipvs->sysctl_nat_icmp_send && + if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { -- cgit v0.10.2 From 59e0350eada0516a810cb780db37746165f1d516 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add {sysctl_sync_threshold,period}() In preparation for not including sysctl_sync_threshold in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7ca5be2..253736d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -909,6 +909,35 @@ struct netns_ipvs { struct net *net; /* Needed by timer routines */ }; +#define DEFAULT_SYNC_THRESHOLD 3 +#define DEFAULT_SYNC_PERIOD 50 + +#ifdef CONFIG_SYSCTL + +static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_sync_threshold[0]; +} + +static inline int sysctl_sync_period(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_sync_threshold[1]; +} + +#else + +static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) +{ + return DEFAULT_SYNC_THRESHOLD; +} + +static inline int sysctl_sync_period(struct netns_ipvs *ipvs) +{ + return DEFAULT_SYNC_PERIOD; +} + +#endif + /* * IPVS core functions * (from ip_vs_core.c) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index c9b8372..6a0053d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1613,15 +1613,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = ipvs->sysctl_sync_threshold[0]; + pkts = sysctl_sync_threshold(ipvs); else pkts = atomic_add_return(1, &cp->in_pkts); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % ipvs->sysctl_sync_threshold[1] - == ipvs->sysctl_sync_threshold[0])) || + (pkts % sysctl_sync_period(ipvs) + == sysctl_sync_threshold(ipvs))) || (cp->old_state != cp->state && ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || @@ -1635,8 +1635,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % ipvs->sysctl_sync_threshold[1] - == ipvs->sysctl_sync_threshold[0])) || + (pkts % sysctl_sync_period(ipvs) + == sysctl_sync_threshold(ipvs))) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c5b1234..364520f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3569,8 +3569,8 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; - ipvs->sysctl_sync_threshold[0] = 3; - ipvs->sysctl_sync_threshold[1] = 50; + ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; + ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c5d13b0..e84987f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -650,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % ipvs->sysctl_sync_threshold[1] != 1) + if (pkts % sysctl_sync_period(ipvs) != 1) return; } goto sloop; @@ -794,7 +794,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); + atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; /* -- cgit v0.10.2 From 7532e8d40ccfdde6667169eeac4fd7778d6eb462 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_sync_ver() In preparation for not including sysctl_sync_ver in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 253736d..687ef18 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -911,6 +911,7 @@ struct netns_ipvs { #define DEFAULT_SYNC_THRESHOLD 3 #define DEFAULT_SYNC_PERIOD 50 +#define DEFAULT_SYNC_VER 1 #ifdef CONFIG_SYSCTL @@ -924,6 +925,11 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs) return ipvs->sysctl_sync_threshold[1]; } +static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_sync_ver; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -936,6 +942,11 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs) return DEFAULT_SYNC_PERIOD; } +static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) +{ + return DEFAULT_SYNC_VER; +} + #endif /* diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e84987f..3e7961e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) return; - if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) + if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff) return; spin_lock_bh(&ipvs->sync_buff_lock); @@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (ipvs->sysctl_sync_ver == 0) { + if (sysctl_sync_ver(ipvs) == 0) { ip_vs_sync_conn_v0(net, cp); return; } -- cgit v0.10.2 From 71a8ab6cad63b4816711f2ea518755677a870f6f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_expire_nodest_conn() In preparation for not including sysctl_expire_nodest_conn in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6a0053d..d418bc6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -613,10 +613,16 @@ static int sysctl_nat_icmp_send(struct net *net) return ipvs->sysctl_nat_icmp_send; } +static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_expire_nodest_conn; +} + #else static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } static int sysctl_nat_icmp_send(struct net *net) { return 0; } +static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -1583,7 +1589,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (ipvs->sysctl_expire_nodest_conn) { + if (sysctl_expire_nodest_conn(ipvs)) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } -- cgit v0.10.2 From 8e1b0b1b560019cafebe45a7d9e6ec1122fedc7b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add expire_quiescent_template() In preparation for not including sysctl_expire_quiescent_template in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9c2a517..f289306 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_dec(&dest->refcnt); } +static int expire_quiescent_template(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest) +{ +#ifdef CONFIG_SYSCTL + return ipvs->sysctl_expire_quiescent_template && + (atomic_read(&dest->weight) == 0); +#else + return 0; +#endif +} /* * Checking if the destination of a connection template is available. @@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (ipvs->sysctl_expire_quiescent_template && - (atomic_read(&dest->weight) == 0))) { + expire_quiescent_template(ipvs, dest)) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", -- cgit v0.10.2 From b27d777ec54205eb56cf4e873d043a426881c629 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Conditinally use sysctl_lblc{r}_expiration In preparation for not including sysctl_lblc{r}_expiration in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 6bf7a80..51a27f5 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -63,6 +63,8 @@ #define CHECK_EXPIRE_INTERVAL (60*HZ) #define ENTRY_TIMEOUT (6*60*HZ) +#define DEFAULT_EXPIRATION (24*60*60*HZ) + /* * It is for full expiration check. * When there is no partial expiration check (garbage collection) @@ -238,6 +240,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) } } +static int sysctl_lblc_expiration(struct ip_vs_service *svc) +{ +#ifdef CONFIG_SYSCTL + struct netns_ipvs *ipvs = net_ipvs(svc->net); + return ipvs->sysctl_lblc_expiration; +#else + return DEFAULT_EXPIRATION; +#endif +} static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { @@ -245,7 +256,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; - struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; ibucket[j], list) { if (time_before(now, en->lastuse + - ipvs->sysctl_lblc_expiration)) + sysctl_lblc_expiration(svc))) continue; ip_vs_lblc_free(en); @@ -550,7 +560,7 @@ static int __net_init __ip_vs_lblc_init(struct net *net) return -ENOMEM; } else ipvs->lblc_ctl_table = vs_vars_table; - ipvs->sysctl_lblc_expiration = 24*60*60*HZ; + ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0063176..7fb9190 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -63,6 +63,8 @@ #define CHECK_EXPIRE_INTERVAL (60*HZ) #define ENTRY_TIMEOUT (6*60*HZ) +#define DEFAULT_EXPIRATION (24*60*60*HZ) + /* * It is for full expiration check. * When there is no partial expiration check (garbage collection) @@ -410,6 +412,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) } } +static int sysctl_lblcr_expiration(struct ip_vs_service *svc) +{ +#ifdef CONFIG_SYSCTL + struct netns_ipvs *ipvs = net_ipvs(svc->net); + return ipvs->sysctl_lblcr_expiration; +#else + return DEFAULT_EXPIRATION; +#endif +} static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) { @@ -417,15 +428,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; - struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; isched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { - if (time_after(en->lastuse - + ipvs->sysctl_lblcr_expiration, now)) + if (time_after(en->lastuse + + sysctl_lblcr_expiration(svc), now)) continue; ip_vs_lblcr_free(en); @@ -650,7 +660,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -662,7 +671,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + - ipvs->sysctl_lblcr_expiration)) { + sysctl_lblcr_expiration(svc))) { struct ip_vs_dest *m; write_lock(&en->set.lock); @@ -746,7 +755,7 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) return -ENOMEM; } else ipvs->lblcr_ctl_table = vs_vars_table; - ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; + ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; #ifdef CONFIG_SYSCTL -- cgit v0.10.2 From 3a1bbf1885e94ecedf1deaaab1ace8409330aa7e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 687ef18..77ebece 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1253,6 +1253,7 @@ extern int ip_vs_icmp_xmit_v6 int offset); #endif +#ifdef CONFIG_SYSCTL /* * This is a simple mechanism to ignore packets when * we are loaded. Just set ip_vs_drop_rate to 'n' and @@ -1268,6 +1269,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) ipvs->drop_counter = ipvs->drop_rate; return 1; } +#else +static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } +#endif /* * ip_vs_fwd_tag returns the forwarding tag of the connection -- cgit v0.10.2 From a4e2f5a700cb93448b2da0e158149d18dc5290ef Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Conditional ip_vs_conntrack_enabled() ip_vs_conntrack_enabled() becomes a noop when CONFIG_SYSCTL is undefined. In preparation for not including sysctl_conntrack in struct netns_ipvs when CONFIG_SYCTL is not defined. Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 77ebece..299aeb5 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1359,7 +1359,11 @@ static inline void ip_vs_notrack(struct sk_buff *skb) */ static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) { +#ifdef CONFIG_SYSCTL return ipvs->sysctl_conntrack; +#else + return 0; +#endif } extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, -- cgit v0.10.2 From a7a86b8616bc1595c4f5f109b7c39d4eb5d55e32 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined Much of ip_vs_leave() is unnecessary if CONFIG_SYSCTL is undefined. I tried an approach of breaking the now #ifdef'ed portions out into a separate function. However this appeared to grow the compiled code on x86_64 by about 200 bytes in the case where CONFIG_SYSCTL is defined. So I have gone with the simpler though less elegant #ifdef'ed solution for now. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d418bc6..07accf6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { - struct net *net; - struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; +#ifdef CONFIG_SYSCTL + struct net *net; + struct netns_ipvs *ipvs; int unicast; +#endif ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); @@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_service_put(svc); return NF_DROP; } + +#ifdef CONFIG_SYSCTL net = skb_net(skb); #ifdef CONFIG_IP_VS_IPV6 @@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_put(cp); return ret; } +#endif /* * When the virtual ftp service is presented, packets destined -- cgit v0.10.2 From fb1de432c1c7c26afb2e86d166fc37888b6a4423 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Conditionally define and use ip_vs_lblc{r}_table ip_vs_lblc_table and ip_vs_lblcr_table, and code that uses them are unnecessary when CONFIG_SYSCTL is undefined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 51a27f5..f276df9 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -114,7 +114,7 @@ struct ip_vs_lblc_table { /* * IPVS LBLC sysctl table */ - +#ifdef CONFIG_SYSCTL static ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", @@ -125,6 +125,7 @@ static ctl_table vs_vars_table[] = { }, { } }; +#endif static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { @@ -548,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = /* * per netns init. */ +#ifdef CONFIG_SYSCTL static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -563,7 +565,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net) ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; -#ifdef CONFIG_SYSCTL ipvs->lblc_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblc_ctl_table); @@ -572,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net) kfree(ipvs->lblc_ctl_table); return -ENOMEM; } -#endif return 0; } @@ -581,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblc_ctl_header); -#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); } +#else + +static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; } +static void __net_exit __ip_vs_lblc_exit(struct net *net) { } + +#endif + static struct pernet_operations ip_vs_lblc_ops = { .init = __ip_vs_lblc_init, .exit = __ip_vs_lblc_exit, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 7fb9190..cb1c991 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -285,6 +285,7 @@ struct ip_vs_lblcr_table { }; +#ifdef CONFIG_SYSCTL /* * IPVS LBLCR sysctl table */ @@ -299,6 +300,7 @@ static ctl_table vs_vars_table[] = { }, { } }; +#endif static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { @@ -743,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = /* * per netns init. */ +#ifdef CONFIG_SYSCTL static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -758,7 +761,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; -#ifdef CONFIG_SYSCTL ipvs->lblcr_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblcr_ctl_table); @@ -767,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) kfree(ipvs->lblcr_ctl_table); return -ENOMEM; } -#endif return 0; } @@ -776,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblcr_ctl_header); -#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); } +#else + +static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; } +static void __net_exit __ip_vs_lblcr_exit(struct net *net) { } + +#endif + static struct pernet_operations ip_vs_lblcr_ops = { .init = __ip_vs_lblcr_init, .exit = __ip_vs_lblcr_exit, -- cgit v0.10.2 From 14e405461e664b777e2a5636e10b2ebf36a686ec Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Add __ip_vs_control_{init,cleanup}_sysctl() Break out the portions of __ip_vs_control_init() and __ip_vs_control_cleanup() where aren't necessary when CONFIG_SYSCTL is undefined. Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 364520f..fa6d44c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -88,6 +88,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net, return 0; } #endif + +#ifdef CONFIG_SYSCTL /* * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs @@ -229,6 +231,7 @@ static void defense_work_handler(struct work_struct *work) ip_vs_random_dropentry(ipvs->net); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } +#endif int ip_vs_use_count_inc(void) @@ -1511,7 +1514,7 @@ static int ip_vs_zero_all(struct net *net) return 0; } - +#ifdef CONFIG_SYSCTL static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1533,7 +1536,6 @@ proc_do_defense_mode(ctl_table *table, int write, return rc; } - static int proc_do_sync_threshold(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1767,6 +1769,7 @@ const struct ctl_path net_vs_ctl_path[] = { { } }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); +#endif #ifdef CONFIG_PROC_FS @@ -3511,7 +3514,8 @@ static void ip_vs_genl_unregister(void) /* * per netns intit/exit func. */ -int __net_init __ip_vs_control_init(struct net *net) +#ifdef CONFIG_SYSCTL +int __net_init __ip_vs_control_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3521,33 +3525,11 @@ int __net_init __ip_vs_control_init(struct net *net) spin_lock_init(&ipvs->dropentry_lock); spin_lock_init(&ipvs->droppacket_lock); spin_lock_init(&ipvs->securetcp_lock); - ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); - - /* Initialize rs_table */ - for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); - - INIT_LIST_HEAD(&ipvs->dest_trash); - atomic_set(&ipvs->ftpsvc_counter, 0); - atomic_set(&ipvs->nullsvc_counter, 0); - - /* procfs stats */ - ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->tot_stats.cpustats) { - pr_err("%s() alloc_percpu failed\n", __func__); - goto err_alloc; - } - spin_lock_init(&ipvs->tot_stats.lock); - - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); - proc_net_fops_create(net, "ip_vs_stats_percpu", 0, - &ip_vs_stats_percpu_fops); if (!net_eq(net, &init_net)) { tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); if (tbl == NULL) - goto err_dup; + return -ENOMEM; } else tbl = vs_vars; /* Initialize sysctl defaults */ @@ -3576,25 +3558,73 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; -#ifdef CONFIG_SYSCTL ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl); if (ipvs->sysctl_hdr == NULL) { if (!net_eq(net, &init_net)) kfree(tbl); - goto err_dup; + return -ENOMEM; } -#endif ip_vs_start_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + return 0; +} + +void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); + unregister_net_sysctl_table(ipvs->sysctl_hdr); +} -err_dup: +#else + +int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; } +void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } + +#endif + +int __net_init __ip_vs_control_init(struct net *net) +{ + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + + INIT_LIST_HEAD(&ipvs->dest_trash); + atomic_set(&ipvs->ftpsvc_counter, 0); + atomic_set(&ipvs->nullsvc_counter, 0); + + /* procfs stats */ + ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (ipvs->tot_stats.cpustats) { + pr_err("%s(): alloc_percpu.\n", __func__); + return -ENOMEM; + } + spin_lock_init(&ipvs->tot_stats.lock); + + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, + &ip_vs_stats_percpu_fops); + + if (__ip_vs_control_init_sysctl(net)) + goto err; + + return 0; + +err: free_percpu(ipvs->tot_stats.cpustats); -err_alloc: return -ENOMEM; } @@ -3604,11 +3634,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); - cancel_delayed_work_sync(&ipvs->defense_work); - cancel_work_sync(&ipvs->defense_work.work); -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(ipvs->sysctl_hdr); -#endif + __ip_vs_control_cleanup_sysctl(net); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); -- cgit v0.10.2 From f2247fbdc41372d64c89505280419ceb45d80a31 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Conditionally include sysctl members of struct netns_ipvs There is now no need to include sysctl members of struct netns_ipvs unless CONFIG_SYSCTL is defined. Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 299aeb5..272f593 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -839,6 +839,17 @@ struct netns_ipvs { struct ip_vs_stats tot_stats; /* Statistics & est. */ int num_services; /* no of virtual services */ + + rwlock_t rs_lock; /* real services table */ + /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ + struct lock_class_key ctl_key; /* ctl_mutex debuging */ + /* Trash for destinations */ + struct list_head dest_trash; + /* Service counters */ + atomic_t ftpsvc_counter; + atomic_t nullsvc_counter; + +#ifdef CONFIG_SYSCTL /* 1/rate drop and drop-entry variables */ struct delayed_work defense_work; /* Work handler */ int drop_rate; @@ -848,18 +859,12 @@ struct netns_ipvs { spinlock_t dropentry_lock; /* drop entry handling */ spinlock_t droppacket_lock; /* drop packet handling */ spinlock_t securetcp_lock; /* state and timeout tables */ - rwlock_t rs_lock; /* real services table */ - /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ - struct lock_class_key ctl_key; /* ctl_mutex debuging */ - /* Trash for destinations */ - struct list_head dest_trash; - /* Service counters */ - atomic_t ftpsvc_counter; - atomic_t nullsvc_counter; /* sys-ctl struct */ struct ctl_table_header *sysctl_hdr; struct ctl_table *sysctl_tbl; +#endif + /* sysctl variables */ int sysctl_amemthresh; int sysctl_am_droprate; -- cgit v0.10.2 From 8183e3a88aced228ab9770762692be6cc3786e80 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:23:28 +0100 Subject: netfilter: xt_connlimit: fix daddr connlimit in SNAT scenario We use the reply tuples when limiting the connections by the destination addresses, however, in SNAT scenario, the final reply tuples won't be ready until SNAT is done in POSTROUING or INPUT chain, and the following nf_conntrack_find_get() in count_tem() will get nothing, so connlimit can't work as expected. In this patch, the original tuples are always used, and an additional member addr is appended to save the address in either end. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index e029c48..1f4b9f9 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -33,8 +33,9 @@ /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { - struct list_head list; - struct nf_conntrack_tuple tuple; + struct list_head list; + struct nf_conntrack_tuple tuple; + union nf_inet_addr addr; }; struct xt_connlimit_data { @@ -151,7 +152,7 @@ static int count_them(struct net *net, continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) + if (same_source_net(addr, mask, &conn->addr, family)) /* same source network -> be counted! */ ++matches; nf_ct_put(found_ct); @@ -165,6 +166,7 @@ static int count_them(struct net *net, if (conn == NULL) return -ENOMEM; conn->tuple = *tuple; + conn->addr = *addr; list_add(&conn->list, hash); ++matches; } @@ -185,15 +187,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) int connections; ct = nf_ct_get(skb, &ctinfo); - if (ct != NULL) { - if (info->flags & XT_CONNLIMIT_DADDR) - tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - else - tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) { + if (ct != NULL) + tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + par->family, &tuple)) goto hotdrop; - } if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); -- cgit v0.10.2 From 0e23ca14f8e76091b402c01e2b169aba3d187b98 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:24:56 +0100 Subject: netfilter: xt_connlimit: use kmalloc() instead of kzalloc() All the members are initialized after kzalloc(). Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 1f4b9f9..ade2a80 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -162,7 +162,7 @@ static int count_them(struct net *net, if (addit) { /* save the new connection in our list */ - conn = kzalloc(sizeof(*conn), GFP_ATOMIC); + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); if (conn == NULL) return -ENOMEM; conn->tuple = *tuple; -- cgit v0.10.2 From 3e0d5149e6dcbe7111a63773a07c5b33f7ca7236 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:25:42 +0100 Subject: netfilter: xt_connlimit: use hlist instead The header of hlist is smaller than list. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ade2a80..da56d6e 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -33,14 +33,14 @@ /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { - struct list_head list; + struct hlist_node node; struct nf_conntrack_tuple tuple; union nf_inet_addr addr; }; struct xt_connlimit_data { - struct list_head iphash[256]; - spinlock_t lock; + struct hlist_head iphash[256]; + spinlock_t lock; }; static u_int32_t connlimit_rnd __read_mostly; @@ -102,9 +102,9 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct xt_connlimit_conn *tmp; + struct hlist_node *pos, *n; struct nf_conn *found_ct; - struct list_head *hash; + struct hlist_head *hash; bool addit = true; int matches = 0; @@ -116,7 +116,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - list_for_each_entry_safe(conn, tmp, hash, list) { + hlist_for_each_entry_safe(conn, pos, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -136,7 +136,7 @@ static int count_them(struct net *net, if (found == NULL) { /* this one is gone */ - list_del(&conn->list); + hlist_del(&conn->node); kfree(conn); continue; } @@ -147,7 +147,7 @@ static int count_them(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - list_del(&conn->list); + hlist_del(&conn->node); kfree(conn); continue; } @@ -167,7 +167,7 @@ static int count_them(struct net *net, return -ENOMEM; conn->tuple = *tuple; conn->addr = *addr; - list_add(&conn->list, hash); + hlist_add_head(&conn->node, hash); ++matches; } @@ -246,7 +246,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) spin_lock_init(&info->data->lock); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) - INIT_LIST_HEAD(&info->data->iphash[i]); + INIT_HLIST_HEAD(&info->data->iphash[i]); return 0; } @@ -255,15 +255,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct xt_connlimit_conn *tmp; - struct list_head *hash = info->data->iphash; + struct hlist_node *pos, *n; + struct hlist_head *hash = info->data->iphash; unsigned int i; nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - list_for_each_entry_safe(conn, tmp, &hash[i], list) { - list_del(&conn->list); + hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_del(&conn->node); kfree(conn); } } -- cgit v0.10.2 From 4656c4d61adb8dc3ee04c08f57a5cc7598814420 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:26:32 +0100 Subject: netfilter: xt_connlimit: remove connlimit_rnd_inited A potential race condition when generating connlimit_rnd is also fixed. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index da56d6e..c6d5a83 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -44,7 +44,6 @@ struct xt_connlimit_data { }; static u_int32_t connlimit_rnd __read_mostly; -static bool connlimit_rnd_inited __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { @@ -226,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) unsigned int i; int ret; - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; + if (unlikely(!connlimit_rnd)) { + u_int32_t rand; + + do { + get_random_bytes(&rand, sizeof(rand)); + } while (!rand); + cmpxchg(&connlimit_rnd, 0, rand); } ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) { -- cgit v0.10.2 From 42eab94fff18cb1091d3501cd284d6bd6cc9c143 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 15 Mar 2011 13:35:21 +0100 Subject: netfilter: arp_tables: fix infoleak to userspace Structures ipt_replace, compat_ipt_replace, and xt_get_revision are copied from userspace. Fields of these structs that are zero-terminated strings are not checked. When they are used as argument to a format string containing "%s" in request_module(), some sensitive information is leaked to userspace via argument of spawned modprobe process. The first bug was introduced before the git epoch; the second is introduced by 6b7d31fc (v2.6.15-rc1); the third is introduced by 6b7d31fc (v2.6.15-rc1). To trigger the bug one should have CAP_NET_ADMIN. Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e95054c..4b5d457 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1066,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user, /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1488,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1740,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), -- cgit v0.10.2 From 78b79876761b86653df89c48a7010b5cbd41a84a Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 15 Mar 2011 13:36:05 +0100 Subject: netfilter: ip_tables: fix infoleak to userspace Structures ipt_replace, compat_ipt_replace, and xt_get_revision are copied from userspace. Fields of these structs that are zero-terminated strings are not checked. When they are used as argument to a format string containing "%s" in request_module(), some sensitive information is leaked to userspace via argument of spawned modprobe process. The first and the third bugs were introduced before the git epoch; the second was introduced in 2722971c (v2.6.17-rc1). To trigger the bug one should have CAP_NET_ADMIN. Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef7d7b9..b09ed0d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1262,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1807,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -2036,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; -- cgit v0.10.2 From 6a8ab060779779de8aea92ce3337ca348f973f54 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 15 Mar 2011 13:37:13 +0100 Subject: ipv6: netfilter: ip6_tables: fix infoleak to userspace Structures ip6t_replace, compat_ip6t_replace, and xt_get_revision are copied from userspace. Fields of these structs that are zero-terminated strings are not checked. When they are used as argument to a format string containing "%s" in request_module(), some sensitive information is leaked to userspace via argument of spawned modprobe process. The first bug was introduced before the git epoch; the second was introduced in 3bc3fe5e (v2.6.25-rc1); the third is introduced by 6b7d31fc (v2.6.15-rc1). To trigger the bug one should have CAP_NET_ADMIN. Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 47b7b8d..c9598a9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1275,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1822,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -2051,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; -- cgit v0.10.2 From de81bbea17650769882bc625d6b5df11ee7c4b24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Mar 2011 20:16:20 +0100 Subject: netfilter: ipt_addrtype: rename to xt_addrtype Followup patch will add ipv6 support. ipt_addrtype.h is retained for compatibility reasons, but no longer used by the kernel. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 05b248a..a7ee7cf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -618,3 +618,11 @@ Who: Jan Engelhardt Files: net/netfilter/xt_connlimit.c ---------------------------- + +What: ipt_addrtype match include file +When: 2012 +Why: superseded by xt_addrtype +Who: Florian Westphal +Files: include/linux/netfilter_ipv4/ipt_addrtype.h + +---------------------------- diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 15e83bf..a1b410c 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -29,6 +29,7 @@ header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h +header-y += xt_addrtype.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/linux/netfilter/xt_addrtype.h b/include/linux/netfilter/xt_addrtype.h new file mode 100644 index 0000000..b492fc8 --- /dev/null +++ b/include/linux/netfilter/xt_addrtype.h @@ -0,0 +1,27 @@ +#ifndef _XT_ADDRTYPE_H +#define _XT_ADDRTYPE_H + +#include + +enum { + XT_ADDRTYPE_INVERT_SOURCE = 0x0001, + XT_ADDRTYPE_INVERT_DEST = 0x0002, + XT_ADDRTYPE_LIMIT_IFACE_IN = 0x0004, + XT_ADDRTYPE_LIMIT_IFACE_OUT = 0x0008, +}; + +struct xt_addrtype_info_v1 { + __u16 source; /* source-type mask */ + __u16 dest; /* dest-type mask */ + __u32 flags; +}; + +/* revision 0 */ +struct xt_addrtype_info { + __u16 source; /* source-type mask */ + __u16 dest; /* dest-type mask */ + __u32 invert_source; + __u32 invert_dest; +}; + +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f926a31..1dfc18a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -64,16 +64,6 @@ config IP_NF_IPTABLES if IP_NF_IPTABLES # The matches. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - . If unsure, say `N'. - config IP_NF_MATCH_AH tristate '"ah" match support' depends on NETFILTER_ADVANCED diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19eb59d..dca2082 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c deleted file mode 100644 index db8bff0..0000000 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * iptables module to match inet_addr_type() of an ip. - * - * Copyright (c) 2004 Patrick McHardy - * (C) 2007 Laszlo Attila Toth - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("Xtables: address type match for IPv4"); - -static inline bool match_type(struct net *net, const struct net_device *dev, - __be32 addr, u_int16_t mask) -{ - return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); -} - -static bool -addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - const struct ipt_addrtype_info *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - bool ret = true; - - if (info->source) - ret &= match_type(net, NULL, iph->saddr, info->source) ^ - info->invert_source; - if (info->dest) - ret &= match_type(net, NULL, iph->daddr, info->dest) ^ - info->invert_dest; - - return ret; -} - -static bool -addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - const struct ipt_addrtype_info_v1 *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - const struct net_device *dev = NULL; - bool ret = true; - - if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; - else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; - - if (info->source) - ret &= match_type(net, dev, iph->saddr, info->source) ^ - (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); - if (ret && info->dest) - ret &= match_type(net, dev, iph->daddr, info->dest) ^ - !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); - return ret; -} - -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) -{ - struct ipt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("both incoming and outgoing " - "interface limitation cannot be selected\n"); - return -EINVAL; - } - - if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN)) && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("output interface limitation " - "not valid in PREROUTING and INPUT\n"); - return -EINVAL; - } - - if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT)) && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { - pr_info("input interface limitation " - "not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; - } - - return 0; -} - -static struct xt_match addrtype_mt_reg[] __read_mostly = { - { - .name = "addrtype", - .family = NFPROTO_IPV4, - .match = addrtype_mt_v0, - .matchsize = sizeof(struct ipt_addrtype_info), - .me = THIS_MODULE - }, - { - .name = "addrtype", - .family = NFPROTO_IPV4, - .revision = 1, - .match = addrtype_mt_v1, - .checkentry = addrtype_mt_checkentry_v1, - .matchsize = sizeof(struct ipt_addrtype_info_v1), - .me = THIS_MODULE - } -}; - -static int __init addrtype_mt_init(void) -{ - return xt_register_matches(addrtype_mt_reg, - ARRAY_SIZE(addrtype_mt_reg)); -} - -static void __exit addrtype_mt_exit(void) -{ - xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); -} - -module_init(addrtype_mt_init); -module_exit(addrtype_mt_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 82a6e0d..32bff6d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -649,6 +649,16 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP comment "Xtables matches" +config NETFILTER_XT_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index d57a890..1a02853 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches +obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c new file mode 100644 index 0000000..e89c0b8 --- /dev/null +++ b/net/netfilter/xt_addrtype.c @@ -0,0 +1,135 @@ +/* + * iptables module to match inet_addr_type() of an ip. + * + * Copyright (c) 2004 Patrick McHardy + * (C) 2007 Laszlo Attila Toth + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: address type match"); +MODULE_ALIAS("ipt_addrtype"); + +static inline bool match_type(struct net *net, const struct net_device *dev, + __be32 addr, u_int16_t mask) +{ + return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); +} + +static bool +addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? par->in : par->out); + const struct xt_addrtype_info *info = par->matchinfo; + const struct iphdr *iph = ip_hdr(skb); + bool ret = true; + + if (info->source) + ret &= match_type(net, NULL, iph->saddr, info->source) ^ + info->invert_source; + if (info->dest) + ret &= match_type(net, NULL, iph->daddr, info->dest) ^ + info->invert_dest; + + return ret; +} + +static bool +addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? par->in : par->out); + const struct xt_addrtype_info_v1 *info = par->matchinfo; + const struct iphdr *iph = ip_hdr(skb); + const struct net_device *dev = NULL; + bool ret = true; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) + dev = par->in; + else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + dev = par->out; + + if (info->source) + ret &= match_type(net, dev, iph->saddr, info->source) ^ + (info->flags & XT_ADDRTYPE_INVERT_SOURCE); + if (ret && info->dest) + ret &= match_type(net, dev, iph->daddr, info->dest) ^ + !!(info->flags & XT_ADDRTYPE_INVERT_DEST); + return ret; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { + pr_info("both incoming and outgoing " + "interface limitation cannot be selected\n"); + return -EINVAL; + } + + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { + pr_info("output interface limitation " + "not valid in PREROUTING and INPUT\n"); + return -EINVAL; + } + + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { + pr_info("input interface limitation " + "not valid in POSTROUTING and OUTPUT\n"); + return -EINVAL; + } + + return 0; +} + +static struct xt_match addrtype_mt_reg[] __read_mostly = { + { + .name = "addrtype", + .family = NFPROTO_IPV4, + .match = addrtype_mt_v0, + .matchsize = sizeof(struct xt_addrtype_info), + .me = THIS_MODULE + }, + { + .name = "addrtype", + .family = NFPROTO_IPV4, + .revision = 1, + .match = addrtype_mt_v1, + .checkentry = addrtype_mt_checkentry_v1, + .matchsize = sizeof(struct xt_addrtype_info_v1), + .me = THIS_MODULE + } +}; + +static int __init addrtype_mt_init(void) +{ + return xt_register_matches(addrtype_mt_reg, + ARRAY_SIZE(addrtype_mt_reg)); +} + +static void __exit addrtype_mt_exit(void) +{ + xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); +} + +module_init(addrtype_mt_init); +module_exit(addrtype_mt_exit); -- cgit v0.10.2 From 2f5dc63123905a89d4260ab8ee08d19ec104db04 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Mar 2011 20:17:44 +0100 Subject: netfilter: xt_addrtype: ipv6 support The kernel will refuse certain types that do not work in ipv6 mode. We can then add these features incrementally without risk of userspace breakage. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy diff --git a/include/linux/netfilter/xt_addrtype.h b/include/linux/netfilter/xt_addrtype.h index b492fc8..b156baa 100644 --- a/include/linux/netfilter/xt_addrtype.h +++ b/include/linux/netfilter/xt_addrtype.h @@ -10,6 +10,23 @@ enum { XT_ADDRTYPE_LIMIT_IFACE_OUT = 0x0008, }; + +/* rtn_type enum values from rtnetlink.h, but shifted */ +enum { + XT_ADDRTYPE_UNSPEC = 1 << 0, + XT_ADDRTYPE_UNICAST = 1 << 1, /* 1 << RTN_UNICAST */ + XT_ADDRTYPE_LOCAL = 1 << 2, /* 1 << RTN_LOCAL, etc */ + XT_ADDRTYPE_BROADCAST = 1 << 3, + XT_ADDRTYPE_ANYCAST = 1 << 4, + XT_ADDRTYPE_MULTICAST = 1 << 5, + XT_ADDRTYPE_BLACKHOLE = 1 << 6, + XT_ADDRTYPE_UNREACHABLE = 1 << 7, + XT_ADDRTYPE_PROHIBIT = 1 << 8, + XT_ADDRTYPE_THROW = 1 << 9, + XT_ADDRTYPE_NAT = 1 << 10, + XT_ADDRTYPE_XRESOLVE = 1 << 11, +}; + struct xt_addrtype_info_v1 { __u16 source; /* source-type mask */ __u16 dest; /* dest-type mask */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 32bff6d..c3f988a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -652,6 +652,7 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index e89c0b8..2220b85 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -16,6 +16,12 @@ #include #include +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#include +#include +#include +#endif + #include #include @@ -23,6 +29,73 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("Xtables: address type match"); MODULE_ALIAS("ipt_addrtype"); +MODULE_ALIAS("ip6t_addrtype"); + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +{ + u32 ret; + + if (!rt) + return XT_ADDRTYPE_UNREACHABLE; + + if (rt->rt6i_flags & RTF_REJECT) + ret = XT_ADDRTYPE_UNREACHABLE; + else + ret = 0; + + if (rt->rt6i_flags & RTF_LOCAL) + ret |= XT_ADDRTYPE_LOCAL; + if (rt->rt6i_flags & RTF_ANYCAST) + ret |= XT_ADDRTYPE_ANYCAST; + return ret; +} + +static bool match_type6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr, u16 mask) +{ + int addr_type = ipv6_addr_type(addr); + + if ((mask & XT_ADDRTYPE_MULTICAST) && + !(addr_type & IPV6_ADDR_MULTICAST)) + return false; + if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST)) + return false; + if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY) + return false; + + if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | + XT_ADDRTYPE_UNREACHABLE) & mask) { + struct rt6_info *rt; + u32 type; + int ifindex = dev ? dev->ifindex : 0; + + rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); + + type = xt_addrtype_rt6_to_type(rt); + + dst_release(&rt->dst); + return !!(mask & type); + } + return true; +} + +static bool +addrtype_mt6(struct net *net, const struct net_device *dev, + const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + bool ret = true; + + if (info->source) + ret &= match_type6(net, dev, &iph->saddr, info->source) ^ + (info->flags & XT_ADDRTYPE_INVERT_SOURCE); + if (ret && info->dest) + ret &= match_type6(net, dev, &iph->daddr, info->dest) ^ + !!(info->flags & XT_ADDRTYPE_INVERT_DEST); + return ret; +} +#endif static inline bool match_type(struct net *net, const struct net_device *dev, __be32 addr, u_int16_t mask) @@ -53,7 +126,7 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct xt_addrtype_info_v1 *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; @@ -62,6 +135,11 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) dev = par->out; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + if (par->family == NFPROTO_IPV6) + return addrtype_mt6(net, dev, skb, info); +#endif + iph = ip_hdr(skb); if (info->source) ret &= match_type(net, dev, iph->saddr, info->source) ^ (info->flags & XT_ADDRTYPE_INVERT_SOURCE); @@ -98,6 +176,22 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) return -EINVAL; } +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + if (par->family == NFPROTO_IPV6) { + if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { + pr_err("ipv6 BLACKHOLE matching not supported\n"); + return -EINVAL; + } + if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { + pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n"); + return -EINVAL; + } + if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { + pr_err("ipv6 does not support BROADCAST matching\n"); + return -EINVAL; + } + } +#endif return 0; } @@ -111,7 +205,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { }, { .name = "addrtype", - .family = NFPROTO_IPV4, + .family = NFPROTO_UNSPEC, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, -- cgit v0.10.2