diff options
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/core.c | 10 | ||||
-rw-r--r-- | net/netfilter/ipvs/Kconfig | 3 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_app.c | 58 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 76 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 16 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c | 21 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 83 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto.c | 5 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_acct.c | 4 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_cthelper.c | 2 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_log.c | 14 | ||||
-rw-r--r-- | net/netfilter/xt_LOG.c | 16 | ||||
-rw-r--r-- | net/netfilter/xt_NFQUEUE.c | 8 | ||||
-rw-r--r-- | net/netfilter/xt_osf.c | 2 | ||||
-rw-r--r-- | net/netfilter/xt_owner.c | 30 | ||||
-rw-r--r-- | net/netfilter/xt_recent.c | 13 |
16 files changed, 261 insertions, 100 deletions
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0bc6b60..8f4b0b2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -131,14 +131,13 @@ unsigned int nf_iterate(struct list_head *head, int hook_thresh) { unsigned int verdict; + struct nf_hook_ops *elem = list_entry_rcu(*i, struct nf_hook_ops, list); /* * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_continue_rcu(*i, head) { - struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; - + list_for_each_entry_continue_rcu(elem, head, list) { if (hook_thresh > elem->priority) continue; @@ -155,11 +154,14 @@ repeat: continue; } #endif - if (verdict != NF_REPEAT) + if (verdict != NF_REPEAT) { + *i = &elem->list; return verdict; + } goto repeat; } } + *i = &elem->list; return NF_ACCEPT; } diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index f987138..8b2cffd 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -250,7 +250,8 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT + depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \ + NF_CONNTRACK_FTP select IP_VS_NFCT ---help--- FTP is a protocol that transfers IP address and/or port number in diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 64f9e8f..9713e6e 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, } -/* - * ip_vs_app registration routine - */ -int register_ip_vs_app(struct net *net, struct ip_vs_app *app) +/* Register application for netns */ +struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) { struct netns_ipvs *ipvs = net_ipvs(net); - /* increase the module use count */ - ip_vs_use_count_inc(); + struct ip_vs_app *a; + int err = 0; + + if (!ipvs) + return ERR_PTR(-ENOENT); mutex_lock(&__ip_vs_app_mutex); - list_add(&app->a_list, &ipvs->app_list); + list_for_each_entry(a, &ipvs->app_list, a_list) { + if (!strcmp(app->name, a->name)) { + err = -EEXIST; + goto out_unlock; + } + } + a = kmemdup(app, sizeof(*app), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out_unlock; + } + INIT_LIST_HEAD(&a->incs_list); + list_add(&a->a_list, &ipvs->app_list); + /* increase the module use count */ + ip_vs_use_count_inc(); +out_unlock: mutex_unlock(&__ip_vs_app_mutex); - return 0; + return err ? ERR_PTR(err) : a; } @@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { - struct ip_vs_app *inc, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_app *a, *anxt, *inc, *nxt; + + if (!ipvs) + return; mutex_lock(&__ip_vs_app_mutex); - list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); - } + list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { + if (app && strcmp(app->name, a->name)) + continue; + list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { + ip_vs_app_inc_release(net, inc); + } - list_del(&app->a_list); + list_del(&a->a_list); + kfree(a); - mutex_unlock(&__ip_vs_app_mutex); + /* decrease the module use count */ + ip_vs_use_count_dec(); + } - /* decrease the module use count */ - ip_vs_use_count_dec(); + mutex_unlock(&__ip_vs_app_mutex); } @@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net) void __net_exit ip_vs_app_net_cleanup(struct net *net) { + unregister_ip_vs_app(net, NULL /* all */); proc_net_remove(net, "ip_vs_app"); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b54ecce..58918e2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offset, ihl, verdict; + unsigned int offset, offset2, ihl, verdict; + bool ipip; *related = 1; @@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) net = skb_net(skb); + /* Special case for errors for IPIP packets */ + ipip = false; + if (cih->protocol == IPPROTO_IPIP) { + if (unlikely(cih->frag_off & htons(IP_OFFSET))) + return NF_ACCEPT; + /* Error for our IPIP must arrive at LOCAL_IN */ + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) + return NF_ACCEPT; + offset += cih->ihl * 4; + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ + ipip = true; + } + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking incoming ICMP for"); + offset2 = offset; offset += cih->ihl * 4; ip_vs_fill_iphdr(AF_INET, cih, &ciph); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); + /* The embedded headers contain source and dest in reverse order. + * For IPIP this is error for request, not for reply. + */ + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); if (!cp) return NF_ACCEPT; @@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto out; } + if (ipip) { + __be32 info = ic->un.gateway; + + /* Update the MTU */ + if (ic->type == ICMP_DEST_UNREACH && + ic->code == ICMP_FRAG_NEEDED) { + struct ip_vs_dest *dest = cp->dest; + u32 mtu = ntohs(ic->un.frag.mtu); + + /* Strip outer IP and ICMP, go to IPIP header */ + __skb_pull(skb, ihl + sizeof(_icmph)); + offset2 -= ihl + sizeof(_icmph); + skb_reset_network_header(skb); + IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); + rcu_read_lock(); + ipv4_update_pmtu(skb, dev_net(skb->dev), + mtu, 0, 0, 0, 0); + rcu_read_unlock(); + /* Client uses PMTUD? */ + if (!(cih->frag_off & htons(IP_DF))) + goto ignore_ipip; + /* Prefer the resulting PMTU */ + if (dest) { + spin_lock(&dest->dst_lock); + if (dest->dst_cache) + mtu = dst_mtu(dest->dst_cache); + spin_unlock(&dest->dst_lock); + } + if (mtu > 68 + sizeof(struct iphdr)) + mtu -= sizeof(struct iphdr); + info = htonl(mtu); + } + /* Strip outer IP, ICMP and IPIP, go to IP header of + * original request. + */ + __skb_pull(skb, offset2); + skb_reset_network_header(skb); + IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, + ic->type, ic->code, ntohl(info)); + icmp_send(skb, ic->type, ic->code, info); + /* ICMP can be shorter but anyways, account it */ + ip_vs_out_stats(cp, skb); + +ignore_ipip: + consume_skb(skb); + verdict = NF_STOLEN; + goto out; + } + /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f51013c..767cc12 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1803,6 +1803,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "pmtu_disc", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3678,7 +3684,7 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3729,6 +3735,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); tbl[idx++].data = &ipvs->sysctl_sync_retries; tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + ipvs->sysctl_pmtu_disc = 1; + tbl[idx++].data = &ipvs->sysctl_pmtu_disc; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); @@ -3746,7 +3754,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3757,8 +3765,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index b20b29c..ad70b7e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); - if (!app) - return -ENOMEM; - INIT_LIST_HEAD(&app->a_list); - INIT_LIST_HEAD(&app->incs_list); - ipvs->ftp_app = app; - ret = register_ip_vs_app(net, app); - if (ret) - goto err_exit; + app = register_ip_vs_app(net, &ip_vs_ftp); + if (IS_ERR(app)) + return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) @@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, app); -err_exit: - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); return ret; } /* @@ -474,10 +466,7 @@ err_exit: */ static void __ip_vs_ftp_exit(struct net *net) { - struct netns_ipvs *ipvs = net_ipvs(net); - - unregister_ip_vs_app(net, ipvs->ftp_app); - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65b616a..543a554 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -49,6 +49,7 @@ enum { IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to * local */ + IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ }; /* @@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) return dst; } +/* Get route to daddr, update *saddr, optionally bind route to saddr */ +static struct rtable *do_output_route4(struct net *net, __be32 daddr, + u32 rtos, int rt_mode, __be32 *saddr) +{ + struct flowi4 fl4; + struct rtable *rt; + int loop = 0; + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; + fl4.flowi4_tos = rtos; + +retry: + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + /* Invalid saddr ? */ + if (PTR_ERR(rt) == -EINVAL && *saddr && + rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { + *saddr = 0; + flowi4_update_output(&fl4, 0, rtos, daddr, 0); + goto retry; + } + IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); + return NULL; + } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + ip_rt_put(rt); + *saddr = fl4.saddr; + flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + loop++; + goto retry; + } + *saddr = fl4.saddr; + return rt; +} + /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, @@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = dest->addr.ip; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { + rt = do_output_route4(net, dest->addr.ip, rtos, + rt_mode, &dest->dst_saddr.ip); + if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &dest->addr.ip); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - dest->dst_saddr.ip = fl4.saddr; IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " "rtos=%X\n", &dest->addr.ip, &dest->dst_saddr.ip, @@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, *ret_saddr = dest->dst_saddr.ip; spin_unlock(&dest->dst_lock); } else { - struct flowi4 fl4; + __be32 saddr = htonl(INADDR_ANY); - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = daddr; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &daddr); + /* For such unconfigured boxes avoid many route lookups + * for performance reasons because we do not remember saddr + */ + rt_mode &= ~IP_VS_RT_MODE_CONNECT; + rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + if (!rt) return NULL; - } if (ret_saddr) - *ret_saddr = fl4.saddr; + *ret_saddr = saddr; } local = rt->rt_flags & RTCF_LOCAL; @@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) old_dst = dest->dst_cache; dest->dst_cache = NULL; dst_release(old_dst); + dest->dst_saddr.ip = 0; } #define IP_VS_XMIT_TUNNEL(skb, cp) \ @@ -766,12 +795,13 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; - __be16 df = old_iph->frag_off; + __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -781,7 +811,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { @@ -796,13 +827,13 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error_put; } - if (skb_dst(skb)) + if (rt_is_output_route(skb_rtable(skb))) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - df |= (old_iph->frag_off & htons(IP_DF)); + /* Copy DF, reset fragment offset and MF */ + df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; - if ((old_iph->frag_off & htons(IP_DF) && - mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { + if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 0dc6385..51e928d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -21,7 +21,6 @@ #include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> -#include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net, nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ - rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l3proto, proto); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net, nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); /* Remove all contrack entries for this protocol */ - rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index b2e7310..d7ec928 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, - be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); + be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES]))); } if (tb[NFACCT_PKTS]) { atomic64_set(&nfacct->pkts, - be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); + be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d683619..32a1ba3 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; - tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); return 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 14e2f39..be194b1 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -55,6 +55,7 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; + struct user_namespace *peer_user_ns; /* User namespace of the peer process */ int peer_pid; /* PID of the peer process */ /* configurable parameters */ @@ -132,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid) +instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -162,6 +163,7 @@ instance_create(u_int16_t group_num, int pid) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); + inst->peer_user_ns = user_ns; inst->peer_pid = pid; inst->group_num = group_num; @@ -503,8 +505,11 @@ __build_packet_message(struct nfulnl_instance *inst, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { struct file *file = skb->sk->sk_socket->file; - __be32 uid = htonl(file->f_cred->fsuid); - __be32 gid = htonl(file->f_cred->fsgid); + __be32 uid = htonl(from_kuid_munged(inst->peer_user_ns, + file->f_cred->fsuid)); + __be32 gid = htonl(from_kgid_munged(inst->peer_user_ns, + file->f_cred->fsgid)); + /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) @@ -783,7 +788,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).pid, + sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index ff5f75f..02a2bf4 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -363,10 +363,12 @@ static void dump_ipv4_packet(struct sbuff *m, /* Max length: 15 "UID=4294967295 " */ if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) + if (skb->sk->sk_socket && skb->sk->sk_socket->file) { + const struct cred *cred = skb->sk->sk_socket->file->f_cred; sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } read_unlock_bh(&skb->sk->sk_callback_lock); } @@ -719,10 +721,12 @@ static void dump_ipv6_packet(struct sbuff *m, /* Max length: 15 "UID=4294967295 " */ if ((logflags & XT_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) + if (skb->sk->sk_socket && skb->sk->sk_socket->file) { + const struct cred *cred = skb->sk->sk_socket->file->f_cred; sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } read_unlock_bh(&skb->sk->sk_callback_lock); } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 7babe7d..817f9e9 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); /* packets in either direction go into same queue */ - if (iph->saddr < iph->daddr) + if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, (__force u32)iph->daddr, iph->protocol, jhash_initval); @@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb) const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; - if (ip6h->saddr.s6_addr32[3] < ip6h->daddr.s6_addr32[3]) { + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { a = (__force u32) ip6h->saddr.s6_addr32[3]; b = (__force u32) ip6h->daddr.s6_addr32[3]; } else { @@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb) a = (__force u32) ip6h->daddr.s6_addr32[3]; } - if (ip6h->saddr.s6_addr32[1] < ip6h->daddr.s6_addr32[1]) + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) c = (__force u32) ip6h->saddr.s6_addr32[1]; else c = (__force u32) ip6h->daddr.s6_addr32[1]; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 846f895..a5e673d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) mss <<= 8; mss |= optp[2]; - mss = ntohs(mss); + mss = ntohs((__force __be16)mss); break; case OSFOPT_TS: loop_cont = 1; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 772d738..ca2e577 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -17,6 +17,17 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_owner.h> +static int owner_check(const struct xt_mtchk_param *par) +{ + struct xt_owner_match_info *info = par->matchinfo; + + /* For now only allow adding matches from the initial user namespace */ + if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) && + (current_user_ns() != &init_user_ns)) + return -EINVAL; + return 0; +} + static bool owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -37,17 +48,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; - if (info->match & XT_OWNER_UID) - if ((filp->f_cred->fsuid >= info->uid_min && - filp->f_cred->fsuid <= info->uid_max) ^ + if (info->match & XT_OWNER_UID) { + kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); + kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + if ((uid_gte(filp->f_cred->fsuid, uid_min) && + uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_OWNER_UID)) return false; + } - if (info->match & XT_OWNER_GID) - if ((filp->f_cred->fsgid >= info->gid_min && - filp->f_cred->fsgid <= info->gid_max) ^ + if (info->match & XT_OWNER_GID) { + kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); + kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + if ((gid_gte(filp->f_cred->fsgid, gid_min) && + gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_OWNER_GID)) return false; + } return true; } @@ -56,6 +73,7 @@ static struct xt_match owner_mt_reg __read_mostly = { .name = "owner", .revision = 1, .family = NFPROTO_UNSPEC, + .checkentry = owner_check, .match = owner_mt, .matchsize = sizeof(struct xt_owner_match_info), .hooks = (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ae2ad1e..4635c9b 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -317,6 +317,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, struct recent_table *t; #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; + kuid_t uid; + kgid_t gid; #endif unsigned int i; int ret = -EINVAL; @@ -372,6 +374,13 @@ static int recent_mt_check(const struct xt_mtchk_param *par, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS + uid = make_kuid(&init_user_ns, ip_list_uid); + gid = make_kgid(&init_user_ns, ip_list_gid); + if (!uid_valid(uid) || !gid_valid(gid)) { + kfree(t); + ret = -EINVAL; + goto out; + } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { @@ -379,8 +388,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, ret = -ENOMEM; goto out; } - pde->uid = ip_list_uid; - pde->gid = ip_list_gid; + pde->uid = uid; + pde->gid = gid; #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); |