From c386578f1cdb4dac230395a951f88027f64346e3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 29 Sep 2015 11:40:49 +0200 Subject: xfrm: Let the flowcache handle its size by default. The xfrm flowcache size is limited by the flowcache limit (4096 * number of online cpus) and the xfrm garbage collector threshold (2 * 32768), whatever is reached first. This means that we can hit the garbage collector limit only on systems with more than 16 cpus. On such systems we simply refuse new allocations if we reach the limit, so new flows are dropped. On syslems with 16 or less cpus, we hit the flowcache limit. In this case, we shrink the flow cache instead of refusing new flows. We increase the xfrm garbage collector threshold to INT_MAX to get the same behaviour, independent of the number of cpus. The xfrm garbage collector threshold can still be set below the flowcache limit to reduce the memory usage of the flowcache. Tested-by: Dan Streetman Signed-off-by: Steffen Klassert diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ebe94f2..260f30b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1199,7 +1199,8 @@ tag - INTEGER xfrm4_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will - refuse new allocations. + refuse new allocations. The value must be set below the flowcache + limit (4096 * number of online cpus) to take effect. igmp_link_local_mcast_reports - BOOLEAN Enable IGMP reports for link local multicast groups in the @@ -1645,7 +1646,8 @@ ratelimit - INTEGER xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will - refuse new allocations. + refuse new allocations. The value must be set below the flowcache + limit (4096 * number of online cpus) to take effect. IPv6 Update by: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0304d16..75e8d48 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -246,7 +246,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa28..2fad593 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,7 +287,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { -- cgit v0.10.2 From e33d4f13d21e9f604194ebc8730077ff39916c50 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 21 Oct 2015 11:48:25 -0400 Subject: xfrm: Fix unaligned access to stats in copy_to_user_state() On sparc, deleting established SAs (e.g., by restarting ipsec) results in unaligned access messages via xfrm_del_sa -> km_state_notify -> xfrm_send_state_notify(). Even though struct xfrm_usersa_info is aligned on 8-byte boundaries, netlink attributes are fundamentally only 4 byte aligned, and this cannot be changed for nla_data() that is passed up to userspace. As a result, the put_unaligned() macro needs to be used to set up potentially unaligned fields such as the xfrm_stats in copy_to_user_state() Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a8de9e3..639e0d5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { @@ -728,7 +729,9 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); - memcpy(&p->stats, &x->stats, sizeof(p->stats)); + put_unaligned(x->stats.replay_window, &p->stats.replay_window); + put_unaligned(x->stats.replay, &p->stats.replay); + put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed); memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window; -- cgit v0.10.2 From 1a14f1e5550a341f76e5c8f596e9b5f8a886dfbc Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 23 Oct 2015 07:31:23 +0200 Subject: xfrm4: Fix header checks in _decode_session4. We skip the header informations if the data pointer points already behind the header in question for some protocols. This is because we call pskb_may_pull with a negative value converted to unsigened int from pskb_may_pull in this case. Skipping the header informations can lead to incorrect policy lookups, so fix it by a check of the data pointer position before we call pskb_may_pull. Signed-off-by: Steffen Klassert diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 75e8d48..e4d533c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -137,7 +137,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ICMP: - if (pskb_may_pull(skb, xprth + 2 - skb->data)) { + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -146,7 +147,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ESP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -154,7 +156,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_AH: - if (pskb_may_pull(skb, xprth + 8 - skb->data)) { + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -162,7 +165,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_COMP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -170,7 +174,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_GRE: - if (pskb_may_pull(skb, xprth + 12 - skb->data)) { + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { __be16 *greflags = (__be16 *)xprth; __be32 *gre_hdr = (__be32 *)xprth; -- cgit v0.10.2 From ea673a4d3a337184f3c314dcc6300bf02f39e077 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 23 Oct 2015 07:32:39 +0200 Subject: xfrm4: Reload skb header pointers after calling pskb_may_pull. A call to pskb_may_pull may change the pointers into the packet, so reload the pointers after the call. Signed-off-by: Steffen Klassert diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e4d533c..269b137 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -129,7 +129,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports = (__be16 *)xprth; + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; fl4->fl4_dport = ports[!reverse]; @@ -139,7 +142,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ICMP: if (xprth + 2 < skb->data || pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp = xprth; + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; fl4->fl4_icmp_type = icmp[0]; fl4->fl4_icmp_code = icmp[1]; @@ -149,7 +155,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ESP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr = (__be32 *)xprth; + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; } @@ -158,7 +167,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_AH: if (xprth + 8 < skb->data || pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr = (__be32 *)xprth; + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; } @@ -167,7 +179,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_COMP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr = (__be16 *)xprth; + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } @@ -176,8 +191,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_GRE: if (xprth + 12 < skb->data || pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags = (__be16 *)xprth; - __be32 *gre_hdr = (__be32 *)xprth; + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) -- cgit v0.10.2 From cb866e3298cd7412503fc7e2c265753c853fab9d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 23 Oct 2015 07:52:58 +0200 Subject: xfrm: Increment statistic counter on inner mode error Increment the LINUX_MIB_XFRMINSTATEMODEERROR statistic counter to notify about dropped packets if we fail to fetch a inner mode. Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 60ce701..ad7f5b3 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -330,8 +330,10 @@ resume: if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) + if (inner_mode == NULL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; + } } if (inner_mode->input(x, skb)) { -- cgit v0.10.2