summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/devinet.c12
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/ip_tunnel_core.c9
-rw-r--r--net/ipv4/ipip.c137
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/tcp_input.c54
-rw-r--r--net/ipv4/tcp_timer.c81
-rw-r--r--net/ipv4/tunnel4.c72
-rw-r--r--net/ipv4/udp.c2
12 files changed, 321 insertions, 80 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d39e9e4..55513e6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -73,7 +73,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/kmod.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
@@ -1916,6 +1916,3 @@ static int __init ipv4_proc_init(void)
return 0;
}
#endif /* CONFIG_PROC_FS */
-
-MODULE_ALIAS_NETPROTO(PF_INET);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e333bc8..415e117 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1834,7 +1834,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -1846,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -1903,7 +1903,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -2027,16 +2027,16 @@ static void inet_forward_change(struct net *net)
for_each_netdev(net, dev) {
struct in_device *in_dev;
+
if (on)
dev_disable_lro(dev);
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
+
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
- rcu_read_unlock();
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d09173b..539fa26 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
if (!rtnh_ok(rtnh, remaining))
return -EINVAL;
+ if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ return -EINVAL;
+
nexthop_nh->nh_flags =
(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
@@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
goto err_inval;
+ if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ goto err_inval;
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0a7b9..8b4ffd2 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- IPCB(skb)->flags |= IPSKB_FORWARDED;
+ IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e23f141..dde37fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -223,8 +223,10 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *segs;
int ret = 0;
- /* common case: locally created skb or seglen is <= mtu */
- if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
+ /* common case: fragmentation of segments is not allowed,
+ * or seglen is <= mtu
+ */
+ if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index afd6b59..9d847c3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -63,6 +63,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
int pkt_len = skb->len - skb_inner_network_offset(skb);
struct net *net = dev_net(rt->dst.dev);
struct net_device *dev = skb->dev;
+ int skb_iif = skb->skb_iif;
struct iphdr *iph;
int err;
@@ -72,6 +73,14 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ if (skb_iif && proto == IPPROTO_UDP) {
+ /* Arrived from an ingress interface and got udp encapuslated.
+ * The encapsulated network segment length may exceed dst mtu.
+ * Allow IP Fragmentation of segments.
+ */
+ IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
+ }
+
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 9783701..4ae3f8e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,14 +148,14 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, iph->protocol, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_IPIP, 0);
+ iph->protocol, 0);
err = 0;
goto out;
}
@@ -177,12 +177,19 @@ out:
return err;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
@@ -193,11 +200,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
+ tunnel->parms.iph.protocol != 0)
+ goto drop;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return -1;
@@ -207,24 +226,51 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
+ u8 ipproto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipproto = IPPROTO_IPIP;
+ break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ ipproto = IPPROTO_MPLS;
+ break;
+#endif
+ default:
+ goto tx_error;
+ }
- if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ if (tiph->protocol != ipproto && tiph->protocol != 0)
goto tx_error;
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
@@ -234,6 +280,20 @@ tx_error:
return NETDEV_TX_OK;
}
+static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
+{
+ switch (ipproto) {
+ case 0:
+ case IPPROTO_IPIP:
+#if IS_ENABLED(CONFIG_MPLS)
+ case IPPROTO_MPLS:
+#endif
+ return true;
+ }
+
+ return false;
+}
+
static int
ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -244,7 +304,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EFAULT;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ if (p.iph.version != 4 ||
+ !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
return -EINVAL;
}
@@ -301,10 +362,23 @@ static int ipip_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 0;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
- tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
+static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO])
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
static void ipip_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
{
@@ -335,6 +409,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
}
@@ -427,6 +504,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_TOS */
nla_total_size(1) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
/* IFLA_IPTUN_ENCAP_TYPE */
@@ -450,6 +529,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
@@ -476,6 +556,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
[IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
@@ -489,6 +570,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
.policy = ipip_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipip_tunnel_setup,
+ .validate = ipip_tunnel_validate,
.newlink = ipip_newlink,
.changelink = ipip_changelink,
.dellink = ip_tunnel_dellink,
@@ -503,6 +585,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 1,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip_err,
+ .priority = 1,
+};
+#endif
+
static int __net_init ipip_init_net(struct net *net)
{
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
@@ -525,7 +615,7 @@ static int __init ipip_init(void)
{
int err;
- pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
+ pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&ipip_net_ops);
if (err < 0)
@@ -533,8 +623,15 @@ static int __init ipip_init(void)
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
if (err < 0) {
pr_info("%s: can't register tunnel\n", __func__);
- goto xfrm_tunnel_failed;
+ goto xfrm_tunnel_ipip_failed;
+ }
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register tunnel\n", __func__);
+ goto xfrm_tunnel_mplsip_failed;
}
+#endif
err = rtnl_link_register(&ipip_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -543,8 +640,13 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
+xfrm_tunnel_mplsip_failed:
+
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-xfrm_tunnel_failed:
+xfrm_tunnel_ipip_failed:
unregister_pernet_device(&ipip_net_ops);
goto out;
}
@@ -554,7 +656,10 @@ static void __exit ipip_fini(void)
rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__);
-
+#if IS_ENABLED(CONFIG_MPLS)
+ if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
+ pr_info("%s: can't deregister tunnel\n", __func__);
+#endif
unregister_pernet_device(&ipip_net_ops);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5ad48ec..eec2341 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1150,6 +1150,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
c->mfc_parent = mfc->mfcc_parent;
+ c->mfc_un.res.lastuse = jiffies;
ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
@@ -1748,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->dev->stats.tx_bytes += skb->len;
}
- IPCB(skb)->flags |= IPSKB_FORWARDED;
+ IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
* not only before forwarding, but after forwarding on all output
@@ -1792,6 +1793,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
vif = cache->mfc_parent;
cache->mfc_un.res.pkt++;
cache->mfc_un.res.bytes += skb->len;
+ cache->mfc_un.res.lastuse = jiffies;
if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
@@ -2071,10 +2073,10 @@ drop:
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm)
{
- int ct;
- struct rtnexthop *nhp;
- struct nlattr *mp_attr;
struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
if (c->mfc_parent >= MAXVIFS)
@@ -2106,7 +2108,10 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
mfcs.mfcs_packets = c->mfc_un.res.pkt;
mfcs.mfcs_bytes = c->mfc_un.res.bytes;
mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0)
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES,
+ jiffies_to_clock_t(c->mfc_un.res.lastuse),
+ RTA_PAD))
return -EMSGSIZE;
rtm->rtm_type = RTN_MULTICAST;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94d4aff..f9f9e37 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 100;
+int sysctl_tcp_challenge_ack_limit = 1000;
int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
@@ -3424,6 +3424,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
return flag;
}
+static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ u32 *last_oow_ack_time)
+{
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ NET_INC_STATS(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+ }
+
+ *last_oow_ack_time = tcp_time_stamp;
+
+ return false; /* not rate-limited: go ahead, send dupack now! */
+}
+
/* Return true if we're currently rate-limiting out-of-window ACKs and
* thus shouldn't send a dupack right now. We rate-limit dupacks in
* response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
@@ -3437,21 +3454,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
/* Data packets without SYNs are not likely part of an ACK loop. */
if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
!tcp_hdr(skb)->syn)
- goto not_rate_limited;
-
- if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
- NET_INC_STATS(net, mib_idx);
- return true; /* rate-limited: don't send yet! */
- }
- }
-
- *last_oow_ack_time = tcp_time_stamp;
+ return false;
-not_rate_limited:
- return false; /* not rate-limited: go ahead, send dupack now! */
+ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
}
/* RFC 5961 7 [ACK Throttling] */
@@ -3461,21 +3466,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
- u32 now;
+ u32 count, now;
/* First check our per-socket dupack rate limit. */
- if (tcp_oow_rate_limited(sock_net(sk), skb,
- LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
- &tp->last_oow_ack_time))
+ if (__tcp_oow_rate_limited(sock_net(sk),
+ LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+ &tp->last_oow_ack_time))
return;
- /* Then check the check host-wide RFC 5961 rate limit. */
+ /* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
+ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+
challenge_timestamp = now;
- challenge_count = 0;
+ WRITE_ONCE(challenge_count, half +
+ prandom_u32_max(sysctl_tcp_challenge_ack_limit));
}
- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+ count = READ_ONCE(challenge_count);
+ if (count > 0) {
+ WRITE_ONCE(challenge_count, count - 1);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index debdd8b..d84930b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -24,6 +24,13 @@
int sysctl_tcp_thin_linear_timeouts __read_mostly;
+/**
+ * tcp_write_err() - close socket and save error info
+ * @sk: The socket the error has appeared on.
+ *
+ * Returns: Nothing (void)
+ */
+
static void tcp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -33,16 +40,21 @@ static void tcp_write_err(struct sock *sk)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
-/* Do not allow orphaned sockets to eat all our resources.
- * This is direct violation of TCP specs, but it is required
- * to prevent DoS attacks. It is called when a retransmission timeout
- * or zero probe timeout occurs on orphaned socket.
+/**
+ * tcp_out_of_resources() - Close socket if out of resources
+ * @sk: pointer to current socket
+ * @do_reset: send a last packet with reset flag
*
- * Criteria is still not confirmed experimentally and may change.
- * We kill the socket, if:
- * 1. If number of orphaned sockets exceeds an administratively configured
- * limit.
- * 2. If we have strong memory pressure.
+ * Do not allow orphaned sockets to eat all our resources.
+ * This is direct violation of TCP specs, but it is required
+ * to prevent DoS attacks. It is called when a retransmission timeout
+ * or zero probe timeout occurs on orphaned socket.
+ *
+ * Criteria is still not confirmed experimentally and may change.
+ * We kill the socket, if:
+ * 1. If number of orphaned sockets exceeds an administratively configured
+ * limit.
+ * 2. If we have strong memory pressure.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -74,7 +86,11 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
return 0;
}
-/* Calculate maximal number or retries on an orphaned socket. */
+/**
+ * tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
+ * @sk: Pointer to the current socket.
+ * @alive: bool, socket alive state
+ */
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
@@ -115,10 +131,22 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
}
}
-/* This function calculates a "timeout" which is equivalent to the timeout of a
- * TCP connection after "boundary" unsuccessful, exponentially backed-off
+
+/**
+ * retransmits_timed_out() - returns true if this connection has timed out
+ * @sk: The current socket
+ * @boundary: max number of retransmissions
+ * @timeout: A custom timeout value.
+ * If set to 0 the default timeout is calculated and used.
+ * Using TCP_RTO_MIN and the number of unsuccessful retransmits.
+ * @syn_set: true if the SYN Bit was set.
+ *
+ * The default "timeout" value this function can calculate and use
+ * is equivalent to the timeout of a TCP Connection
+ * after "boundary" unsuccessful, exponentially backed-off
* retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
* syn_set flag is set.
+ *
*/
static bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
@@ -257,6 +285,16 @@ out:
sk_mem_reclaim(sk);
}
+
+/**
+ * tcp_delack_timer() - The TCP delayed ACK timeout handler
+ * @data: Pointer to the current socket. (gets casted to struct sock *)
+ *
+ * This function gets (indirectly) called when the kernel timer for a TCP packet
+ * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
+ *
+ * Returns: Nothing (void)
+ */
static void tcp_delack_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
@@ -350,10 +388,18 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}
-/*
- * The TCP retransmit timer.
- */
+/**
+ * tcp_retransmit_timer() - The TCP retransmit timeout handler
+ * @sk: Pointer to the current socket.
+ *
+ * This function gets called when the kernel timer for a TCP packet
+ * of this socket expires.
+ *
+ * It handles retransmission, timer adjustment and other necesarry measures.
+ *
+ * Returns: Nothing (void)
+ */
void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -494,7 +540,8 @@ out_reset_timer:
out:;
}
-/* Called with BH disabled */
+/* Called with bottom-half processing disabled.
+ Called by tcp_write_timer() */
void tcp_write_timer_handler(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -539,7 +586,7 @@ static void tcp_write_timer(unsigned long data)
if (!sock_owned_by_user(sk)) {
tcp_write_timer_handler(sk);
} else {
- /* deleguate our work to tcp_release_cb() */
+ /* delegate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
sock_hold(sk);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 0d01718..ec35eaa 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/mpls.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -16,11 +17,14 @@
static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnelmpls4_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
- return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
+ return (family == AF_INET) ? &tunnel4_handlers :
+ (family == AF_INET6) ? &tunnel64_handlers :
+ &tunnelmpls4_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
@@ -125,6 +129,26 @@ drop:
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static int tunnelmpls4_rcv(struct sk_buff *skb)
+{
+ struct xfrm_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct mpls_label)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+
static void tunnel4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
@@ -145,6 +169,17 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+#endif
+
static const struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
@@ -161,24 +196,47 @@ static const struct net_protocol tunnel64_protocol = {
};
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct net_protocol tunnelmpls4_protocol = {
+ .handler = tunnelmpls4_rcv,
+ .err_handler = tunnelmpls4_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+#endif
+
static int __init tunnel4_init(void)
{
- if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) {
- pr_err("%s: can't add protocol\n", __func__);
- return -EAGAIN;
- }
+ if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP))
+ goto err;
#if IS_ENABLED(CONFIG_IPV6)
if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
- pr_err("tunnel64 init: can't add protocol\n");
inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
- return -EAGAIN;
+ goto err;
+ }
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_add_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) {
+ inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
+#if IS_ENABLED(CONFIG_IPV6)
+ inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6);
+#endif
+ goto err;
}
#endif
return 0;
+
+err:
+ pr_err("%s: can't add protocol\n", __func__);
+ return -EAGAIN;
}
static void __exit tunnel4_fini(void)
{
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS))
+ pr_err("tunnelmpls4 close: can't remove protocol\n");
+#endif
#if IS_ENABLED(CONFIG_IPV6)
if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
pr_err("tunnel64 close: can't remove protocol\n");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ca5e8ea..4aed8fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1583,6 +1583,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (sk_filter(sk, skb))
goto drop;
+ if (unlikely(skb->len < sizeof(struct udphdr)))
+ goto drop;
udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {