summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c9
-rw-r--r--net/ipv4/route.c5
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c37
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c2
14 files changed, 81 insertions, 45 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f0cdb30..57bd978 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -367,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head)
static inline void free_leaf(struct leaf *l)
{
- call_rcu_bh(&l->rcu, __leaf_free_rcu);
+ call_rcu(&l->rcu, __leaf_free_rcu);
}
static inline void free_leaf_info(struct leaf_info *leaf)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17..7f75f21 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *newinet = inet_sk(newsk);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct net *net = sock_net(sk);
struct flowi4 *fl4;
struct rtable *rt;
fl4 = &newinet->cork.fl.u.ip4;
+
+ rcu_read_lock();
+ opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_gateway)
goto route_err;
+ rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
+ rcu_read_unlock();
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ba39a52..c196d74 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
- if (neigh) {
+ if (!IS_ERR(neigh)) {
int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock_bh();
@@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ihl = 5;
iph->tos = inet->tos;
iph->frag_off = df;
- ip_select_ident(iph, &rt->dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
+ ip_select_ident(iph, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1366,9 +1366,8 @@ out:
return skb;
}
-int ip_send_skb(struct sk_buff *skb)
+int ip_send_skb(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
int err;
err = ip_local_out(skb);
@@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
return 0;
/* Netfilter gets whole the not fragmented skb. */
- return ip_send_skb(skb);
+ return ip_send_skb(sock_net(sk), skb);
}
/*
@@ -1536,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ skb_orphan(nskb);
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a2381..4ad9cf1 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
hdr, NULL, &matchoff, &matchlen,
&addr, &port) > 0) {
- unsigned int matchend, poff, plen, buflen, n;
+ unsigned int olen, matchend, poff, plen, buflen, n;
char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
/* We're only interested in headers related to this
@@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
goto next;
}
+ olen = *datalen;
if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
&addr, port))
return NF_DROP;
- matchend = matchoff + matchlen;
+ matchend = matchoff + matchlen + *datalen - olen;
/* The maddr= parameter (RFC 2361) specifies where to send
* the reply. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"maddr=", &poff, &plen,
- &addr) > 0 &&
+ &addr, true) > 0 &&
addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
buflen = sprintf(buffer, "%pI4",
@@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
* from which the server received the request. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"received=", &poff, &plen,
- &addr) > 0 &&
+ &addr, false) > 0 &&
addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
buflen = sprintf(buffer, "%pI4",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c035251..fd9ecb5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,7 +70,6 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -80,7 +79,6 @@
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
-#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
@@ -88,11 +86,9 @@
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
-#include <linux/jhash.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -2032,7 +2028,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
}
dev_out = net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
- res.fi = NULL;
flags |= RTCF_LOCAL;
goto make_route;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e7e6eea..2109ff4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
old_size_goal + mss_now > xmit_size_goal)) {
xmit_size_goal = old_size_goal;
} else {
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+ tp->xmit_size_goal_segs =
+ min_t(u16, xmit_size_goal / mss_now,
+ sk->sk_gso_max_segs);
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
}
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4d4db16..1432cdb 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size)
+ left * tp->mss_cache < sk->sk_gso_max_size &&
+ left < sk->sk_gso_max_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2fd2bc9..85308b9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5392,6 +5392,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (unlikely(sk->sk_rx_dst == NULL))
+ inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
* The code loosely follows the one in the famous
@@ -5605,7 +5607,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tcp_set_state(sk, TCP_ESTABLISHED);
if (skb != NULL) {
- inet_sk_rx_dst_set(sk, skb);
+ icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 42b2a6a..00a748d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
tp->mtu_info = info;
- if (!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
- else
- set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+ } else {
+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+ sock_hold(sk);
+ }
goto out;
}
@@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4;
+ inet_sk_rx_dst_set(newsk, skb);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
@@ -1627,9 +1630,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- if (unlikely(sk->sk_rx_dst == NULL))
- inet_sk_rx_dst_set(sk, skb);
-
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
@@ -1872,10 +1872,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_destructor= tcp_twsk_destructor,
};
+void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(inet_sk_rx_dst_set);
+
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v4_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 2288a63..0abe67b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net)
static void __net_exit tcp_net_metrics_exit(struct net *net)
{
+ unsigned int i;
+
+ for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
+ struct tcp_metrics_block *tm, *next;
+
+ tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
+ while (tm) {
+ next = rcu_dereference_protected(tm->tcpm_next, 1);
+ kfree(tm);
+ tm = next;
+ }
+ }
kfree(net->ipv4.tcp_metrics_hash);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 232a90c..6ff7f10 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
struct tcp_sock *oldtp = tcp_sk(sk);
struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
- inet_sk_rx_dst_set(newsk, skb);
-
/* TCP Cookie Transactions require space for the cookie pair,
* as it differs for each connection. There is no need to
* copy any s_data_payload stored at the original socket.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f1bcff..d046326 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
- if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+ if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
-
- if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
tcp_delack_timer_handler(sk);
-
- if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
sk->sk_prot->mtu_reduced(sk);
+ __sock_put(sk);
+ }
}
EXPORT_SYMBOL(tcp_release_cb);
@@ -940,7 +944,7 @@ void __init tcp_tasklet_init(void)
* We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
-void tcp_wfree(struct sk_buff *skb)
+static void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1522,21 +1526,21 @@ static void tcp_cwnd_validate(struct sock *sk)
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
- unsigned int mss_now, unsigned int cwnd)
+ unsigned int mss_now, unsigned int max_segs)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, cwnd_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- cwnd_len = mss_now * cwnd;
+ max_len = mss_now * max_segs;
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
- return cwnd_len;
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+ return max_len;
needed = min(skb->len, window);
- if (cwnd_len <= needed)
- return cwnd_len;
+ if (max_len <= needed)
+ return max_len;
return needed - needed % mss_now;
}
@@ -1765,7 +1769,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= sk->sk_gso_max_size)
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+ sk->sk_gso_max_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1999,7 +2004,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- cwnd_quota);
+ min_t(unsigned int,
+ cwnd_quota,
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad..b774a03 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
inet_csk(sk)->icsk_ack.blocked = 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
tcp_write_timer_handler(sk);
} else {
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b4c3582..6f6d1ac 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->check = CSUM_MANGLED_0;
send:
- err = ip_send_skb(skb);
+ err = ip_send_skb(sock_net(sk), skb);
if (err) {
if (err == -ENOBUFS && !inet->recverr) {
UDP_INC_STATS_USER(sock_net(sk),