diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 11 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 6 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 53 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 20 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 26 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_rpfilter.c | 8 | ||||
-rw-r--r-- | net/ipv4/ping.c | 4 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 77 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 34 |
16 files changed, 94 insertions, 187 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fcf104e..24b384b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -248,12 +248,8 @@ EXPORT_SYMBOL(inet_listen); u32 inet_ehash_secret __read_mostly; EXPORT_SYMBOL(inet_ehash_secret); -u32 ipv6_hash_secret __read_mostly; -EXPORT_SYMBOL(ipv6_hash_secret); - /* - * inet_ehash_secret must be set exactly once, and to a non nul value - * ipv6_hash_secret must be set exactly once. + * inet_ehash_secret must be set exactly once */ void build_ehash_secret(void) { @@ -263,8 +259,7 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) - get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + cmpxchg(&inet_ehash_secret, 0, rnd); } EXPORT_SYMBOL(build_ehash_secret); @@ -1595,7 +1590,7 @@ static const struct net_offload udp_offload = { static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, - .err_handler = icmp_err, + .err_handler = ping_err, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d..3b4f0cd 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,6 +139,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ + err = -ENOMEM; + esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -174,10 +176,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) { - err = -ENOMEM; + if (!tmp) goto error; - } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d8bbe94..17ff9fd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -69,7 +69,6 @@ #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/fcntl.h> -#include <linux/sysrq.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> @@ -769,30 +768,6 @@ static void icmp_redirect(struct sk_buff *skb) } /* - * 32bit and 64bit have different timestamp length, so we check for - * the cookie at offset 20 and verify it is repeated at offset 50 - */ -#define CO_POS0 20 -#define CO_POS1 50 -#define CO_SIZE sizeof(int) -#define ICMP_SYSRQ_SIZE 57 - -/* - * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie - * pattern and if it matches send the next byte as a trigger to sysrq. - */ -static void icmp_check_sysrq(struct net *net, struct sk_buff *skb) -{ - int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq); - char *p = skb->data; - - if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) && - !memcmp(&cookie, p + CO_POS1, CO_SIZE) && - p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE]) - handle_sysrq(p[CO_POS0 + CO_SIZE]); -} - -/* * Handle ICMP_ECHO ("ping") requests. * * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo @@ -819,11 +794,6 @@ static void icmp_echo(struct sk_buff *skb) icmp_param.data_len = skb->len; icmp_param.head_len = sizeof(struct icmphdr); icmp_reply(&icmp_param, skb); - - if (skb->len == ICMP_SYSRQ_SIZE && - net->ipv4.sysctl_icmp_echo_sysrq) { - icmp_check_sysrq(net, skb); - } } } @@ -964,29 +934,6 @@ error: goto drop; } -void icmp_err(struct sk_buff *skb, u32 info) -{ - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; - struct net *net = dev_net(skb->dev); - - /* - * Use ping_err to handle all icmp errors except those - * triggered by ICMP_ECHOREPLY which sent from kernel. - */ - if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); - return; - } - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0); - else if (type == ICMP_REDIRECT) - ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0); -} - /* * This table is the definition of how we handle ICMP. */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 03f5af7..4750d2b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,7 +21,6 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> -#include <net/sock.h> #include <net/inet_frag.h> static void inet_frag_secret_rebuild(unsigned long dummy) @@ -277,7 +276,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, { struct inet_frag_queue *q; struct hlist_node *n; - int depth = 0; hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { @@ -285,25 +283,9 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, read_unlock(&f->lock); return q; } - depth++; } read_unlock(&f->lock); - if (depth <= INETFRAGS_MAXDEPTH) - return inet_frag_create(nf, f, key); - else - return ERR_PTR(-ENOBUFS); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); - -void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, - const char *prefix) -{ - static const char msg[] = "inet_frag_find: Fragment hash bucket" - " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) - ". Dropping fragment.\n"; - - if (PTR_ERR(q) == -ENOBUFS) - LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); -} -EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0fcfee3..eb9d63a 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -255,7 +255,8 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb has no dst, perform route lookup again */ + /* skb dst is stale, drop it, and perform route lookup again */ + skb_dst_drop(head); iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -298,11 +299,14 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); - return NULL; - } + if (q == NULL) + goto out_nomem; + return container_of(q, struct ipq, q); + +out_nomem: + LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); + return NULL; } /* Is the fragment too far ahead to be part of ipq? */ @@ -524,16 +528,8 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) { - unsigned long orefdst = skb->_skb_refdst; - - skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, prev, dev); - skb->_skb_refdst = orefdst; - return err; - } - - skb_dst_drop(skb); + qp->q.meat == qp->q.len) + return ip_frag_reasm(qp, prev, dev); write_lock(&ip4_frags.lock); list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a85062b..e81b1ca 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -761,7 +761,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - tiph = (const struct iphdr *)skb->data; + if (skb->protocol == htons(ETH_P_IP)) + tiph = (const struct iphdr *)skb->data; + else + tiph = &tunnel->parms.iph; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 8433ffe..a47fc1d 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,6 +370,7 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: + opt->ts = optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -380,6 +381,7 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } + opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -394,6 +396,7 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } + opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -426,12 +429,12 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 3; goto error; } + opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } - opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 253692b..3e98ed2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1508,8 +1508,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, if (IS_ERR(rt)) return; - get_cpu_light(); - inet = &__get_cpu_var(unicast_sock); + inet = &get_cpu_var(unicast_sock); inet->tos = arg->tos; sk = &inet->sk; @@ -1533,7 +1532,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, ip_push_pending_frames(sk, &fl4); } - put_cpu_light(); + put_cpu_var(unicast_sock); ip_rt_put(rt); } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c49dcd0..c301300 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,12 +66,6 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } -static bool rpfilter_is_local(const struct sk_buff *skb) -{ - const struct rtable *rt = skb_rtable(skb); - return rt && (rt->rt_flags & RTCF_LOCAL); -} - static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -82,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (rpfilter_is_local(skb)) + if (par->in->flags & IFF_LOOPBACK) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index dc454cc..6f9c072 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -322,8 +322,8 @@ void ping_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type = icmph->type; + int code = icmph->code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f962f19..b236ef0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -348,8 +348,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, - RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44bf3b0..d84400b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -815,13 +815,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .procname = "icmp_echo_sysrq", - .data = &init_net.ipv4.sysctl_icmp_echo_sysrq, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "icmp_ignore_bogus_error_responses", .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 45b63ca..2aa69c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -773,7 +773,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->reserved_tailroom = skb->end - skb->tail - size; + skb->avail_size = size; return skb; } __kfree_skb(skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b4e8b79..ad70a96 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -116,7 +116,6 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ -#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -2065,8 +2064,11 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tp->undo_marker = tp->snd_una; - if (how) { + if (!how) { + /* Push undo marker, if it was plain RTO and nothing + * was retransmitted. */ + tp->undo_marker = tp->snd_una; + } else { tp->sacked_out = 0; tp->fackets_out = 0; } @@ -3573,27 +3575,6 @@ static void tcp_send_challenge_ack(struct sock *sk) } } -static void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3646,12 +3627,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - if (flag & FLAG_UPDATE_TS_RECENT) - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3968,6 +3943,27 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif +static inline void tcp_store_ts_recent(struct tcp_sock *tp) +{ + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); +} + +static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +{ + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } +} + /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5502,9 +5498,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tcp_checksum_complete_user(sk, skb)) goto csum_error; - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - /* Predicted packet is in window by definition. * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: @@ -5516,6 +5509,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ @@ -5563,9 +5559,14 @@ slow_path: return 0; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) goto discard; + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -5999,8 +6000,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6151,6 +6151,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + /* step 6: check the URG bit */ tcp_urg(sk, skb, th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9130a9..eadb693 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,6 +274,13 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; + /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs + * send out by Linux are always <576bytes so they should go through + * unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + return; + dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -401,13 +408,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ - /* We are not interested in TCP_LISTEN and open_requests - * (SYN-ACKs send out by Linux are always <576bytes so - * they should go through unfragmented). - */ - if (sk->sk_state == TCP_LISTEN) - goto out; - tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a9f50ee..5d45159 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1298,6 +1298,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); + skb->avail_size -= eat; len -= eat; if (!len) return; @@ -1350,8 +1351,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) return 0; } -/* Calculate MSS not accounting any TCP options. */ -static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) +/* Calculate MSS. Not accounting for SACKs here. */ +int tcp_mtu_to_mss(struct sock *sk, int pmtu) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1380,15 +1381,11 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) mss_now = 48; - return mss_now; -} -/* Calculate MSS. Not accounting for SACKs here. */ -int tcp_mtu_to_mss(struct sock *sk, int pmtu) -{ - /* Subtract TCP options size, not including SACKs */ - return __tcp_mtu_to_mss(sk, pmtu) - - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); + /* Now subtract TCP options size, not including SACKs */ + mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); + + return mss_now; } /* Inverse of above */ @@ -1809,11 +1806,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. - * Do not rearm the timer if already set to not break TCP ACK clocking. - */ - if (!tp->tso_deferred) - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. */ + tp->tso_deferred = 1 | (jiffies << 1); return true; @@ -2388,12 +2382,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it - * and check if ack-trimming & collapsing extended the headroom - * beyond what csum_start can cover. - */ - if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || - skb_headroom(skb) >= 0xFFFF)) { + /* make sure skb->data is aligned on arches that require it */ + if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : @@ -2940,7 +2930,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) */ if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; - space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; syn_data = skb_copy_expand(syn, skb_headroom(syn), space, |