summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-24 23:51:40 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-24 23:51:40 (GMT)
commit701b259f446be2f3625fb852bceb93afe76e206d (patch)
tree93f15bcd00bd59c38b4e59fed9af7ddf6b06c8b3 /net/ipv4/tcp_input.c
parentd2346963bfcbb9a8ee783ca3c3b3bdd7448ec9d5 (diff)
parentefc3dbc37412c027e363736b4f4c74ee5e8ecffc (diff)
downloadlinux-701b259f446be2f3625fb852bceb93afe76e206d.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Davem says: 1) Fix JIT code generation on x86-64 for divide by zero, from Eric Dumazet. 2) tg3 header length computation correction from Eric Dumazet. 3) More build and reference counting fixes for socket memory cgroup code from Glauber Costa. 4) module.h snuck back into a core header after all the hard work we did to remove that, from Paul Gortmaker and Jesper Dangaard Brouer. 5) Fix PHY naming regression and add some new PCI IDs in stmmac, from Alessandro Rubini. 6) Netlink message generation fix in new team driver, should only advertise the entries that changed during events, from Jiri Pirko. 7) SRIOV VF registration and unregistration fixes, and also add a missing PCI ID, from Roopa Prabhu. 8) Fix infinite loop in tx queue flush code of brcmsmac, from Stanislaw Gruszka. 9) ftgmac100/ftmac100 build fix, missing interrupt.h include. 10) Memory leak fix in net/hyperv do_set_mutlicast() handling, from Wei Yongjun. 11) Off by one fix in netem packet scheduler, from Vijay Subramanian. 12) TCP loss detection fix from Yuchung Cheng. 13) TCP reset packet MD5 calculation uses wrong address, fix from Shawn Lu. 14) skge carrier assertion and DMA mapping fixes from Stephen Hemminger. 15) Congestion recovery undo performed at the wrong spot in BIC and CUBIC congestion control modules, fix from Neal Cardwell. 16) Ethtool ETHTOOL_GSSET_INFO is unnecessarily restrictive, from Michał Mirosław. 17) Fix triggerable race in ipv6 sysctl handling, from Francesco Ruggeri. 18) Statistics bug fixes in mlx4 from Eugenia Emantayev. 19) rds locking bug fix during info dumps, from your's truly. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (67 commits) rds: Make rds_sock_lock BH rather than IRQ safe. netprio_cgroup.h: dont include module.h from other includes net: flow_dissector.c missing include linux/export.h team: send only changed options/ports via netlink net/hyperv: fix possible memory leak in do_set_multicast() drivers/net: dsa/mv88e6xxx.c files need linux/module.h stmmac: added PCI identifiers llc: Fix race condition in llc_ui_recvmsg stmmac: fix phy naming inconsistency dsa: Add reporting of silicon revision for Marvell 88E6123/88E6161/88E6165 switches. tg3: fix ipv6 header length computation skge: add byte queue limit support mv643xx_eth: Add Rx Discard and Rx Overrun statistics bnx2x: fix compilation error with SOE in fw_dump bnx2x: handle CHIP_REVISION during init_one bnx2x: allow user to change ring size in ISCSI SD mode bnx2x: fix Big-Endianess in ethtool -t bnx2x: fixed ethtool statistics for MF modes bnx2x: credit-leakage fixup on vlan_mac_del_all macvlan: fix a possible use after free ...
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c41
1 files changed, 15 insertions, 26 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e..976034f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
#define FLAG_ECE 0x40 /* ECE in this ACK */
-#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
* These 6 states form finite state machine, controlled by the following events:
* 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
* 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
* A. Scoreboard estimator decided the packet is lost.
* A'. Reno "three dupacks" marks head of queue lost.
- * A''. Its FACK modfication, head until snd.fack is lost.
- * B. SACK arrives sacking data transmitted after never retransmitted
- * hole was sent out.
- * C. SACK arrives sacking SND.NXT at the moment, when the
+ * A''. Its FACK modification, head until snd.fack is lost.
+ * B. SACK arrives sacking SND.NXT at the moment, when the
* segment was retransmitted.
* 4. D-SACK added new rule: D-SACK changes any tag to S.
*
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
}
/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
* for reordering! Ugly, but should help.
*
* Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (found_dup_sack && ((i + 1) == first_sack_index))
next_dup = &sp[i + 1];
- /* Event "B" in the comment above. */
- if (after(end_seq, tp->high_seq))
- state.flag |= FLAG_DATA_LOST;
-
/* Skip too early cached blocks */
while (tcp_sack_cache_ok(tp, cache) &&
!before(start_seq, cache->end_seq))
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)
tcp_verify_left_out(tp);
}
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
*/
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
int cnt, oldcnt;
int err;
unsigned int mss;
+ /* Use SACK to deduce losses of new sequences sent during recovery */
+ const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
WARN_ON(packets > tp->packets_out);
if (tp->lost_skb_hint) {
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
- if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+ if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
break;
oldcnt = cnt;
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (tcp_check_sack_reneging(sk, flag))
return;
- /* C. Process data loss notification, provided it is valid. */
- if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
- before(tp->snd_una, tp->high_seq) &&
- icsk->icsk_ca_state != TCP_CA_Open &&
- tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
- }
-
- /* D. Check consistency of the current state. */
+ /* C. Check consistency of the current state. */
tcp_verify_left_out(tp);
- /* E. Check state exit conditions. State can be terminated
+ /* D. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (icsk->icsk_ca_state == TCP_CA_Open) {
WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
}
}
- /* F. Process state. */
+ /* E. Process state. */
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {