Diffstat (limited to 'include/net/tcp.h')
-rw-r--r--  include/net/tcp.h | 360
1 file changed, 171 insertions(+), 189 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 34f5cc2..3e4b33e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_NAGLE_CORK 2 /* Socket is corked */
#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
+/* TCP thin-stream limits */
+#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */
+
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */
@@ -241,6 +244,8 @@ extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;
+extern int sysctl_tcp_thin_linear_timeouts;
+extern int sysctl_tcp_thin_dupack;
extern atomic_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
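
The new TCP_THIN_LINEAR_RETRIES limit and the two sysctls above back the thin-stream mechanisms. A condensed, illustrative sketch of how the retransmit timer can key off them follows; the real logic lives in net/ipv4/tcp_timer.c (and the series also adds a per-socket opt-in), so treat the function name and exact placement as assumptions:

static void thin_stream_backoff_sketch(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_thin_linear_timeouts && tcp_stream_is_thin(tp) &&
	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
		/* Thin stream: keep the RTO linear (no doubling) for the
		 * first TCP_THIN_LINEAR_RETRIES retransmissions. */
		icsk->icsk_backoff = 0;
		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
	} else {
		/* Default behaviour: exponential backoff. */
		icsk->icsk_backoff++;
		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
	}
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  icsk->icsk_rto, TCP_RTO_MAX);
}
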
@@ -263,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
return seq3 - seq2 >= seq1 - seq2;
}
-static inline int tcp_too_many_orphans(struct sock *sk, int num)
+static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
{
- return (num > sysctl_tcp_max_orphans) ||
- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
+ struct percpu_counter *ocp = sk->sk_prot->orphan_count;
+ int orphans = percpu_counter_read_positive(ocp);
+
+ if (orphans << shift > sysctl_tcp_max_orphans) {
+ orphans = percpu_counter_sum_positive(ocp);
+ if (orphans << shift > sysctl_tcp_max_orphans)
+ return true;
+ }
+
+ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])
+ return true;
+ return false;
}
/* syncookies: remember time of last synqueue overflow */
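
tcp_too_many_orphans() now takes a shift instead of a precomputed count: it first tests orphans << shift against the limit using the cheap, approximate percpu_counter_read_positive(), and only pays for the exact percpu_counter_sum_positive() when the fast read already looks over the limit. A hedged sketch of a caller, loosely modelled on the tcp_close()/tcp_out_of_resources() call sites (the function name is made up):

static bool orphan_pressure_sketch(struct sock *sk, int shift)
{
	if (tcp_too_many_orphans(sk, shift)) {
		/* Too many orphaned sockets, or global TCP memory is over
		 * the hard limit: abort the connection with a reset. */
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
		return true;
	}
	return false;
}
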
@@ -289,46 +304,32 @@ extern struct proto tcp_prot;
#define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
-
-extern void tcp_v4_err(struct sk_buff *skb, u32);
-
-extern void tcp_shutdown (struct sock *sk, int how);
-
-extern int tcp_v4_rcv(struct sk_buff *skb);
-
-extern int tcp_v4_remember_stamp(struct sock *sk);
-
-extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-
-extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size);
-extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-
-extern int tcp_ioctl(struct sock *sk,
- int cmd,
- unsigned long arg);
-
-extern int tcp_rcv_state_process(struct sock *sk,
- struct sk_buff *skb,
- struct tcphdr *th,
- unsigned len);
-
-extern int tcp_rcv_established(struct sock *sk,
- struct sk_buff *skb,
- struct tcphdr *th,
- unsigned len);
-
-extern void tcp_rcv_space_adjust(struct sock *sk);
-
-extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
-
-extern int tcp_twsk_unique(struct sock *sk,
- struct sock *sktw, void *twp);
-
-extern void tcp_twsk_destructor(struct sock *sk);
-
-extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
+
+extern void tcp_v4_err(struct sk_buff *skb, u32);
+
+extern void tcp_shutdown (struct sock *sk, int how);
+
+extern int tcp_v4_rcv(struct sk_buff *skb);
+
+extern int tcp_v4_remember_stamp(struct sock *sk);
+extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t size);
+extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
+extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+ struct tcphdr *th, unsigned len);
+extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ struct tcphdr *th, unsigned len);
+extern void tcp_rcv_space_adjust(struct sock *sk);
+extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
+extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
+extern void tcp_twsk_destructor(struct sock *sk);
+extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
@@ -366,86 +367,59 @@ enum tcp_tw_status {
};
-extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
- struct sk_buff *skb,
- const struct tcphdr *th);
-
-extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
- struct request_sock *req,
- struct request_sock **prev);
-extern int tcp_child_process(struct sock *parent,
- struct sock *child,
- struct sk_buff *skb);
-extern int tcp_use_frto(struct sock *sk);
-extern void tcp_enter_frto(struct sock *sk);
-extern void tcp_enter_loss(struct sock *sk, int how);
-extern void tcp_clear_retrans(struct tcp_sock *tp);
-extern void tcp_update_metrics(struct sock *sk);
-
-extern void tcp_close(struct sock *sk,
- long timeout);
-extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
-
-extern int tcp_getsockopt(struct sock *sk, int level,
- int optname,
- char __user *optval,
- int __user *optlen);
-extern int tcp_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval,
- unsigned int optlen);
-extern int compat_tcp_getsockopt(struct sock *sk,
- int level, int optname,
- char __user *optval, int __user *optlen);
-extern int compat_tcp_setsockopt(struct sock *sk,
- int level, int optname,
- char __user *optval, unsigned int optlen);
-extern void tcp_set_keepalive(struct sock *sk, int val);
-extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg,
- size_t len, int nonblock,
- int flags, int *addr_len);
-
-extern void tcp_parse_options(struct sk_buff *skb,
- struct tcp_options_received *opt_rx,
- u8 **hvpp,
- int estab);
-
-extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
+extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
+ struct sk_buff *skb,
+ const struct tcphdr *th);
+extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
+ struct request_sock *req,
+ struct request_sock **prev);
+extern int tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
+extern int tcp_use_frto(struct sock *sk);
+extern void tcp_enter_frto(struct sock *sk);
+extern void tcp_enter_loss(struct sock *sk, int how);
+extern void tcp_clear_retrans(struct tcp_sock *tp);
+extern void tcp_update_metrics(struct sock *sk);
+extern void tcp_close(struct sock *sk, long timeout);
+extern unsigned int tcp_poll(struct file * file, struct socket *sock,
+ struct poll_table_struct *wait);
+extern int tcp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int tcp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+extern void tcp_set_keepalive(struct sock *sk, int val);
+extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
+extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags, int *addr_len);
+extern void tcp_parse_options(struct sk_buff *skb,
+ struct tcp_options_received *opt_rx, u8 **hvpp,
+ int estab);
+extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
/*
* TCP v4 functions exported for the inet6 API
*/
-extern void tcp_v4_send_check(struct sock *sk, int len,
- struct sk_buff *skb);
-
-extern int tcp_v4_conn_request(struct sock *sk,
- struct sk_buff *skb);
-
-extern struct sock * tcp_create_openreq_child(struct sock *sk,
- struct request_sock *req,
- struct sk_buff *skb);
-
-extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
- struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst);
-
-extern int tcp_v4_do_rcv(struct sock *sk,
+extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+extern struct sock * tcp_create_openreq_child(struct sock *sk,
+ struct request_sock *req,
struct sk_buff *skb);
-
-extern int tcp_v4_connect(struct sock *sk,
- struct sockaddr *uaddr,
- int addr_len);
-
-extern int tcp_connect(struct sock *sk);
-
-extern struct sk_buff * tcp_make_synack(struct sock *sk,
- struct dst_entry *dst,
- struct request_sock *req,
- struct request_values *rvp);
-
-extern int tcp_disconnect(struct sock *sk, int flags);
+extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst);
+extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len);
+extern int tcp_connect(struct sock *sk);
+extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+ struct request_sock *req,
+ struct request_values *rvp);
+extern int tcp_disconnect(struct sock *sk, int flags);
/* From syncookies.c */
@@ -456,7 +430,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
__u16 *mss);
extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
/* From net/ipv6/syncookies.c */
extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
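
cookie_check_timestamp() now reports success as a bool and fills in an extra bool out-parameter, which the declaration leaves unnamed; treating it as an ECN-permission flag is an assumption here. A hedged caller sketch:

static bool cookie_opts_ok_sketch(struct tcp_options_received *tcp_opt)
{
	bool ecn_ok = false;	/* name assumed, not taken from the diff */

	if (!cookie_check_timestamp(tcp_opt, &ecn_ok))
		return false;	/* decoded options were rejected */

	/* On success the out-parameter would typically be copied into the
	 * reconstructed request (e.g. ireq->ecn_ok). */
	return true;
}
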
@@ -477,10 +451,10 @@ extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
-extern int tcp_write_wakeup(struct sock *);
+extern int tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
-extern int tcp_send_synack(struct sock *);
+extern int tcp_send_synack(struct sock *);
extern void tcp_push_one(struct sock *, unsigned int mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);
@@ -501,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk);
/* Bound MSS / TSO packet size with the half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
{
- if (tp->max_window && pktsize > (tp->max_window >> 1))
- return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+ int cutoff;
+
+ /* When peer uses tiny windows, there is no use in packetizing
+ * to sub-MSS pieces for the sake of SWS or making sure there
+ * are enough packets in the pipe for fast recovery.
+ *
+ * On the other hand, for extremely large MSS devices, handling
+ * smaller than MSS windows in this way does make sense.
+ */
+ if (tp->max_window >= 512)
+ cutoff = (tp->max_window >> 1);
+ else
+ cutoff = tp->max_window;
+
+ if (cutoff && pktsize > cutoff)
+ return max_t(int, cutoff, 68U - tp->tcp_header_len);
else
return pktsize;
}
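
For concreteness, the clamp behaves as follows; this is a standalone userspace illustration with made-up numbers, not kernel code:

#include <stdio.h>

static int bound_to_half_wnd(int max_window, int tcp_header_len, int pktsize)
{
	int cutoff = (max_window >= 512) ? (max_window >> 1) : max_window;
	int floor = 68 - tcp_header_len;

	if (cutoff && pktsize > cutoff)
		return cutoff > floor ? cutoff : floor;
	return pktsize;
}

int main(void)
{
	/* 64K window: packets are bounded to half the window. */
	printf("%d\n", bound_to_half_wnd(65535, 20, 40000)); /* 32767 */
	/* Tiny 300-byte window: no halving, clamp straight to 300. */
	printf("%d\n", bound_to_half_wnd(300, 20, 1460));    /* 300   */
	return 0;
}
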
@@ -584,7 +572,7 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
* scaling applied to the result. The caller does these things
* if necessary. This is a "raw" window selection.
*/
-extern u32 __tcp_select_window(struct sock *sk);
+extern u32 __tcp_select_window(struct sock *sk);
/* TCP timestamps are only 32-bits, this causes a slight
* complication on 64-bit systems since we store a snapshot
@@ -594,12 +582,22 @@ extern u32 __tcp_select_window(struct sock *sk);
*/
#define tcp_time_stamp ((__u32)(jiffies))
+#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+
/* This is what the send packet queuing engine uses to pass
- * TCP per-packet control information to the transmission
- * code. We also store the host-order sequence numbers in
- * here too. This is 36 bytes on 32-bit architectures,
- * 40 bytes on 64-bit machines, if this grows please adjust
- * skbuff.h:skbuff->cb[xxx] size appropriately.
+ * TCP per-packet control information to the transmission code.
+ * We also store the host-order sequence numbers in here too.
+ * This is 44 bytes if IPV6 is enabled.
+ * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
*/
struct tcp_skb_cb {
union {
@@ -612,19 +610,6 @@ struct tcp_skb_cb {
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
__u32 when; /* used to compute rtt's */
__u8 flags; /* TCP header flags. */
-
- /* NOTE: These must match up to the flags byte in a
- * real TCP header.
- */
-#define TCPCB_FLAG_FIN 0x01
-#define TCPCB_FLAG_SYN 0x02
-#define TCPCB_FLAG_RST 0x04
-#define TCPCB_FLAG_PSH 0x08
-#define TCPCB_FLAG_ACK 0x10
-#define TCPCB_FLAG_URG 0x20
-#define TCPCB_FLAG_ECE 0x40
-#define TCPCB_FLAG_CWR 0x80
-
__u8 sacked; /* State flags for SACK/FACK. */
#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
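
The per-packet flag bits move out of struct tcp_skb_cb (the TCPCB_FLAG_* copies are removed in the hunk above) and become header-level TCPHDR_* constants whose values match byte 13 of the on-wire TCP header, which tcp_flag_byte() exposes directly. A small illustrative check (the function name is made up):

static bool is_syn_only_sketch(struct tcphdr *th)
{
	/* TCPHDR_* values line up with the header's flag byte, so they can
	 * be tested against tcp_flag_byte(th) without any translation. */
	return (tcp_flag_byte(th) & (TCPHDR_SYN | TCPHDR_ACK)) == TCPHDR_SYN;
}
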
@@ -856,13 +841,6 @@ static inline void tcp_check_probe_timer(struct sock *sk)
icsk->icsk_rto, TCP_RTO_MAX);
}
-static inline void tcp_push_pending_frames(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
-}
-
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
{
tp->snd_wl1 = seq;
@@ -939,7 +917,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
tp->ucopy.memory = 0;
} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
- wake_up_interruptible_poll(sk->sk_sleep,
+ wake_up_interruptible_sync_poll(sk_sleep(sk),
POLLIN | POLLRDNORM | POLLRDBAND);
if (!inet_csk_ack_scheduled(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
@@ -972,7 +950,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
/* Determine a window scaling and initial window to offer. */
extern void tcp_select_initial_window(int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
- int wscale_ok, __u8 *rcv_wscale);
+ int wscale_ok, __u8 *rcv_wscale,
+ __u32 init_rcv_wnd);
static inline int tcp_win_from_space(int space)
{
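
tcp_select_initial_window() grows an init_rcv_wnd argument so a caller can seed the advertised window from outside (for example a per-route metric); passing 0 keeps the previous auto-selected behaviour. A hedged caller sketch (the function name and the choice to pass 0 are illustrative):

static void select_window_sketch(struct sock *sk, unsigned int mss)
{
	__u32 rcv_wnd, window_clamp = 0;
	__u8 rcv_wscale;

	tcp_select_initial_window(tcp_full_space(sk), mss,
				  &rcv_wnd, &window_clamp,
				  sysctl_tcp_window_scaling,
				  &rcv_wscale,
				  0 /* init_rcv_wnd: no external hint */);
	/* The results would then be copied into the socket, e.g. tp->rcv_wnd,
	 * tp->window_clamp and tp->rx_opt.rcv_wscale. */
}
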
@@ -1031,6 +1010,14 @@ static inline int keepalive_probes(const struct tcp_sock *tp)
return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
}
+static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+{
+ const struct inet_connection_sock *icsk = &tp->inet_conn;
+
+ return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
+ tcp_time_stamp - tp->rcv_tstamp);
+}
+
static inline int tcp_fin_time(const struct sock *sk)
{
int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
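
keepalive_time_elapsed() returns how long it has been since the peer was last heard from, taking the smaller of "time since the last ACK was processed" (icsk_ack.lrcvtime) and "time since data was last received" (rcv_tstamp). A hedged sketch of how a keepalive path can pair it with the existing keepalive_time_when() helper (function name made up):

static u32 keepalive_delay_sketch(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 elapsed = keepalive_time_elapsed(tp);
	u32 keepalive = keepalive_time_when(tp);

	if (elapsed >= keepalive)
		return 0;		/* idle long enough: probe now */
	return keepalive - elapsed;	/* otherwise sleep the remainder */
}
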
@@ -1166,22 +1153,14 @@ struct tcp_md5sig_pool {
#define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */
/* - functions */
-extern int tcp_v4_md5_hash_skb(char *md5_hash,
- struct tcp_md5sig_key *key,
- struct sock *sk,
- struct request_sock *req,
- struct sk_buff *skb);
-
-extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
- struct sock *addr_sk);
-
-extern int tcp_v4_md5_do_add(struct sock *sk,
- __be32 addr,
- u8 *newkey,
- u8 newkeylen);
-
-extern int tcp_v4_md5_do_del(struct sock *sk,
- __be32 addr);
+extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+ struct sock *sk, struct request_sock *req,
+ struct sk_buff *skb);
+extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk,
+ struct sock *addr_sk);
+extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey,
+ u8 newkeylen);
+extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr);
#ifdef CONFIG_TCP_MD5SIG
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_keylen ? \
@@ -1193,33 +1172,18 @@ extern int tcp_v4_md5_do_del(struct sock *sk,
#define tcp_twsk_md5_key(twsk) NULL
#endif
-extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *);
-extern void tcp_free_md5sig_pool(void);
+extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
+extern void tcp_free_md5sig_pool(void);
+
+extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
+extern void tcp_put_md5sig_pool(void);
-extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu);
-extern void __tcp_put_md5sig_pool(void);
extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
unsigned header_len);
extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
struct tcp_md5sig_key *key);
-static inline
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
-{
- int cpu = get_cpu();
- struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
- if (!ret)
- put_cpu();
- return ret;
-}
-
-static inline void tcp_put_md5sig_pool(void)
-{
- __tcp_put_md5sig_pool();
- put_cpu();
-}
-
/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
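
tcp_get_md5sig_pool()/tcp_put_md5sig_pool() stop being header inlines; the per-CPU selection moves behind the extern interface, and the allocator now returns a proper __percpu double pointer. A hedged usage sketch, condensed from the shape of the tcp_v4_md5_hash_skb() style callers (producing the final digest would additionally use the crypto state inside the pool, omitted here):

static int md5_hash_sketch(struct tcp_md5sig_key *key,
			   struct sk_buff *skb, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
	int err = 1;

	if (!hp)
		return 1;			/* no pool available */

	if (!tcp_md5_hash_header(hp, th) &&
	    !tcp_md5_hash_skb_data(hp, skb, th->doff << 2) &&
	    !tcp_md5_hash_key(hp, key))
		err = 0;			/* all pieces hashed */

	tcp_put_md5sig_pool();			/* always pair get/put */
	return err;
}
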
@@ -1342,6 +1306,15 @@ static inline int tcp_write_queue_empty(struct sock *sk)
return skb_queue_empty(&sk->sk_write_queue);
}
+static inline void tcp_push_pending_frames(struct sock *sk)
+{
+ if (tcp_send_head(sk)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
+ }
+}
+
/* Start sequence of the highest skb with SACKed bit, valid only if
* sacked > 0 or when the caller has ensured validity by itself.
*/
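
tcp_push_pending_frames() moves below the write-queue helpers it now depends on and gains a tcp_send_head() check, so calling it when nothing is queued no longer takes a pointless trip through tcp_current_mss(). An illustrative pairing with the queueing helper (function name made up):

static void queue_and_push_sketch(struct sock *sk, struct sk_buff *skb)
{
	tcp_add_write_queue_tail(sk, skb);	/* queue the new segment */
	tcp_push_pending_frames(sk);		/* send head is non-NULL here,
						 * so the push proceeds; the
						 * guard matters for callers
						 * that may have queued nothing */
}
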
@@ -1381,6 +1354,14 @@ static inline void tcp_highest_sack_combine(struct sock *sk,
tcp_sk(sk)->highest_sack = new;
}
+/* Determines whether this is a thin stream (which may suffer from
+ * increased latency). Used to trigger latency-reducing mechanisms.
+ */
+static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp)
+{
+ return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
+}
+
/* /proc */
enum tcp_seq_states {
TCP_SEQ_STATE_LISTENING,
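
tcp_stream_is_thin() classifies a stream as thin when fewer than four packets are in flight and the connection is past initial slow start. A hedged sketch of how the dupack-side mechanism (sysctl_tcp_thin_dupack, declared earlier) could use it to enter recovery sooner; the dup_acks counter and the trigger point are assumptions, the real check sits in the tcp_input.c recovery logic:

static int thin_dupack_sketch(struct sock *sk, int dup_acks)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_thin_dupack && tcp_stream_is_thin(tp) && dup_acks >= 1)
		return 1;	/* retransmit after a single duplicate ACK
				 * instead of waiting for the usual three */
	return 0;
}
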
@@ -1401,7 +1382,8 @@ struct tcp_iter_state {
sa_family_t family;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
- int bucket, sbucket, num, uid;
+ int bucket, offset, sbucket, num, uid;
+ loff_t last_pos;
};
extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
@@ -1422,7 +1404,7 @@ extern int tcp_gro_complete(struct sk_buff *skb);
extern int tcp4_gro_complete(struct sk_buff *skb);
#ifdef CONFIG_PROC_FS
-extern int tcp4_proc_init(void);
+extern int tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
#endif