diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/tcp_cong.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 16 | ||||
-rw-r--r-- | net/ipv4/tcp_metrics.c | 25 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 70 |
5 files changed, 87 insertions, 75 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 04dbd7a..4d4db16 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); void tcp_slow_start(struct tcp_sock *tp) { int cnt; /* increase in packets */ + unsigned int delta = 0; /* RFC3465: ABC Slow start * Increase only after a full MSS of bytes is acked @@ -333,9 +334,9 @@ void tcp_slow_start(struct tcp_sock *tp) tp->snd_cwnd_cnt += cnt; while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { tp->snd_cwnd_cnt -= tp->snd_cwnd; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; + delta++; } + tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_slow_start); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e67d685..21d7f8f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2521,7 +2521,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ -static inline int tcp_packet_delayed(const struct tcp_sock *tp) +static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { return !tp->retrans_stamp || (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2582,7 +2582,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_may_undo(const struct tcp_sock *tp) +static inline bool tcp_may_undo(const struct tcp_sock *tp) { return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } @@ -3371,13 +3371,13 @@ static void tcp_ack_probe(struct sock *sk) } } -static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) +static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag) { return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open; } -static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) +static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { const struct tcp_sock *tp = tcp_sk(sk); return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && @@ -3387,7 +3387,7 @@ static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ -static inline int tcp_may_update_window(const struct tcp_sock *tp, +static inline bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, const u32 ack_seq, const u32 nwin) { @@ -4006,7 +4006,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); } -static inline int tcp_paws_discard(const struct sock *sk, +static inline bool tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); @@ -4028,7 +4028,7 @@ static inline int tcp_paws_discard(const struct sock *sk, * (borrowed from freebsd) */ -static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) +static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) { return !before(end_seq, tp->rcv_wup) && !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); @@ -5214,7 +5214,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, return result; } -static inline int tcp_checksum_complete_user(struct sock *sk, +static inline bool tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 99779ae..992f1bf 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -7,6 +7,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/tcp.h> +#include <linux/hash.h> #include <net/inet_connection_sock.h> #include <net/net_namespace.h> @@ -228,10 +229,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = dev_net(dst->dev); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { @@ -265,10 +264,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = twsk_net(tw); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { @@ -302,10 +299,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = dev_net(dst->dev); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); tm = __tcp_get_metrics(&addr, net, hash); reclaim = false; @@ -694,7 +689,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static unsigned long tcpmhash_entries; +static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { ssize_t ret; @@ -702,7 +697,7 @@ static int __init set_tcpmhash_entries(char *str) if (!str) return 0; - ret = kstrtoul(str, 0, &tcpmhash_entries); + ret = kstrtouint(str, 0, &tcpmhash_entries); if (ret) return 0; @@ -712,7 +707,8 @@ __setup("tcpmhash_entries=", set_tcpmhash_entries); static int __net_init tcp_net_metrics_init(struct net *net) { - int slots, size; + size_t size; + unsigned int slots; slots = tcpmhash_entries; if (!slots) { @@ -722,14 +718,13 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = 8 * 1024; } - size = slots * sizeof(struct tcpm_hash_bucket); + net->ipv4.tcp_metrics_hash_log = order_base_2(slots); + size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); if (!net->ipv4.tcp_metrics_hash) return -ENOMEM; - net->ipv4.tcp_metrics_hash_mask = (slots - 1); - return 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 27a32ac..950aebf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -837,6 +837,13 @@ struct tsq_tasklet { }; static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); +static void tcp_tsq_handler(struct sock *sk) +{ + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) + tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); +} /* * One tasklest per cpu tries to send more skbs. * We run in tasklet context but need to disable irqs when @@ -864,16 +871,10 @@ static void tcp_tasklet_func(unsigned long data) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | - TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, - tcp_current_mss(sk), - 0, 0, - GFP_ATOMIC); + tcp_tsq_handler(sk); } else { /* defer the work to tcp_release_cb() */ - set_bit(TSQ_OWNED, &tp->tsq_flags); + set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); } bh_unlock_sock(sk); @@ -882,6 +883,9 @@ static void tcp_tasklet_func(unsigned long data) } } +#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ + (1UL << TCP_WRITE_TIMER_DEFERRED) | \ + (1UL << TCP_DELACK_TIMER_DEFERRED)) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -892,16 +896,24 @@ static void tcp_tasklet_func(unsigned long data) void tcp_release_cb(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + unsigned long flags, nflags; - if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) { - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | - TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, - tcp_current_mss(sk), - 0, 0, - GFP_ATOMIC); - } + /* perform an atomic operation only if at least one flag is set */ + do { + flags = tp->tsq_flags; + if (!(flags & TCP_DEFERRED_ALL)) + return; + nflags = flags & ~TCP_DEFERRED_ALL; + } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); + + if (flags & (1UL << TCP_TSQ_DEFERRED)) + tcp_tsq_handler(sk); + + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + tcp_write_timer_handler(sk); + + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + tcp_delack_timer_handler(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e911e6c..6df36ad 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; -static void tcp_write_timer(unsigned long); -static void tcp_delack_timer(unsigned long); -static void tcp_keepalive_timer (unsigned long data); - -void tcp_init_xmit_timers(struct sock *sk) -{ - inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, - &tcp_keepalive_timer); -} -EXPORT_SYMBOL(tcp_init_xmit_timers); - static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; @@ -205,21 +194,11 @@ static int tcp_write_timeout(struct sock *sk) return 0; } -static void tcp_delack_timer(unsigned long data) +void tcp_delack_timer_handler(struct sock *sk) { - struct sock *sk = (struct sock *)data; struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); - goto out_unlock; - } - sk_mem_reclaim_partial(sk); if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) @@ -260,7 +239,21 @@ static void tcp_delack_timer(unsigned long data) out: if (sk_under_memory_pressure(sk)) sk_mem_reclaim(sk); -out_unlock: +} + +static void tcp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_delack_timer_handler(sk); + } else { + inet_csk(sk)->icsk_ack.blocked = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + /* deleguate our work to tcp_release_cb() */ + set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + } bh_unlock_sock(sk); sock_put(sk); } @@ -450,19 +443,11 @@ out_reset_timer: out:; } -static void tcp_write_timer(unsigned long data) +void tcp_write_timer_handler(struct sock *sk) { - struct sock *sk = (struct sock *)data; struct inet_connection_sock *icsk = inet_csk(sk); int event; - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later */ - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); - goto out_unlock; - } - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) goto out; @@ -485,7 +470,19 @@ static void tcp_write_timer(unsigned long data) out: sk_mem_reclaim(sk); -out_unlock: +} + +static void tcp_write_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_write_timer_handler(sk); + } else { + /* deleguate our work to tcp_release_cb() */ + set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + } bh_unlock_sock(sk); sock_put(sk); } @@ -602,3 +599,10 @@ out: bh_unlock_sock(sk); sock_put(sk); } + +void tcp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, + &tcp_keepalive_timer); +} +EXPORT_SYMBOL(tcp_init_xmit_timers); |