diff options
author | Eric Dumazet <edumazet@google.com> | 2015-10-09 02:33:22 (GMT) |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-13 02:28:22 (GMT) |
commit | 8e5eb54d303b7cb1174977ca79030e135728c95e (patch) | |
tree | 9aebc1d9e60ccbb559dac5d0d8d11f41b95df3c2 | |
parent | 70da268b569d32a9fddeea85dc18043de9d89f89 (diff) | |
download | linux-8e5eb54d303b7cb1174977ca79030e135728c95e.tar.xz |
net: align sk_refcnt on 128 bytes boundary
sk->sk_refcnt is dirtied for every TCP/UDP incoming packet.
This is a performance issue if multiple cpus hit a common socket,
or multiple sockets are chained due to SO_REUSEPORT.
By moving sk_refcnt 8 bytes further, first 128 bytes of sockets
are mostly read. As they contain the lookup keys, this has
a considerable performance impact, as cpus can cache them.
These 8 bytes are not wasted, we use them as a place holder
for various fields, depending on the socket type.
Tested:
SYN flood hitting a 16 RX queues NIC.
TCP listener using 16 sockets and SO_REUSEPORT
and SO_INCOMING_CPU for proper siloing.
Could process 6.0 Mpps SYN instead of 4.2 Mpps
Kernel profile looked like :
11.68% [kernel] [k] sha_transform
6.51% [kernel] [k] __inet_lookup_listener
5.07% [kernel] [k] __inet_lookup_established
4.15% [kernel] [k] memcpy_erms
3.46% [kernel] [k] ipt_do_table
2.74% [kernel] [k] fib_table_lookup
2.54% [kernel] [k] tcp_make_synack
2.34% [kernel] [k] tcp_conn_request
2.05% [kernel] [k] __netif_receive_skb_core
2.03% [kernel] [k] kmem_cache_alloc
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_timewait_sock.h | 2 | ||||
-rw-r--r-- | include/net/request_sock.h | 2 | ||||
-rw-r--r-- | include/net/sock.h | 17 |
3 files changed, 16 insertions, 5 deletions
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 186f3a1..e581fc6 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -70,6 +70,7 @@ struct inet_timewait_sock { #define tw_dport __tw_common.skc_dport #define tw_num __tw_common.skc_num #define tw_cookie __tw_common.skc_cookie +#define tw_dr __tw_common.skc_tw_dr int tw_timeout; volatile unsigned char tw_substate; @@ -88,7 +89,6 @@ struct inet_timewait_sock { kmemcheck_bitfield_end(flags); struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; - struct inet_timewait_death_row *tw_dr; }; #define tw_tclass tw_tos diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 95ab5d7..6b818b7 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -50,9 +50,9 @@ struct request_sock { struct sock_common __req_common; #define rsk_refcnt __req_common.skc_refcnt #define rsk_hash __req_common.skc_hash +#define rsk_listener __req_common.skc_listener struct request_sock *dl_next; - struct sock *rsk_listener; u16 mss; u8 num_retrans; /* number of retransmits */ u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ diff --git a/include/net/sock.h b/include/net/sock.h index cf54739..6571240 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -150,6 +150,9 @@ typedef __u64 __bitwise __addrpair; * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_flags: place holder for sk_flags + * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, + * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings * @skc_incoming_cpu: record/match cpu processing incoming packets * @skc_refcnt: reference count * @@ -201,6 +204,16 @@ struct sock_common { atomic64_t skc_cookie; + /* following fields are padding to force + * offset(struct sock, sk_refcnt) == 128 on 64bit arches + * assuming IPV6 is enabled. We use this padding differently + * for different kind of 'sockets' + */ + union { + unsigned long skc_flags; + struct sock *skc_listener; /* request_sock */ + struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */ + }; /* * fields between dontcopy_begin/dontcopy_end * are not copied in sock_copy() @@ -246,8 +259,6 @@ struct cg_proto; * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes - * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, - * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -334,6 +345,7 @@ struct sock { #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_cookie __sk_common.skc_cookie #define sk_incoming_cpu __sk_common.skc_incoming_cpu +#define sk_flags __sk_common.skc_flags socket_lock_t sk_lock; struct sk_buff_head sk_receive_queue; @@ -371,7 +383,6 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif - unsigned long sk_flags; struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; spinlock_t sk_dst_lock; |