diff options
author | Eric Dumazet <edumazet@google.com> | 2015-02-06 20:59:01 (GMT) |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-02-09 00:53:57 (GMT) |
commit | 567e4b79731c352a17d73c483959f795d3593e03 (patch) | |
tree | 4af65c205a8b65cfc5fd7b42e7b8750728230616 /include | |
parent | 096a4cfa5807aa89c78ce12309c0b1c10cf88184 (diff) | |
download | linux-567e4b79731c352a17d73c483959f795d3593e03.tar.xz |
net: rfs: add hash collision detection
Receive Flow Steering is a nice solution but suffers from
hash collisions when a mix of connected and unconnected traffic
is received on the host, once the flow hash table is populated.
Also, clearing the flow in inet_release() makes RFS not very good
for short-lived flows, as many packets can follow close()
(FIN, ACK packets, ...).
This patch extends the information stored in the global hash table
to include not only the cpu number, but also the upper part of the hash value.
I use a 32bit value, and dynamically split it in two parts.
For hosts with up to 64 possible cpus, this needs at most 6 bits for the
cpu number, leaving at least 26 (32-6) bits for the upper part of the hash.
Since hash bucket selection uses the low order bits of the hash, we have
a full hash match if /proc/sys/net/core/rps_sock_flow_entries is big
enough.
If the hash found in the flow table does not match, we fall back to RPS (if
it is enabled for the rxqueue).
This means that a packet for a non-connected flow can avoid the
IPI through an unrelated/victim CPU.
This also means we no longer have to clear the table at socket
close time, which helps the performance of short-lived flows.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/netdevice.h | 34 | ||||
-rw-r--r-- | include/net/sock.h | 24 |
2 files changed, 18 insertions, 40 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce784d5..ab3b7ce 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,39 +644,39 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high order bits + * of flow hash, lower part is cpu number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - unsigned int mask; - u16 ents[0]; + u32 mask; + u32 ents[0]; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ - ((_num) * sizeof(u16))) +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { if (table && hash) { - unsigned int cpu, index = hash & table->mask; + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; /* We only give a hint, preemption can change cpu under us */ - cpu = raw_smp_processor_id(); + val |= raw_smp_processor_id(); - if (table->ents[index] != cpu) - table->ents[index] = cpu; + if (table->ents[index] != val) + table->ents[index] = val; } } -static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) - table->ents[hash & table->mask] = RPS_NO_CPU; -} - -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); diff --git a/include/net/sock.h 
b/include/net/sock.h index d28b8fe..e138245 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -857,18 +857,6 @@ static inline void sock_rps_record_flow_hash(__u32 hash) #endif } -static inline void sock_rps_reset_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_reset_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS @@ -876,28 +864,18 @@ static inline void sock_rps_record_flow(const struct sock *sk) #endif } -static inline void sock_rps_reset_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - sock_rps_reset_flow_hash(sk->sk_rxhash); -#endif -} - static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->hash)) { - sock_rps_reset_flow(sk); + if (unlikely(sk->sk_rxhash != skb->hash)) sk->sk_rxhash = skb->hash; - } #endif } static inline void sock_rps_reset_rxhash(struct sock *sk) { #ifdef CONFIG_RPS - sock_rps_reset_flow(sk); sk->sk_rxhash = 0; #endif } |