summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-02-06 20:59:01 (GMT)
committerDavid S. Miller <davem@davemloft.net>2015-02-09 00:53:57 (GMT)
commit567e4b79731c352a17d73c483959f795d3593e03 (patch)
tree4af65c205a8b65cfc5fd7b42e7b8750728230616 /include
parent096a4cfa5807aa89c78ce12309c0b1c10cf88184 (diff)
downloadlinux-567e4b79731c352a17d73c483959f795d3593e03.tar.xz
net: rfs: add hash collision detection
Receive Flow Steering is a nice solution but suffers from hash collisions when a mix of connected and unconnected traffic is received on the host, when flow hash table is populated. Also, clearing flow in inet_release() makes RFS not very good for short lived flows, as many packets can follow close(). (FIN , ACK packets, ...) This patch extends the information stored into global hash table to not only include cpu number, but upper part of the hash value. I use a 32bit value, and dynamically split it in two parts. For host with less than 64 possible cpus, this gives 6 bits for the cpu number, and 26 (32-6) bits for the upper part of the hash. Since hash bucket selection use low order bits of the hash, we have a full hash match, if /proc/sys/net/core/rps_sock_flow_entries is big enough. If the hash found in flow table does not match, we fallback to RPS (if it is enabled for the rxqueue). This means that a packet for an non connected flow can avoid the IPI through a unrelated/victim CPU. This also means we no longer have to clear the table at socket close time, and this helps short lived flows performance. Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/netdevice.h34
-rw-r--r--include/net/sock.h24
2 files changed, 18 insertions, 40 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ce784d5..ab3b7ce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -644,39 +644,39 @@ struct rps_dev_flow_table {
/*
* The rps_sock_flow_table contains mappings of flows to the last CPU
* on which they were processed by the application (set in recvmsg).
+ * Each entry is a 32bit value. Upper part is the high order bits
+ * of flow hash, lower part is cpu number.
+ * rps_cpu_mask is used to partition the space, depending on number of
+ * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f,
+ * meaning we use 32-6=26 bits for the hash.
*/
struct rps_sock_flow_table {
- unsigned int mask;
- u16 ents[0];
+ u32 mask;
+ u32 ents[0];
};
-#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
- ((_num) * sizeof(u16)))
+#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
#define RPS_NO_CPU 0xffff
+extern u32 rps_cpu_mask;
+extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
u32 hash)
{
if (table && hash) {
- unsigned int cpu, index = hash & table->mask;
+ unsigned int index = hash & table->mask;
+ u32 val = hash & ~rps_cpu_mask;
/* We only give a hint, preemption can change cpu under us */
- cpu = raw_smp_processor_id();
+ val |= raw_smp_processor_id();
- if (table->ents[index] != cpu)
- table->ents[index] = cpu;
+ if (table->ents[index] != val)
+ table->ents[index] = val;
}
}
-static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
- u32 hash)
-{
- if (table && hash)
- table->ents[hash & table->mask] = RPS_NO_CPU;
-}
-
-extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
-
#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
u16 filter_id);
diff --git a/include/net/sock.h b/include/net/sock.h
index d28b8fe..e138245 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -857,18 +857,6 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
#endif
}
-static inline void sock_rps_reset_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_reset_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
@@ -876,28 +864,18 @@ static inline void sock_rps_record_flow(const struct sock *sk)
#endif
}
-static inline void sock_rps_reset_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- sock_rps_reset_flow_hash(sk->sk_rxhash);
-#endif
-}
-
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- if (unlikely(sk->sk_rxhash != skb->hash)) {
- sock_rps_reset_flow(sk);
+ if (unlikely(sk->sk_rxhash != skb->hash))
sk->sk_rxhash = skb->hash;
- }
#endif
}
static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
- sock_rps_reset_flow(sk);
sk->sk_rxhash = 0;
#endif
}