diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-13 02:57:02 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-13 02:57:02 (GMT) |
commit | aee3bfa3307cd0da2126bdc0ea359dabea5ee8f7 (patch) | |
tree | 3d35c69e8fa835098bb90f77f30abed120681651 /tools | |
parent | c597b6bcd5c624534afc3df65cdc42bb05173bca (diff) | |
parent | 415b6f19e87e350b13585591859d4fdf50772229 (diff) | |
download | linux-aee3bfa3307cd0da2126bdc0ea359dabea5ee8f7.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Support busy polling generically, for all NAPI drivers. From Eric
Dumazet.
2) Add byte/packet counter support to nft_ct, from Florian Westphal.
3) Add RSS/XPS support to mvneta driver, from Gregory Clement.
4) Implement IPV6_HDRINCL socket option for raw sockets, from Hannes
Frederic Sowa.
5) Add support for T6 adapter to cxgb4 driver, from Hariprasad Shenai.
6) Add support for VLAN device bridging to mlxsw switch driver, from
Ido Schimmel.
7) Add driver for Netronome NFP4000/NFP6000, from Jakub Kicinski.
8) Provide hwmon interface to mlxsw switch driver, from Jiri Pirko.
9) Reorganize wireless drivers into per-vendor directories just like we
do for ethernet drivers. From Kalle Valo.
10) Provide a way for administrators to "destroy" connected sockets via the
SOCK_DESTROY socket netlink diag operation. From Lorenzo Colitti.
11) Add support to add/remove multicast routes via netlink, from Nikolay
Aleksandrov.
12) Make TCP keepalive settings per-namespace, from Nikolay Borisov.
13) Add forwarding and packet duplication facilities to nf_tables, from
Pablo Neira Ayuso.
14) Dead route support in MPLS, from Roopa Prabhu.
15) TSO support for thunderx chips, from Sunil Goutham.
16) Add driver for IBM's System i/p VNIC protocol, from Thomas Falcon.
17) Rationalize, consolidate, and more completely document the checksum
offloading facilities in the networking stack. From Tom Herbert.
18) Support aborting an ongoing scan in mac80211/cfg80211, from
Vidyullatha Kanchanapally.
19) Use per-bucket spinlock for bpf hash facility, from Tom Leiming.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1375 commits)
net: bnxt: always return values from _bnxt_get_max_rings
net: bpf: reject invalid shifts
phonet: properly unshare skbs in phonet_rcv()
dwc_eth_qos: Fix dma address for multi-fragment skbs
phy: remove an unneeded condition
mdio: remove an unneed condition
mdio_bus: NULL dereference on allocation error
net: Fix typo in netdev_intersect_features
net: freescale: mac-fec: Fix build error from phy_device API change
net: freescale: ucc_geth: Fix build error from phy_device API change
bonding: Prevent IPv6 link local address on enslaved devices
IB/mlx5: Add flow steering support
net/mlx5_core: Export flow steering API
net/mlx5_core: Make ipv4/ipv6 location more clear
net/mlx5_core: Enable flow steering support for the IB driver
net/mlx5_core: Initialize namespaces only when supported by device
net/mlx5_core: Set priority attributes
net/mlx5_core: Connect flow tables
net/mlx5_core: Introduce modify flow table command
net/mlx5_core: Managing root flow table
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/net/reuseport_bpf.c | 514 |
3 files changed, 516 insertions, 1 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 0032662..6fb2336 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,3 +1,4 @@ socket psock_fanout psock_tpacket +reuseport_bpf diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fac4782..41449b5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket +NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf all: $(NET_PROGS) %: %.c diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c new file mode 100644 index 0000000..bec1b5d --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -0,0 +1,514 @@ +/* + * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use + * a BPF program (both classic and extended) to read the first word from an + * incoming packet (expected to be in network byte-order), calculate a modulus + * of that number, and then dispatch the packet to the Nth socket using the + * result. These tests are run for each supported address family and protocol. + * Additionally, a few edge cases in the implementation are tested. 
+ */ + +#include <errno.h> +#include <error.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/unistd.h> +#include <netinet/in.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + +struct test_params { + int recv_family; + int send_family; + int protocol; + size_t recv_socks; + uint16_t recv_port; + uint16_t send_port_min; +}; + +static size_t sockaddr_size(void) +{ + return sizeof(struct sockaddr_storage); +} + +static struct sockaddr *new_any_sockaddr(int family, uint16_t port) +{ + struct sockaddr_storage *addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + addr = malloc(sizeof(struct sockaddr_storage)); + memset(addr, 0, sizeof(struct sockaddr_storage)); + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(port); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(port); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + return (struct sockaddr *)addr; +} + +static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port) +{ + struct sockaddr *addr = new_any_sockaddr(family, port); + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)addr; + addr6->sin6_addr = in6addr_loopback; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + return addr; +} + +static void attach_ebpf(int fd, uint16_t mod) +{ + static char bpf_log_buf[65536]; + static const char bpf_license[] = 
"GPL"; + + int bpf_fd; + const struct bpf_insn prog[] = { + /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */ + { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 }, + /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */ + { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 }, + /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */ + { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod }, + /* BPF_EXIT_INSN() */ + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insn_cnt = ARRAY_SIZE(prog); + attr.insns = (uint64_t)prog; + attr.license = (uint64_t)bpf_license; + attr.log_buf = (uint64_t)bpf_log_buf; + attr.log_size = sizeof(bpf_log_buf); + attr.log_level = 1; + attr.kern_version = 0; + + bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (bpf_fd < 0) + error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf); + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd, + sizeof(bpf_fd))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF"); + + close(bpf_fd); +} + +static void attach_cbpf(int fd, uint16_t mod) +{ + struct sock_filter code[] = { + /* A = (uint32_t)skb[0] */ + { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 }, + /* A = A % mod */ + { BPF_ALU | BPF_MOD, 0, 0, mod }, + /* return A */ + { BPF_RET | BPF_A, 0, 0, 0 }, + }; + struct sock_fprog p = { + .len = ARRAY_SIZE(code), + .filter = code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF"); +} + +static void build_recv_group(const struct test_params p, int fd[], uint16_t mod, + void (*attach_bpf)(int, uint16_t)) +{ + struct sockaddr * const addr = + new_any_sockaddr(p.recv_family, p.recv_port); + int i, opt; + + for (i = 0; i < p.recv_socks; ++i) { + fd[i] = socket(p.recv_family, p.protocol, 0); + if (fd[i] < 0) + error(1, errno, "failed to create recv %d", i); + + opt = 1; + if (setsockopt(fd[i], SOL_SOCKET, 
SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on %d", i); + + if (i == 0) + attach_bpf(fd[i], mod); + + if (bind(fd[i], addr, sockaddr_size())) + error(1, errno, "failed to bind recv socket %d", i); + + if (p.protocol == SOCK_STREAM) + if (listen(fd[i], p.recv_socks * 10)) + error(1, errno, "failed to listen on socket"); + } + free(addr); +} + +static void send_from(struct test_params p, uint16_t sport, char *buf, + size_t len) +{ + struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport); + struct sockaddr * const daddr = + new_loopback_sockaddr(p.send_family, p.recv_port); + const int fd = socket(p.send_family, p.protocol, 0); + + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, saddr, sockaddr_size())) + error(1, errno, "failed to bind send socket"); + if (connect(fd, daddr, sockaddr_size())) + error(1, errno, "failed to connect"); + + if (send(fd, buf, len, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); + free(saddr); + free(daddr); +} + +static void test_recv_order(const struct test_params p, int fd[], int mod) +{ + char recv_buf[8], send_buf[8]; + struct msghdr msg; + struct iovec recv_io = { recv_buf, 8 }; + struct epoll_event ev; + int epfd, conn, i, sport, expected; + uint32_t data, ndata; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (i = 0; i < p.recv_socks; ++i) { + ev.events = EPOLLIN; + ev.data.fd = fd[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev)) + error(1, errno, "failed to register sock %d epoll", i); + } + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &recv_io; + msg.msg_iovlen = 1; + + for (data = 0; data < p.recv_socks * 2; ++data) { + sport = p.send_port_min + data; + ndata = htonl(data); + memcpy(send_buf, &ndata, sizeof(ndata)); + send_from(p, sport, send_buf, sizeof(ndata)); + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll wait failed"); + + if (p.protocol 
== SOCK_STREAM) { + conn = accept(ev.data.fd, NULL, NULL); + if (conn < 0) + error(1, errno, "error accepting"); + i = recvmsg(conn, &msg, 0); + close(conn); + } else { + i = recvmsg(ev.data.fd, &msg, 0); + } + if (i < 0) + error(1, errno, "recvmsg error"); + if (i != sizeof(ndata)) + error(1, 0, "expected size %zd got %d", + sizeof(ndata), i); + + for (i = 0; i < p.recv_socks; ++i) + if (ev.data.fd == fd[i]) + break; + memcpy(&ndata, recv_buf, sizeof(ndata)); + fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata)); + + expected = (sport % mod); + if (i != expected) + error(1, 0, "expected socket %d", expected); + } +} + +static void test_reuseport_ebpf(const struct test_params p) +{ + int i, fd[p.recv_socks]; + + fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks); + build_recv_group(p, fd, p.recv_socks, attach_ebpf); + test_recv_order(p, fd, p.recv_socks); + + fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); + attach_ebpf(fd[0], p.recv_socks / 2); + test_recv_order(p, fd, p.recv_socks / 2); + + for (i = 0; i < p.recv_socks; ++i) + close(fd[i]); +} + +static void test_reuseport_cbpf(const struct test_params p) +{ + int i, fd[p.recv_socks]; + + fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks); + build_recv_group(p, fd, p.recv_socks, attach_cbpf); + test_recv_order(p, fd, p.recv_socks); + + fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); + attach_cbpf(fd[0], p.recv_socks / 2); + test_recv_order(p, fd, p.recv_socks / 2); + + for (i = 0; i < p.recv_socks; ++i) + close(fd[i]); +} + +static void test_extra_filter(const struct test_params p) +{ + struct sockaddr * const addr = + new_any_sockaddr(p.recv_family, p.recv_port); + int fd1, fd2, opt; + + fprintf(stderr, "Testing too many filters...\n"); + fd1 = socket(p.recv_family, p.protocol, 0); + if (fd1 < 0) + error(1, errno, "failed to create socket 1"); + fd2 = socket(p.recv_family, p.protocol, 0); + if (fd2 < 0) + error(1, errno, "failed to create socket 
2"); + + opt = 1; + if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 1"); + if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 2"); + + attach_ebpf(fd1, 10); + attach_ebpf(fd2, 10); + + if (bind(fd1, addr, sockaddr_size())) + error(1, errno, "failed to bind recv socket 1"); + + if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE) + error(1, errno, "bind socket 2 should fail with EADDRINUSE"); + + free(addr); +} + +static void test_filter_no_reuseport(const struct test_params p) +{ + struct sockaddr * const addr = + new_any_sockaddr(p.recv_family, p.recv_port); + const char bpf_license[] = "GPL"; + struct bpf_insn ecode[] = { + { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 }, + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }}; + union bpf_attr eprog; + struct sock_fprog cprog; + int fd, bpf_fd; + + fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n"); + + memset(&eprog, 0, sizeof(eprog)); + eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + eprog.insn_cnt = ARRAY_SIZE(ecode); + eprog.insns = (uint64_t)ecode; + eprog.license = (uint64_t)bpf_license; + eprog.kern_version = 0; + + memset(&cprog, 0, sizeof(cprog)); + cprog.len = ARRAY_SIZE(ccode); + cprog.filter = ccode; + + + bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog)); + if (bpf_fd < 0) + error(1, errno, "ebpf error"); + fd = socket(p.recv_family, p.protocol, 0); + if (fd < 0) + error(1, errno, "failed to create socket 1"); + + if (bind(fd, addr, sockaddr_size())) + error(1, errno, "failed to bind recv socket 1"); + + errno = 0; + if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd, + sizeof(bpf_fd)) || errno != EINVAL) + error(1, errno, "setsockopt should have returned EINVAL"); + + errno = 0; + if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog, + 
sizeof(cprog)) || errno != EINVAL) + error(1, errno, "setsockopt should have returned EINVAL"); + + free(addr); +} + +static void test_filter_without_bind(void) +{ + int fd1, fd2; + + fprintf(stderr, "Testing filter add without bind...\n"); + fd1 = socket(AF_INET, SOCK_DGRAM, 0); + if (fd1 < 0) + error(1, errno, "failed to create socket 1"); + fd2 = socket(AF_INET, SOCK_DGRAM, 0); + if (fd2 < 0) + error(1, errno, "failed to create socket 2"); + + attach_ebpf(fd1, 10); + attach_cbpf(fd2, 10); + + close(fd1); + close(fd2); +} + + +int main(void) +{ + fprintf(stderr, "---- IPv4 UDP ----\n"); + /* NOTE: UDP socket lookups traverse a different code path when there + * are > 10 sockets in a group. Run the bpf test through both paths. + */ + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 10, + .recv_port = 8000, + .send_port_min = 9000}); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8000, + .send_port_min = 9000}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 10, + .recv_port = 8001, + .send_port_min = 9020}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8001, + .send_port_min = 9020}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_port = 8002}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_port = 8008}); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_socks = 10, + .recv_port = 8003, + .send_port_min = 9040}); + 
test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8003, + .send_port_min = 9040}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_socks = 10, + .recv_port = 8004, + .send_port_min = 9060}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8004, + .send_port_min = 9060}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_port = 8005}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_port = 8009}); + + fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8006, + .send_port_min = 9080}); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 10, + .recv_port = 8006, + .send_port_min = 9080}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 10, + .recv_port = 8007, + .send_port_min = 9100}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8007, + .send_port_min = 9100}); + + + test_filter_without_bind(); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} |