summaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 17:41:05 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 17:41:05 (GMT)
commitb0f85fa11aefc4f3e03306b4cd47f113bd57dcba (patch)
tree1333d36d99fde3f97210795941fc246f0ad08a75 /samples
parentccc9d4a6d640cbde05d519edeb727881646cf71b (diff)
parentf32bfb9a8ca083f8d148ea90ae5ba66f4831836e (diff)
downloadlinux-b0f85fa11aefc4f3e03306b4cd47f113bd57dcba.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: Changes of note: 1) Allow to schedule ICMP packets in IPVS, from Alex Gartrell. 2) Provide FIB table ID in ipv4 route dumps just as ipv6 does, from David Ahern. 3) Allow the user to ask for the statistics to be filtered out of ipv4/ipv6 address netlink dumps. From Sowmini Varadhan. 4) More work to pass the network namespace context around deep into various packet path APIs, starting with the netfilter hooks. From Eric W Biederman. 5) Add layer 2 TX/RX checksum offloading to qeth driver, from Thomas Richter. 6) Use usec resolution for SYN/ACK RTTs in TCP, from Yuchung Cheng. 7) Support Very High Throughput in wireless MESH code, from Bob Copeland. 8) Allow setting the ageing_time in switchdev/rocker. From Scott Feldman. 9) Properly autoload L2TP type modules, from Stephen Hemminger. 10) Fix and enable offload features by default in 8139cp driver, from David Woodhouse. 11) Support both ipv4 and ipv6 sockets in a single vxlan device, from Jiri Benc. 12) Fix CWND limiting of thin streams in TCP, from Bendik Rønning Opstad. 13) Fix IPSEC flowcache overflows on large systems, from Steffen Klassert. 14) Convert bridging to track VLANs using rhashtable entries rather than a bitmap. From Nikolay Aleksandrov. 15) Make TCP listener handling completely lockless, this is a major accomplishment. Incoming request sockets now live in the established hash table just like any other socket too. From Eric Dumazet. 15) Provide more bridging attributes to netlink, from Nikolay Aleksandrov. 16) Use hash based algorithm for ipv4 multipath routing, this was very long overdue. From Peter Nørlund. 17) Several y2038 cures, mostly avoiding timespec. From Arnd Bergmann. 18) Allow non-root execution of EBPF programs, from Alexei Starovoitov. 19) Support SO_INCOMING_CPU as setsockopt, from Eric Dumazet. This influences the port binding selection logic used by SO_REUSEPORT. 20) Add ipv6 support to VRF, from David Ahern. 21) Add support for Mellanox Spectrum switch ASIC, from Jiri Pirko. 22) Add rtl8xxxu Realtek wireless driver, from Jes Sorensen. 23) Implement RACK loss recovery in TCP, from Yuchung Cheng. 24) Support multipath routes in MPLS, from Roopa Prabhu. 25) Fix POLLOUT notification for listening sockets in AF_UNIX, from Eric Dumazet. 26) Add new QED Qlogic river, from Yuval Mintz, Manish Chopra, and Sudarsana Kalluru. 27) Don't fetch timestamps on AF_UNIX sockets, from Hannes Frederic Sowa. 28) Support ipv6 geneve tunnels, from John W Linville. 29) Add flood control support to switchdev layer, from Ido Schimmel. 30) Fix CHECKSUM_PARTIAL handling of potentially fragmented frames, from Hannes Frederic Sowa. 31) Support persistent maps and progs in bpf, from Daniel Borkmann. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1790 commits) sh_eth: use DMA barriers switchdev: respect SKIP_EOPNOTSUPP flag in case there is no recursion net: sched: kill dead code in sch_choke.c irda: Delete an unnecessary check before the function call "irlmp_unregister_service" net: dsa: mv88e6xxx: include DSA ports in VLANs net: dsa: mv88e6xxx: disable SA learning for DSA and CPU ports net/core: fix for_each_netdev_feature vlan: Invoke driver vlan hooks only if device is present arcnet/com20020: add LEDS_CLASS dependency bpf, verifier: annotate verbose printer with __printf dp83640: Only wait for timestamps for packets with timestamping enabled. ptp: Change ptp_class to a proper bitmask dp83640: Prune rx timestamp list before reading from it dp83640: Delay scheduled work. dp83640: Include hash in timestamp/packet matching ipv6: fix tunnel error handling net/mlx5e: Fix LSO vlan insertion net/mlx5e: Re-eanble client vlan TX acceleration net/mlx5e: Return error in case mlx5e_set_features() fails net/mlx5e: Don't allow more than max supported channels ...
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile10
-rw-r--r--samples/bpf/bpf_helpers.h6
-rw-r--r--samples/bpf/fds_example.c183
-rw-r--r--samples/bpf/libbpf.c19
-rw-r--r--samples/bpf/libbpf.h11
-rw-r--r--samples/bpf/tcbpf1_kern.c24
-rw-r--r--samples/bpf/test_verifier.c357
-rw-r--r--samples/bpf/trace_output_kern.c31
-rw-r--r--samples/bpf/trace_output_user.c196
9 files changed, 826 insertions, 11 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 63e7d50..79b4596 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -4,6 +4,7 @@ obj- := dummy.o
# List of programs to build
hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example
+hostprogs-y += fds_example
hostprogs-y += sockex1
hostprogs-y += sockex2
hostprogs-y += sockex3
@@ -13,11 +14,13 @@ hostprogs-y += tracex3
hostprogs-y += tracex4
hostprogs-y += tracex5
hostprogs-y += tracex6
+hostprogs-y += trace_output
hostprogs-y += lathist
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
+fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
@@ -27,6 +30,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
+trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o
# Tell kbuild to always build the programs
@@ -40,12 +44,14 @@ always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
always += tracex6_kern.o
+always += trace_output_kern.o
always += tcbpf1_kern.o
always += lathist_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_fds_example += -lelf
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf
HOSTLOADLIBES_sockex3 += -lelf
@@ -55,6 +61,7 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
# point this to your LLVM backend with bpf support
@@ -64,3 +71,6 @@ $(obj)/%.o: $(src)/%.c
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index af44e56..7ad19e1 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -33,6 +33,12 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
(void *) BPF_FUNC_get_current_comm;
static int (*bpf_perf_event_read)(void *map, int index) =
(void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+ (void *) BPF_FUNC_perf_event_output;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
new file mode 100644
index 0000000..e2fd16c
--- /dev/null
+++ b/samples/bpf/fds_example.c
@@ -0,0 +1,183 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "bpf_load.h"
+#include "libbpf.h"
+
+#define BPF_F_PIN (1 << 0)
+#define BPF_F_GET (1 << 1)
+#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET)
+
+#define BPF_F_KEY (1 << 2)
+#define BPF_F_VAL (1 << 3)
+#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL)
+
+#define BPF_M_UNSPEC 0
+#define BPF_M_MAP 1
+#define BPF_M_PROG 2
+
+static void usage(void)
+{
+ printf("Usage: fds_example [...]\n");
+ printf(" -F <file> File to pin/get object\n");
+ printf(" -P |- pin object\n");
+ printf(" -G `- get object\n");
+ printf(" -m eBPF map mode\n");
+ printf(" -k <key> |- map key\n");
+ printf(" -v <value> `- map value\n");
+ printf(" -p eBPF prog mode\n");
+ printf(" -o <object> `- object file\n");
+ printf(" -h Display this help.\n");
+}
+
+static int bpf_map_create(void)
+{
+ return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+ sizeof(uint32_t), 1024);
+}
+
+static int bpf_prog_create(const char *object)
+{
+ static const struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ if (object) {
+ assert(!load_bpf_file((char *)object));
+ return prog_fd[0];
+ } else {
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
+ insns, sizeof(insns), "GPL", 0);
+ }
+}
+
+static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
+ uint32_t value)
+{
+ int fd, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_map_create();
+ printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) {
+ ret = bpf_update_elem(fd, &key, &value, 0);
+ printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ } else if (flags & BPF_F_KEY) {
+ ret = bpf_lookup_elem(fd, &key, &value);
+ printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ }
+
+ return 0;
+}
+
+static int bpf_do_prog(const char *file, uint32_t flags, const char *object)
+{
+ int fd, sock, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_prog_create(object);
+ printf("bpf: prog fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ sock = open_raw_sock("lo");
+ assert(sock > 0);
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &fd, sizeof(fd));
+ printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, fd,
+ ret, strerror(errno));
+ assert(ret == 0);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = NULL, *object = NULL;
+ uint32_t key = 0, value = 0, flags = 0;
+ int opt, mode = BPF_M_UNSPEC;
+
+ while ((opt = getopt(argc, argv, "F:PGmk:v:po:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'F':
+ file = optarg;
+ break;
+ case 'P':
+ flags |= BPF_F_PIN;
+ break;
+ case 'G':
+ flags |= BPF_F_GET;
+ break;
+ /* Map-related args */
+ case 'm':
+ mode = BPF_M_MAP;
+ break;
+ case 'k':
+ key = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_KEY;
+ break;
+ case 'v':
+ value = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_VAL;
+ break;
+ /* Prog-related args */
+ case 'p':
+ mode = BPF_M_PROG;
+ break;
+ case 'o':
+ object = optarg;
+ break;
+ default:
+ goto out;
+ }
+ }
+
+ if (!(flags & BPF_F_PIN_GET) || !file)
+ goto out;
+
+ switch (mode) {
+ case BPF_M_MAP:
+ return bpf_do_map(file, flags, key, value);
+ case BPF_M_PROG:
+ return bpf_do_prog(file, flags, object);
+ }
+out:
+ usage();
+ return -1;
+}
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 7e1efa7..65a8d48 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -103,6 +103,25 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ .bpf_fd = fd,
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
int open_raw_sock(const char *name)
{
struct sockaddr_ll sll;
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 7235e29..014aacf 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
#define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE];
@@ -64,6 +67,14 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
.off = 0, \
.imm = 0 })
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
/* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM) \
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 9bfb2eb..fa051b3 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -5,7 +5,7 @@
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
-
+#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"
/* compiler workaround */
@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
return 0;
}
+SEC("redirect_xmit")
+int _redirect_xmit(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 0);
+}
+SEC("redirect_recv")
+int _redirect_recv(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 1);
+}
+SEC("clone_redirect_xmit")
+int _clone_redirect_xmit(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 0);
+ return TC_ACT_SHOT;
+}
+SEC("clone_redirect_recv")
+int _clone_redirect_recv(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 1);
+ return TC_ACT_SHOT;
+}
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index ee0f110..563c507 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -15,20 +15,27 @@
#include <string.h>
#include <linux/filter.h>
#include <stddef.h>
+#include <stdbool.h>
+#include <sys/resource.h>
#include "libbpf.h"
#define MAX_INSNS 512
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define MAX_FIXUPS 8
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
- int fixup[32];
+ int fixup[MAX_FIXUPS];
+ int prog_array_fixup[MAX_FIXUPS];
const char *errstr;
+ const char *errstr_unpriv;
enum {
+ UNDEF,
ACCEPT,
REJECT
- } result;
+ } result, result_unpriv;
enum bpf_prog_type prog_type;
};
@@ -96,6 +103,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -109,6 +117,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -219,6 +228,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "R0 !read_ok",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -294,7 +304,9 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
+ .result_unpriv = REJECT,
},
{
"check corrupted spill/fill",
@@ -310,6 +322,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "attempt to corrupt spilled",
.errstr = "corrupted spill",
.result = REJECT,
},
@@ -473,6 +486,7 @@ static struct bpf_test tests[] = {
},
.fixup = {3},
.errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R0 leaks addr",
.result = REJECT,
},
{
@@ -495,6 +509,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -521,6 +537,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -555,6 +573,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup = {24},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -603,6 +623,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -642,6 +664,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -699,6 +723,7 @@ static struct bpf_test tests[] = {
},
.fixup = {4},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -720,6 +745,7 @@ static struct bpf_test tests[] = {
},
.fixup = {6},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -742,6 +768,7 @@ static struct bpf_test tests[] = {
},
.fixup = {7},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -752,6 +779,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
.result = REJECT,
},
{
@@ -762,6 +790,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
.result = REJECT,
},
{
@@ -772,16 +801,18 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
.result = REJECT,
},
{
"check out of range skb->cb access",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[60])),
+ offsetof(struct __sk_buff, cb[0]) + 256),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
+ .errstr_unpriv = "",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_ACT,
},
@@ -803,6 +834,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
},
{
"write skb fields from tc_cls_act prog",
@@ -819,6 +852,8 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[3])),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -881,6 +916,270 @@ static struct bpf_test tests[] = {
.result = REJECT,
.errstr = "invalid stack off=0 size=8",
},
+ {
+ "unpriv: return pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ },
+ {
+ "unpriv: add const to pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: add pointer to pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: neg pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: cmp pointer with const",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer comparison",
+ },
+ {
+ "unpriv: cmp pointer with pointer",
+ .insns = {
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 pointer comparison",
+ },
+ {
+ "unpriv: check that printk is disallowed",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "unknown func 6",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to helper function",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R4 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: indirectly pass pointer on stack to helper function",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ },
+ {
+ "unpriv: mangle pointer on stack 1",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: mangle pointer on stack 2",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: read pointer from stack in small chunks",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid size",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into ctx",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into map elem value",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: partial copy of pointer",
+ .insns = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 partial copy",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to tail_call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_array_fixup = {1},
+ .errstr_unpriv = "R3 leaks addr into helper",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp map pointer with zero",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {1},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: write into frame pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: cmp of frame pointer",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp of stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: obfuscate stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer arithmetic",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
};
static int probe_filter_length(struct bpf_insn *fp)
@@ -896,13 +1195,24 @@ static int probe_filter_length(struct bpf_insn *fp)
static int create_map(void)
{
- long long key, value = 0;
int map_fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 1024);
- if (map_fd < 0) {
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(long long), sizeof(long long), 1024);
+ if (map_fd < 0)
printf("failed to create map '%s'\n", strerror(errno));
- }
+
+ return map_fd;
+}
+
+static int create_prog_array(void)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
+ sizeof(int), sizeof(int), 4);
+ if (map_fd < 0)
+ printf("failed to create prog_array '%s'\n", strerror(errno));
return map_fd;
}
@@ -910,13 +1220,17 @@ static int create_map(void)
static int test(void)
{
int prog_fd, i, pass_cnt = 0, err_cnt = 0;
+ bool unpriv = geteuid() != 0;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_insn *prog = tests[i].insns;
int prog_type = tests[i].prog_type;
int prog_len = probe_filter_length(prog);
int *fixup = tests[i].fixup;
- int map_fd = -1;
+ int *prog_array_fixup = tests[i].prog_array_fixup;
+ int expected_result;
+ const char *expected_errstr;
+ int map_fd = -1, prog_array_fd = -1;
if (*fixup) {
map_fd = create_map();
@@ -926,13 +1240,31 @@ static int test(void)
fixup++;
} while (*fixup);
}
+ if (*prog_array_fixup) {
+ prog_array_fd = create_prog_array();
+
+ do {
+ prog[*prog_array_fixup].imm = prog_array_fd;
+ prog_array_fixup++;
+ } while (*prog_array_fixup);
+ }
printf("#%d %s ", i, tests[i].descr);
prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
prog, prog_len * sizeof(struct bpf_insn),
"GPL", 0);
- if (tests[i].result == ACCEPT) {
+ if (unpriv && tests[i].result_unpriv != UNDEF)
+ expected_result = tests[i].result_unpriv;
+ else
+ expected_result = tests[i].result;
+
+ if (unpriv && tests[i].errstr_unpriv)
+ expected_errstr = tests[i].errstr_unpriv;
+ else
+ expected_errstr = tests[i].errstr;
+
+ if (expected_result == ACCEPT) {
if (prog_fd < 0) {
printf("FAIL\nfailed to load prog '%s'\n",
strerror(errno));
@@ -947,7 +1279,7 @@ static int test(void)
err_cnt++;
goto fail;
}
- if (strstr(bpf_log_buf, tests[i].errstr) == 0) {
+ if (strstr(bpf_log_buf, expected_errstr) == 0) {
printf("FAIL\nunexpected error message: %s",
bpf_log_buf);
err_cnt++;
@@ -960,6 +1292,8 @@ static int test(void)
fail:
if (map_fd >= 0)
close(map_fd);
+ if (prog_array_fd >= 0)
+ close(prog_array_fd);
close(prog_fd);
}
@@ -970,5 +1304,8 @@ fail:
int main(void)
{
+ struct rlimit r = {1 << 20, 1 << 20};
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
return test();
}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
new file mode 100644
index 0000000..8d8d1ec
--- /dev/null
+++ b/samples/bpf/trace_output_kern.c
@@ -0,0 +1,31 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 2,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct S {
+ u64 pid;
+ u64 cookie;
+ } data;
+
+ memset(&data, 0, sizeof(data));
+ data.pid = bpf_get_current_pid_tgid();
+ data.cookie = 0x12345678;
+
+ bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
new file mode 100644
index 0000000..661a7d0
--- /dev/null
+++ b/samples/bpf/trace_output_user.c
@@ -0,0 +1,196 @@
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int pmu_fd;
+
+int page_size;
+int page_cnt = 8;
+volatile struct perf_event_mmap_page *header;
+
+typedef void (*print_fn)(void *data, int size);
+
+static int perf_event_mmap(int fd)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ header = base;
+ return 0;
+}
+
+static int perf_event_poll(int fd)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+ return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+void perf_event_read(print_fn fn)
+{
+ __u64 data_tail = header->data_tail;
+ __u64 data_head = header->data_head;
+ __u64 buffer_size = page_cnt * page_size;
+ void *base, *begin, *end;
+ char buf[256];
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return;
+
+ base = ((char *)header) + page_size;
+
+ begin = base + data_tail % buffer_size;
+ end = base + data_head % buffer_size;
+
+ while (begin != end) {
+ struct perf_event_sample *e;
+
+ e = begin;
+ if (begin + e->header.size > base + buffer_size) {
+ long len = base + buffer_size - begin;
+
+ assert(len < e->header.size);
+ memcpy(buf, begin, len);
+ memcpy(buf + len, base, e->header.size - len);
+ e = (void *) buf;
+ begin = base + e->header.size - len;
+ } else if (begin + e->header.size == base + buffer_size) {
+ begin = base;
+ } else {
+ begin += e->header.size;
+ }
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ fn(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ header->data_tail = data_head;
+}
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static __u64 start_time;
+
+#define MAX_CNT 100000ll
+
+static void print_bpf_output(void *data, int size)
+{
+ static __u64 cnt;
+ struct {
+ __u64 pid;
+ __u64 cookie;
+ } *e = data;
+
+ if (e->cookie != 0x12345678) {
+ printf("BUG pid %llx cookie %llx sized %d\n",
+ e->pid, e->cookie, size);
+ kill(0, SIGINT);
+ }
+
+ cnt++;
+
+ if (cnt == MAX_CNT) {
+ printf("recv %lld events per sec\n",
+ MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ kill(0, SIGINT);
+ }
+}
+
+static void test_bpf_perf_event(void)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ };
+ int key = 0;
+
+ pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+
+ assert(pmu_fd >= 0);
+ assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
+ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+
+ if (perf_event_mmap(pmu_fd) < 0)
+ return 1;
+
+ f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
+ (void) f;
+
+ start_time = time_get_ns();
+ for (;;) {
+ perf_event_poll(pmu_fd);
+ perf_event_read(print_bpf_output);
+ }
+
+ return 0;
+}