From 1f3fe7ebf6136c341012db9f554d4caa566fcbaa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:42 -0700 Subject: cgroup: Add cgroup_get_from_fd Add a helper function to get a cgroup2 from a fd. It will be stored in a bpf array (BPF_MAP_TYPE_CGROUP_ARRAY) which will be introduced in the later patch. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Tejun Heo Signed-off-by: David S. Miller diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a20320c..984f73b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); +struct cgroup *cgroup_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 75c0ff0..50787cd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -62,6 +62,7 @@ #include #include #include +#include #include /* @@ -6209,6 +6210,40 @@ struct cgroup *cgroup_get_from_path(const char *path) } EXPORT_SYMBOL_GPL(cgroup_get_from_path); +/** + * cgroup_get_from_fd - get a cgroup pointer from a fd + * @fd: fd obtained by open(cgroup2_dir) + * + * Find the cgroup from a fd which should be obtained + * by opening a cgroup directory. Returns a pointer to the + * cgroup on success. ERR_PTR is returned if the cgroup + * cannot be found. + */ +struct cgroup *cgroup_get_from_fd(int fd) +{ + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + struct file *f; + + f = fget_raw(fd); + if (!f) + return ERR_PTR(-EBADF); + + css = css_tryget_online_from_dir(f->f_path.dentry, NULL); + fput(f); + if (IS_ERR(css)) + return ERR_CAST(css); + + cgrp = css->cgroup; + if (!cgroup_on_dfl(cgrp)) { + cgroup_put(cgrp); + return ERR_PTR(-EBADF); + } + + return cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_get_from_fd); + /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. -- cgit v0.10.2 From 4ed8ec521ed57c4e207ad464ca0388776de74d4b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:43 -0700 Subject: cgroup: bpf: Add BPF_MAP_TYPE_CGROUP_ARRAY Add a BPF_MAP_TYPE_CGROUP_ARRAY and its bpf_map_ops's implementations. To update an element, the caller is expected to obtain a cgroup2 backed fd by open(cgroup2_dir) and then update the array with that fd. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index be6ac12..26c04be 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,6 +84,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, }; enum bpf_prog_type { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 4ec57a6..db1a743 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -537,3 +537,46 @@ static int __init register_perf_event_array_map(void) return 0; } late_initcall(register_perf_event_array_map); + +#ifdef CONFIG_SOCK_CGROUP_DATA +static void *cgroup_fd_array_get_ptr(struct bpf_map *map, + struct file *map_file /* not used */, + int fd) +{ + return cgroup_get_from_fd(fd); +} + +static void cgroup_fd_array_put_ptr(void *ptr) +{ + /* cgroup_put free cgrp after a rcu grace period */ + cgroup_put(ptr); +} + +static void cgroup_fd_array_free(struct bpf_map *map) +{ + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static const struct bpf_map_ops cgroup_array_ops = { + .map_alloc = fd_array_map_alloc, + .map_free = cgroup_fd_array_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = cgroup_fd_array_get_ptr, + .map_fd_put_ptr = cgroup_fd_array_put_ptr, +}; + +static struct bpf_map_type_list cgroup_array_type __read_mostly = { + .ops = &cgroup_array_ops, + .type = BPF_MAP_TYPE_CGROUP_ARRAY, +}; + +static int __init register_cgroup_array_map(void) +{ + bpf_register_map_type(&cgroup_array_type); + return 0; +} +late_initcall(register_cgroup_array_map); +#endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 22863d9..96d938a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -393,7 +393,8 @@ static int map_update_elem(union bpf_attr *attr) } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_update(map, key, value, attr->flags); } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || - map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) { rcu_read_lock(); err = bpf_fd_array_map_update_elem(map, f.file, key, value, attr->flags); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eec9f90..69ba225 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1035,6 +1035,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (func_id != BPF_FUNC_get_stackid) goto error; break; + case BPF_MAP_TYPE_CGROUP_ARRAY: + goto error; default: break; } -- cgit v0.10.2 From 4a482f34afcc162d8456f449b137ec2a95be60d8 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:44 -0700 Subject: cgroup: bpf: Add bpf_skb_in_cgroup_proto Adds a bpf helper, bpf_skb_in_cgroup, to decide if a skb->sk belongs to a descendant of a cgroup2. It is similar to the feature added in netfilter: commit c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") The user is expected to populate a BPF_MAP_TYPE_CGROUP_ARRAY which will be used by the bpf_skb_in_cgroup. Modifications to the bpf verifier is to ensure BPF_MAP_TYPE_CGROUP_ARRAY and bpf_skb_in_cgroup() are always used together. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 26c04be..f44504d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -337,6 +337,17 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_type, + /** + * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_skb_in_cgroup, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 69ba225..e206c21 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1036,7 +1036,9 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_CGROUP_ARRAY: - goto error; + if (func_id != BPF_FUNC_skb_in_cgroup) + goto error; + break; default: break; } @@ -1056,6 +1058,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; + case BPF_FUNC_skb_in_cgroup: + if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) + goto error; + break; default: break; } diff --git a/net/core/filter.c b/net/core/filter.c index 76fee35..54071cf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2239,6 +2239,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } +#ifdef CONFIG_SOCK_CGROUP_DATA +static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long)r1; + struct bpf_map *map = (struct bpf_map *)(long)r2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + struct sock *sk; + u32 i = (u32)r3; + + sk = skb->sk; + if (!sk || !sk_fullsock(sk)) + return -ENOENT; + + if (unlikely(i >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[i]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); +} + +static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { + .func = bpf_skb_in_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; +#endif + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -2307,6 +2341,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return bpf_get_event_output_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; +#ifdef CONFIG_SOCK_CGROUP_DATA + case BPF_FUNC_skb_in_cgroup: + return &bpf_skb_in_cgroup_proto; +#endif default: return sk_filter_func_proto(func_id); } -- cgit v0.10.2 From a3f74617340b598dbc7eb5b68d4ed53b4a70f5eb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:45 -0700 Subject: cgroup: bpf: Add an example to do cgroup checking in BPF test_cgrp2_array_pin.c: A userland program that creates a bpf_map (BPF_MAP_TYPE_GROUP_ARRAY), pouplates/updates it with a cgroup2's backed fd and pins it to a bpf-fs's file. The pinned file can be loaded by tc and then used by the bpf prog later. This program can also update an existing pinned array and it could be useful for debugging/testing purpose. test_cgrp2_tc_kern.c: A bpf prog which should be loaded by tc. It is to demonstrate the usage of bpf_skb_in_cgroup. test_cgrp2_tc.sh: A script that glues the test_cgrp2_array_pin.c and test_cgrp2_tc_kern.c together. The idea is like: 1. Load the test_cgrp2_tc_kern.o by tc 2. Use test_cgrp2_array_pin.c to populate a BPF_MAP_TYPE_CGROUP_ARRAY with a cgroup fd 3. Do a 'ping -6 ff02::1%ve' to ensure the packet has been dropped because of a match on the cgroup Most of the lines in test_cgrp2_tc.sh is the boilerplate to setup the cgroup/bpf-fs/net-devices/netns...etc. It is not bulletproof on errors but should work well enough and give enough debug info if things did not go well. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0bf2478..a98b780 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -20,6 +20,7 @@ hostprogs-y += offwaketime hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead +hostprogs-y += test_cgrp2_array_pin test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o spintest-objs := bpf_load.o libbpf.o spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o +test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -61,6 +63,7 @@ always += map_perf_test_kern.o always += test_overhead_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o +always += test_cgrp2_tc_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7904a2a..84e3fd9 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_skb_in_cgroup; #if defined(__x86_64__) diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c new file mode 100644 index 0000000..70e86f7 --- /dev/null +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: test_cgrp2_array_pin [...]\n"); + printf(" -F File to pin an BPF cgroup array\n"); + printf(" -U Update an already pinned BPF cgroup array\n"); + printf(" -v Full path of the cgroup2\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL, *cg2 = NULL; + int create_array = 1; + int array_key = 0; + int array_fd = -1; + int cg2_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:v:")) != -1) { + switch (opt) { + /* General args */ + case 'F': + pinned_file = optarg; + break; + case 'U': + pinned_file = optarg; + create_array = 0; + break; + case 'v': + cg2 = optarg; + break; + default: + usage(); + goto out; + } + } + + if (!cg2 || !pinned_file) { + usage(); + goto out; + } + + cg2_fd = open(cg2, O_RDONLY); + if (cg2_fd < 0) { + fprintf(stderr, "open(%s,...): %s(%d)\n", + cg2, strerror(errno), errno); + goto out; + } + + if (create_array) { + array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, + sizeof(uint32_t), sizeof(uint32_t), + 1, 0); + if (array_fd < 0) { + fprintf(stderr, + "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", + strerror(errno), errno); + goto out; + } + } else { + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + + ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + if (create_array) { + ret = bpf_obj_pin(array_fd, pinned_file); + if (ret) { + fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + +out: + if (array_fd != -1) + close(array_fd); + if (cg2_fd != -1) + close(cg2_fd); + return ret; +} diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh new file mode 100755 index 0000000..0b119ee --- /dev/null +++ b/samples/bpf/test_cgrp2_tc.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +MY_DIR=$(dirname $0) +# Details on the bpf prog +BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin' +BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o" +BPF_SECTION='filter' + +[ -z "$TC" ] && TC='tc' +[ -z "$IP" ] && IP='ip' + +# Names of the veth interface, net namespace...etc. +HOST_IFC='ve' +NS_IFC='vens' +NS='ns' + +find_mnt() { + cat /proc/mounts | \ + awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' +} + +# Init cgroup2 vars +init_cgrp2_vars() { + CGRP2_ROOT=$(find_mnt cgroup2) + if [ -z "$CGRP2_ROOT" ] + then + CGRP2_ROOT='/mnt/cgroup2' + MOUNT_CGRP2="yes" + fi + CGRP2_TC="$CGRP2_ROOT/tc" + CGRP2_TC_LEAF="$CGRP2_TC/leaf" +} + +# Init bpf fs vars +init_bpf_fs_vars() { + local bpf_fs_root=$(find_mnt bpf) + [ -n "$bpf_fs_root" ] || return -1 + BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals" +} + +setup_cgrp2() { + case $1 in + start) + if [ "$MOUNT_CGRP2" == 'yes' ] + then + [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT + mount -t cgroup2 none $CGRP2_ROOT || return $? + fi + mkdir -p $CGRP2_TC_LEAF + ;; + *) + rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC + [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT + ;; + esac +} + +setup_bpf_cgrp2_array() { + local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME" + case $1 in + start) + $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC + ;; + *) + [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array + ;; + esac +} + +setup_net() { + case $1 in + start) + $IP link add $HOST_IFC type veth peer name $NS_IFC || return $? + $IP link set dev $HOST_IFC up || return $? + sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0 + + $IP netns add ns || return $? + $IP link set dev $NS_IFC netns ns || return $? + $IP -n $NS link set dev $NS_IFC up || return $? + $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0 + $TC qdisc add dev $HOST_IFC clsact || return $? + $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $? + ;; + *) + $IP netns del $NS + $IP link del $HOST_IFC + ;; + esac +} + +run_in_cgrp() { + # Fork another bash and move it under the specified cgroup. + # It makes the cgroup cleanup easier at the end of the test. + cmd='echo $$ > ' + cmd="$cmd $1/cgroup.procs; exec $2" + bash -c "$cmd" +} + +do_test() { + run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null" + local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \ + awk '/drop/{print substr($7, 0, index($7, ",")-1)}') + if [[ $dropped -eq 0 ]] + then + echo "FAIL" + return 1 + else + echo "Successfully filtered $dropped packets" + return 0 + fi +} + +do_exit() { + if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] + then + echo "------ DEBUG ------" + echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo + echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo + if [ -d "$BPF_FS_TC_SHARE" ] + then + echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo + fi + echo "Host net:" + $IP netns + $IP link show dev $HOST_IFC + $IP -6 a show dev $HOST_IFC + $TC -s qdisc show dev $HOST_IFC + echo + echo "$NS net:" + $IP -n $NS link show dev $NS_IFC + $IP -n $NS -6 link show dev $NS_IFC + echo "------ DEBUG ------" + echo + fi + + if [ "$MODE" != 'nocleanup' ] + then + setup_net stop + setup_bpf_cgrp2_array stop + setup_cgrp2 stop + fi +} + +init_cgrp2_vars +init_bpf_fs_vars + +while [[ $# -ge 1 ]] +do + a="$1" + case $a in + debug) + DEBUG='yes' + shift 1 + ;; + cleanup-only) + MODE='cleanuponly' + shift 1 + ;; + no-cleanup) + MODE='nocleanup' + shift 1 + ;; + *) + echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" + echo " debug: Print cgrp and network setup details at the end of the test" + echo " cleanup-only: Try to cleanup things from last test. No test will be run" + echo " no-cleanup: Run the test but don't do cleanup at the end" + echo "[Note: If no arg is given, it will run the test and do cleanup at the end]" + echo + exit -1 + ;; + esac +done + +trap do_exit 0 + +[ "$MODE" == 'cleanuponly' ] && exit + +setup_cgrp2 start || exit $? +setup_net start || exit $? +init_bpf_fs_vars || exit $? +setup_bpf_cgrp2_array start || exit $? +do_test +echo diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c new file mode 100644 index 0000000..2732c37 --- /dev/null +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +SEC("filter") +int handle_egress(struct __sk_buff *skb) +{ + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + struct ipv6hdr *ip6h = data + sizeof(*eth); + void *data_end = (void *)(long)skb->data_end; + char dont_care_msg[] = "dont care %04x %d\n"; + char pass_msg[] = "pass\n"; + char reject_msg[] = "reject\n"; + + /* single length check */ + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (eth->h_proto != htons(ETH_P_IPV6) || + ip6h->nexthdr != IPPROTO_ICMPV6) { + bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), + eth->h_proto, ip6h->nexthdr); + return TC_ACT_OK; + } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { + bpf_trace_printk(pass_msg, sizeof(pass_msg)); + return TC_ACT_OK; + } else { + bpf_trace_printk(reject_msg, sizeof(reject_msg)); + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; -- cgit v0.10.2