diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bpf.h | 2 | ||||
-rw-r--r-- | include/linux/filter.h | 4 | ||||
-rw-r--r-- | include/linux/netdevice.h | 17 | ||||
-rw-r--r-- | include/linux/perf_event.h | 4 | ||||
-rw-r--r-- | include/linux/skbuff.h | 7 | ||||
-rw-r--r-- | include/linux/stmmac.h | 2 | ||||
-rw-r--r-- | include/linux/trace_events.h | 9 | ||||
-rw-r--r-- | include/linux/udp.h | 16 | ||||
-rw-r--r-- | include/net/inet6_hashtables.h | 12 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 47 | ||||
-rw-r--r-- | include/net/ip.h | 3 | ||||
-rw-r--r-- | include/net/ip_tunnels.h | 11 | ||||
-rw-r--r-- | include/net/ipv6.h | 6 | ||||
-rw-r--r-- | include/net/protocol.h | 3 | ||||
-rw-r--r-- | include/net/request_sock.h | 31 | ||||
-rw-r--r-- | include/net/route.h | 7 | ||||
-rw-r--r-- | include/net/sock.h | 82 | ||||
-rw-r--r-- | include/net/tcp.h | 27 | ||||
-rw-r--r-- | include/net/transp_v6.h | 3 | ||||
-rw-r--r-- | include/net/udp.h | 22 | ||||
-rw-r--r-- | include/net/udp_tunnel.h | 7 | ||||
-rw-r--r-- | include/net/vxlan.h | 69 | ||||
-rw-r--r-- | include/trace/perf.h | 23 | ||||
-rw-r--r-- | include/trace/trace_events.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/if_link.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/net_tstamp.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/sock_diag.h | 1 |
28 files changed, 291 insertions, 139 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 21ee41b..b2365a6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -131,6 +131,7 @@ struct bpf_prog_type_list { struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; + u32 max_ctx_offset; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; @@ -160,6 +161,7 @@ struct bpf_array { #define MAX_TAIL_CALL_CNT 32 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index a51a536..43aa1f8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,14 +465,10 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); -int __sk_detach_filter(struct sock *sk, bool locked); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d0..cb4e508 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2159,23 +2159,6 @@ struct packet_offload { struct list_head list; }; -struct udp_offload; - -struct udp_offload_callbacks { - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff); - int (*gro_complete)(struct sk_buff *skb, - int nhoff, - struct udp_offload *uoff); -}; - -struct udp_offload { - __be16 port; - u8 ipproto; - struct udp_offload_callbacks callbacks; -}; - /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f291275..eb41b53 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -882,8 +882,6 @@ static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned lo */ static inline void perf_fetch_caller_regs(struct pt_regs *regs) { - memset(regs, 0, sizeof(*regs)); - perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } @@ -1018,7 +1016,7 @@ static inline bool perf_paranoid_kernel(void) } extern void perf_event_init(void); -extern void perf_tp_event(u64 addr, u64 count, void *record, +extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 15d0df9..0073812 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); +void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len); +static inline void skb_free_datagram_locked(struct sock *sk, + struct sk_buff *skb) +{ + __skb_free_datagram_locked(sk, skb, 0); +} int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e6bc30a..ffdaca9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -137,5 +137,7 @@ struct plat_stmmacenet_data { void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; + int has_gmac4; + bool tso_en; }; #endif diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0810f81b..fe64412 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -569,6 +569,7 @@ extern int trace_define_field(struct trace_event_call *call, const char *type, int is_signed, int filter_type); extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); +extern int trace_event_get_offsets(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) @@ -605,15 +606,15 @@ extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp); +void perf_trace_buf_update(void *record, u16 type); +void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); static inline void -perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, +perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, struct task_struct *task) { - perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif diff --git a/include/linux/udp.h b/include/linux/udp.h index 87c0949..d1fd8cd 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -71,6 +71,14 @@ struct udp_sock { */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); + + /* GRO functions for UDP socket */ + struct sk_buff ** (*gro_receive)(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sock *sk, + struct sk_buff *skb, + int nhoff); }; static inline struct udp_sock *udp_sk(const struct sock *sk) @@ -98,11 +106,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -#define udp_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry(__sk, list) \ + hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 28332bd..b87beca 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, + bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); + *refcounted = true; if (sk) return sk; - + *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif); } @@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, - int iif) + int iif, + bool *refcounted) { struct sock *sk = skb_steal_sock(skb); + *refcounted = true; if (sk) return sk; return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - iif); + iif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 50f635c..0574493 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -100,14 +100,10 @@ struct inet_bind_hashbucket { /* * Sockets can be hashed in established or listening table - * We must use different 'nulls' end-of-chain value for listening - * hash table, or we might find a socket that was closed and - * reallocated/inserted into established hash table */ -#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_nulls_head head; + struct hlist_head head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ -/* - * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need +/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM - * - * Local BH must be disabled here. */ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, @@ -307,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, + bool *refcounted) { u16 hnum = ntohs(dport); - struct sock *sk = __inet_lookup_established(net, hashinfo, - saddr, sport, daddr, hnum, dif); + struct sock *sk; - return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr, - sport, daddr, hnum, dif); + sk = __inet_lookup_established(net, hashinfo, saddr, sport, + daddr, hnum, dif); + *refcounted = true; + if (sk) + return sk; + *refcounted = false; + return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + sport, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, @@ -325,12 +324,13 @@ static inline struct sock *inet_lookup(struct net *net, const int dif) { struct sock *sk; + bool refcounted; - local_bh_disable(); sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - dport, dif); - local_bh_enable(); + dport, dif, &refcounted); + if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; return sk; } @@ -338,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, - const __be16 dport) + const __be16 dport, + bool *refcounted) { struct sock *sk = skb_steal_sock(skb); const struct iphdr *iph = ip_hdr(skb); + *refcounted = true; if (sk) return sk; - else - return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, - doff, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb)); + + return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + refcounted); } u32 sk_ehashfn(const struct sock *sk); diff --git a/include/net/ip.h b/include/net/ip.h index fad74d3..93725e5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -56,6 +56,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb) } struct ipcm_cookie { + struct sockcm_cookie sockc; __be32 addr; int oif; struct ip_options_rcu *opt; @@ -550,7 +551,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); -int ip_cmsg_send(struct net *net, struct msghdr *msg, +int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 56050f9..16435d8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -295,8 +295,15 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, - bool xnet); +int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool raw_proto, bool xnet); + +static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool xnet) +{ + return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); +} + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0aeb97..55ee1eb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -867,7 +867,8 @@ int ip6_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag); + struct rt6_info *rt, unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc); int ip6_push_pending_frames(struct sock *sk); @@ -884,7 +885,8 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, - unsigned int flags, int dontfrag); + unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { diff --git a/include/net/protocol.h b/include/net/protocol.h index da689f5..bf36ca3 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -107,9 +107,6 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num); void inet_register_protosw(struct inet_protosw *p); void inet_unregister_protosw(struct inet_protosw *p); -int udp_add_offload(struct net *net, struct udp_offload *prot); -void udp_del_offload(struct udp_offload *prot); - #if IS_ENABLED(CONFIG_IPV6) int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index f49759d..6ebe13e 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, struct request_sock *req; req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - - if (req) { - req->rsk_ops = ops; - if (attach_listener) { - sock_hold(sk_listener); - req->rsk_listener = sk_listener; - } else { - req->rsk_listener = NULL; + if (!req) + return NULL; + req->rsk_listener = NULL; + if (attach_listener) { + if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + kmem_cache_free(ops->slab, req); + return NULL; } - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&req->rsk_refcnt, 0); + req->rsk_listener = sk_listener; } + req->rsk_ops = ops; + req_to_sk(req)->sk_prot = sk_listener->sk_prot; + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; + atomic_set(&req->rsk_refcnt, 0); + return req; } diff --git a/include/net/route.h b/include/net/route.h index 9b0a523..f4b11ee 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -322,10 +322,11 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable static inline int inet_iif(const struct sk_buff *skb) { - int iif = skb_rtable(skb)->rt_iif; + struct rtable *rt = skb_rtable(skb); + + if (rt && rt->rt_iif) + return rt->rt_iif; - if (iif) - return iif; return skb->skb_iif; } diff --git a/include/net/sock.h b/include/net/sock.h index 255d3e0..81d6fec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -178,7 +178,7 @@ struct sock_common { int skc_bound_dev_if; union { struct hlist_node skc_bind_node; - struct hlist_nulls_node skc_portaddr_node; + struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; @@ -438,6 +438,7 @@ struct sock { struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; + struct rcu_head sk_rcu; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) @@ -456,28 +457,32 @@ struct sock { #define SK_CAN_REUSE 1 #define SK_FORCE_REUSE 2 +int sk_set_peek_off(struct sock *sk, int val); + static inline int sk_peek_offset(struct sock *sk, int flags) { - if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0)) - return sk->sk_peek_off; - else - return 0; + if (unlikely(flags & MSG_PEEK)) { + s32 off = READ_ONCE(sk->sk_peek_off); + if (off >= 0) + return off; + } + + return 0; } static inline void sk_peek_offset_bwd(struct sock *sk, int val) { - if (sk->sk_peek_off >= 0) { - if (sk->sk_peek_off >= val) - sk->sk_peek_off -= val; - else - sk->sk_peek_off = 0; + s32 off = READ_ONCE(sk->sk_peek_off); + + if (unlikely(off >= 0)) { + off = max_t(s32, off - val, 0); + WRITE_ONCE(sk->sk_peek_off, off); } } static inline void sk_peek_offset_fwd(struct sock *sk, int val) { - if (sk->sk_peek_off >= 0) - sk->sk_peek_off += val; + sk_peek_offset_bwd(sk, -val); } /* @@ -669,18 +674,18 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry(__sk, list, sk_bind_node) /** - * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. * */ -#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \ - for (pos = (head)->first; \ - (!is_a_nulls(pos)) && \ +#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ + for (pos = rcu_dereference((head)->first); \ + pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) static inline struct user_namespace *sk_user_ns(struct sock *sk) { @@ -720,6 +725,7 @@ enum sock_flags { */ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ + SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -1327,7 +1333,12 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) static inline void sock_release_ownership(struct sock *sk) { - sk->sk_lock.owned = 0; + if (sk->sk_lock.owned) { + sk->sk_lock.owned = 0; + + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + } } /* @@ -1349,6 +1360,16 @@ do { \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) +#ifdef CONFIG_LOCKDEP +static inline bool lockdep_sock_is_held(const struct sock *csk) +{ + struct sock *sk = (struct sock *)csk; + + return lockdep_is_held(&sk->sk_lock) || + lockdep_is_held(&sk->sk_lock.slock); +} +#endif + void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) @@ -1418,8 +1439,11 @@ void sk_send_sigurg(struct sock *sk); struct sockcm_cookie { u32 mark; + u16 tsflags; }; +int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, + struct sockcm_cookie *sockc); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc); @@ -1584,8 +1608,8 @@ static inline void sk_rethink_txhash(struct sock *sk) static inline struct dst_entry * __sk_dst_get(struct sock *sk) { - return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + return rcu_dereference_check(sk->sk_dst_cache, + lockdep_sock_is_held(sk)); } static inline struct dst_entry * @@ -1857,6 +1881,7 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); @@ -2007,6 +2032,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); } +static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) +{ + int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + + atomic_add(segs, &sk->sk_drops); +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2054,19 +2086,21 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, sk->sk_stamp = skb->tstamp; } -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags); +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet + * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * * Note : callers should take care of initial *tx_flags value (usually 0) */ -static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, + __u8 *tx_flags) { - if (unlikely(sk->sk_tsflags)) - __sock_tx_timestamp(sk, tx_flags); + if (unlikely(tsflags)) + __sock_tx_timestamp(tsflags, tx_flags); if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } diff --git a/include/net/tcp.h b/include/net/tcp.h index b91370f..74d3ed5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -754,7 +754,8 @@ struct tcp_skb_cb { TCPCB_REPAIRED) __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ - /* 1 byte hole */ + __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ + unused:7; __u32 ack_seq; /* Sequence number ACK'd */ union { struct inet_skb_parm h4; @@ -1039,17 +1040,6 @@ static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp) return 3; } -/* Slow start with delack produces 3 packets of burst, so that - * it is safe "de facto". This will be the default - same as - * the default reordering threshold - but if reordering increases, - * we must be able to allow cwnd to burst at least this much in order - * to not pull it back when holes are filled. - */ -static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) -{ - return tp->reordering; -} - /* Returns end sequence number of the receiver's advertised window */ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) { @@ -1846,4 +1836,17 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) tp->data_segs_in += segs_in; } +/* + * TCP listen path runs lockless. + * We forced "struct sock" to be const qualified to make sure + * we don't modify one of its field by mistake. + * Here, we increment sk_drops which is an atomic_t, so we can safely + * make sock writable again. + */ +static inline void tcp_listendrop(const struct sock *sk) +{ + atomic_inc(&((struct sock *)sk)->sk_drops); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); +} + #endif /* _TCP_H */ diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index b927413d..2b1c345 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -42,7 +42,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag); + int *hlimit, int *tclass, int *dontfrag, + struct sockcm_cookie *sockc); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket); diff --git a/include/net/udp.h b/include/net/udp.h index 92927f7..3c5a65e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -59,7 +59,7 @@ struct udp_skb_cb { * @lock: spinlock protecting changes to head/count */ struct udp_hslot { - struct hlist_nulls_head head; + struct hlist_head head; int count; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); @@ -158,9 +158,21 @@ static inline __sum16 udp_v4_check(int len, __be32 saddr, void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); +static inline void udp_csum_pull_header(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_NONE) + skb->csum = csum_partial(udp_hdr(skb), sizeof(struct udphdr), + skb->csum); + skb_pull_rcsum(skb, sizeof(struct udphdr)); + UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); +} + +typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, + __be16 dport); + struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh); -int udp_gro_complete(struct sk_buff *skb, int nhoff); + struct udphdr *uh, udp_lookup_t lookup); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { @@ -260,6 +272,8 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *tbl, struct sk_buff *skb); +struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -269,6 +283,8 @@ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *tbl, struct sk_buff *skb); +struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport); /* * SNMP statistics for UDP and UDP-Lite diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index b831140..2dcf1de 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -64,6 +64,11 @@ static inline int udp_sock_create(struct net *net, typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); +typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb); +typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb, + int nhoff); struct udp_tunnel_sock_cfg { void *sk_user_data; /* user data used by encap_rcv call back */ @@ -71,6 +76,8 @@ struct udp_tunnel_sock_cfg { __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_destroy_t encap_destroy; + udp_tunnel_gro_receive_t gro_receive; + udp_tunnel_gro_complete_t gro_complete; }; /* Setup the given (UDP) sock to receive UDP encapsulated packets */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 73ed2e9..2f168f0 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -119,6 +119,64 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +/* + * VXLAN Generic Protocol Extension (VXLAN_F_GPE): + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Ver = Version. Indicates VXLAN GPE protocol version. + * + * P = Next Protocol Bit. The P bit is set to indicate that the + * Next Protocol field is present. + * + * O = OAM Flag Bit. The O bit is set to indicate that the packet + * is an OAM packet. + * + * Next Protocol = This 8 bit field indicates the protocol header + * immediately following the VXLAN GPE header. + * + * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 + */ + +struct vxlanhdr_gpe { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 oam_flag:1, + reserved_flags1:1, + np_applied:1, + instance_applied:1, + version:2, +reserved_flags2:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved_flags2:2, + version:2, + instance_applied:1, + np_applied:1, + reserved_flags1:1, + oam_flag:1; +#endif + u8 reserved_flags3; + u8 reserved_flags4; + u8 next_protocol; + __be32 vx_vni; +}; + +/* VXLAN-GPE header flags. */ +#define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28)) +#define VXLAN_HF_NP cpu_to_be32(BIT(26)) +#define VXLAN_HF_OAM cpu_to_be32(BIT(24)) + +#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ + cpu_to_be32(0xff)) + +/* VXLAN-GPE header Next Protocol. */ +#define VXLAN_GPE_NP_IPV4 0x01 +#define VXLAN_GPE_NP_IPV6 0x02 +#define VXLAN_GPE_NP_ETHERNET 0x03 +#define VXLAN_GPE_NP_NSH 0x04 + struct vxlan_metadata { u32 gbp; }; @@ -131,7 +189,6 @@ struct vxlan_sock { struct rcu_head rcu; struct hlist_head vni_list[VNI_HASH_SIZE]; atomic_t refcnt; - struct udp_offload udp_offloads; u32 flags; }; @@ -206,16 +263,26 @@ struct vxlan_dev { #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 +#define VXLAN_F_GPE 0x4000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_GPE | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ VXLAN_F_COLLECT_METADATA) +/* Flags that can be set together with VXLAN_F_GPE. */ +#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ + VXLAN_F_IPV6 | \ + VXLAN_F_UDP_ZERO_CSUM_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_COLLECT_METADATA) + struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/trace/perf.h b/include/trace/perf.h index 26486fc..a182306 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -20,9 +20,6 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) -#undef __perf_addr -#define __perf_addr(a) (__addr = (a)) - #undef __perf_count #define __perf_count(c) (__count = (c)) @@ -37,8 +34,9 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ + struct bpf_prog *prog = event_call->prog; \ struct pt_regs *__regs; \ - u64 __addr = 0, __count = 1; \ + u64 __count = 1; \ struct task_struct *__task = NULL; \ struct hlist_head *head; \ int __entry_size; \ @@ -48,7 +46,7 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (__builtin_constant_p(!__task) && !__task && \ + if (!prog && __builtin_constant_p(!__task) && !__task && \ hlist_empty(head)) \ return; \ \ @@ -56,8 +54,7 @@ perf_trace_##call(void *__data, proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - entry = perf_trace_buf_prepare(__entry_size, \ - event_call->event.type, &__regs, &rctx); \ + entry = perf_trace_buf_alloc(__entry_size, &__regs, &rctx); \ if (!entry) \ return; \ \ @@ -67,8 +64,16 @@ perf_trace_##call(void *__data, proto) \ \ { assign; } \ \ - perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, __regs, head, __task); \ + if (prog) { \ + *(struct pt_regs **)entry = __regs; \ + if (!trace_call_bpf(prog, entry) || hlist_empty(head)) { \ + perf_swevent_put_recursion_context(rctx); \ + return; \ + } \ + } \ + perf_trace_buf_submit(entry, __entry_size, rctx, \ + event_call->event.type, __count, __regs, \ + head, __task); \ } /* diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 170c93b..80679a9 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -652,9 +652,6 @@ static inline notrace int trace_event_get_offsets_##call( \ #undef TP_fast_assign #define TP_fast_assign(args...) args -#undef __perf_addr -#define __perf_addr(a) (a) - #undef __perf_count #define __perf_count(c) (c) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 23917bb..70eda5a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACEPOINT, }; #define BPF_PSEUDO_MAP_FD 1 diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c488066..9427f17 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -488,6 +488,7 @@ enum { IFLA_VXLAN_REMCSUM_NOPARTIAL, IFLA_VXLAN_COLLECT_METADATA, IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 6d1abea..264e515 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -31,6 +31,16 @@ enum { SOF_TIMESTAMPING_LAST }; +/* + * SO_TIMESTAMPING flags are either for recording a packet timestamp or for + * reporting the timestamp to user space. + * Recording flags can be set both via socket options and control messages. + */ +#define SOF_TIMESTAMPING_TX_RECORD_MASK (SOF_TIMESTAMPING_TX_HARDWARE | \ + SOF_TIMESTAMPING_TX_SOFTWARE | \ + SOF_TIMESTAMPING_TX_SCHED | \ + SOF_TIMESTAMPING_TX_ACK) + /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index bae2d80..7ff505d 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -20,6 +20,7 @@ enum { SK_MEMINFO_WMEM_QUEUED, SK_MEMINFO_OPTMEM, SK_MEMINFO_BACKLOG, + SK_MEMINFO_DROPS, SK_MEMINFO_VARS, }; |