summary refs log tree commit diff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/Makefile 5
-rw-r--r-- net/ipv6/af_inet6.c 9
-rw-r--r-- net/ipv6/datagram.c 9
-rw-r--r-- net/ipv6/icmp.c 6
-rw-r--r-- net/ipv6/ila/ila_xlat.c 3
-rw-r--r-- net/ipv6/inet6_hashtables.c 62
-rw-r--r-- net/ipv6/ip6_flowlabel.c 3
-rw-r--r-- net/ipv6/ip6_offload.c 2
-rw-r--r-- net/ipv6/ip6_offload.h 3
-rw-r--r-- net/ipv6/ip6_output.c 15
-rw-r--r-- net/ipv6/ipv6_sockglue.c 9
-rw-r--r-- net/ipv6/ping.c 3
-rw-r--r-- net/ipv6/raw.c 7
-rw-r--r-- net/ipv6/tcp_ipv6.c 34
-rw-r--r-- net/ipv6/udp.c 249
-rw-r--r-- net/ipv6/udp_offload.c 11
16 files changed, 194 insertions, 236 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2fbd90b..5e9d6bf 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,9 +8,10 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
- exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
+ udp_offload.o
-ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
+ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b11c37c..bfa86f0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -64,6 +64,8 @@
#include <asm/uaccess.h>
#include <linux/mroute6.h>
+#include "ip6_offload.h"
+
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
MODULE_LICENSE("GPL");
@@ -561,6 +563,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+ .set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
@@ -958,6 +961,10 @@ static int __init inet6_init(void)
if (err)
goto udplitev6_fail;
+ err = udpv6_offload_init();
+ if (err)
+ goto udpv6_offload_fail;
+
err = tcpv6_init();
if (err)
goto tcpv6_fail;
@@ -987,6 +994,8 @@ pingv6_fail:
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
+ udpv6_offload_exit();
+udpv6_offload_fail:
udplitev6_exit();
udplitev6_fail:
udpv6_exit();
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4281621..a73d701 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -685,7 +685,8 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
struct msghdr *msg, struct flowi6 *fl6,
struct ipv6_txoptions *opt,
- int *hlimit, int *tclass, int *dontfrag)
+ int *hlimit, int *tclass, int *dontfrag,
+ struct sockcm_cookie *sockc)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
@@ -702,6 +703,12 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
+ if (cmsg->cmsg_level == SOL_SOCKET) {
+ if (__sock_cmsg_send(sk, msg, cmsg, sockc))
+ return -EINVAL;
+ continue;
+ }
+
if (cmsg->cmsg_level != SOL_IPV6)
continue;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 0a37ddc..6b573eb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
+ struct sockcm_cookie sockc_unused = {0};
int iif = 0;
int addr_type = 0;
int len;
@@ -527,7 +528,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit,
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, np->dontfrag);
+ MSG_DONTWAIT, np->dontfrag, &sockc_unused);
if (err) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -566,6 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
int hlimit;
u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ struct sockcm_cookie sockc_unused = {0};
saddr = &ipv6_hdr(skb)->daddr;
@@ -617,7 +619,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
- np->dontfrag);
+ np->dontfrag, &sockc_unused);
if (err) {
ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 295ca29..0b03533 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -501,7 +501,8 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
- return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter);
+ return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
+ GFP_KERNEL);
}
static int ila_nl_dump_done(struct netlink_callback *cb)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 70f2628..f167838 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -69,7 +69,6 @@ struct sock *__inet6_lookup_established(struct net *net,
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
- rcu_read_lock();
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
@@ -90,7 +89,6 @@ begin:
out:
sk = NULL;
found:
- rcu_read_unlock();
return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);
@@ -122,6 +120,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+/* called with rcu_read_lock() */
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -129,39 +128,27 @@ struct sock *inet6_lookup_listener(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
- struct sock *sk;
- const struct hlist_nulls_node *node;
- struct sock *result;
- int score, hiscore, matches = 0, reuseport = 0;
- bool select_ok = true;
- u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+ int score, hiscore = 0, matches = 0, reuseport = 0;
+ struct sock *sk, *result = NULL;
+ u32 phash = 0;
- rcu_read_lock();
-begin:
- result = NULL;
- hiscore = 0;
- sk_nulls_for_each(sk, node, &ilb->head) {
+ sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif);
if (score > hiscore) {
- hiscore = score;
- result = sk;
reuseport = sk->sk_reuseport;
if (reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
- if (select_ok) {
- struct sock *sk2;
- sk2 = reuseport_select_sock(sk, phash,
- skb, doff);
- if (sk2) {
- result = sk2;
- goto found;
- }
- }
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ hiscore = score;
} else if (score == hiscore && reuseport) {
matches++;
if (reciprocal_scale(phash, matches) == 0)
@@ -169,25 +156,6 @@ begin:
phash = next_pseudo_random32(phash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
- goto begin;
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, daddr,
- dif) < hiscore)) {
- sock_put(result);
- select_ok = false;
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
@@ -199,12 +167,12 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
const int dif)
{
struct sock *sk;
+ bool refcounted;
- local_bh_disable();
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- ntohs(dport), dif);
- local_bh_enable();
-
+ ntohs(dport), dif, &refcounted);
+ if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
return sk;
}
EXPORT_SYMBOL_GPL(inet6_lookup);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index dc2db4f..35d3ddc 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -372,6 +372,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
if (olen > 0) {
struct msghdr msg;
struct flowi6 flowi6;
+ struct sockcm_cookie sockc_junk;
int junk;
err = -ENOMEM;
@@ -390,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
memset(&flowi6, 0, sizeof(flowi6));
err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
- &junk, &junk, &junk);
+ &junk, &junk, &junk, &sockc_junk);
if (err)
goto done;
err = -EINVAL;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 82e9f30..204af22 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -325,8 +325,6 @@ static int __init ipv6_offload_init(void)
if (tcpv6_offload_init() < 0)
pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
- if (udp_offload_init() < 0)
- pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
if (ipv6_exthdrs_offload_init() < 0)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
index 2e155c6..96b40e4 100644
--- a/net/ipv6/ip6_offload.h
+++ b/net/ipv6/ip6_offload.h
@@ -12,7 +12,8 @@
#define __ip6_offload_h
int ipv6_exthdrs_offload_init(void);
-int udp_offload_init(void);
+int udpv6_offload_init(void);
+int udpv6_offload_exit(void);
int tcpv6_offload_init(void);
#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bc972e7..171518e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1258,7 +1258,8 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- unsigned int flags, int dontfrag)
+ unsigned int flags, int dontfrag,
+ const struct sockcm_cookie *sockc)
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
@@ -1329,7 +1330,7 @@ emsgsize:
csummode = CHECKSUM_PARTIAL;
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
- sock_tx_timestamp(sk, &tx_flags);
+ sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -1565,7 +1566,8 @@ int ip6_append_data(struct sock *sk,
int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen, int hlimit,
int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag)
+ struct rt6_info *rt, unsigned int flags, int dontfrag,
+ const struct sockcm_cookie *sockc)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1593,7 +1595,8 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, dontfrag);
+ from, length, transhdrlen, flags, dontfrag,
+ sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1752,7 +1755,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
int hlimit, int tclass,
struct ipv6_txoptions *opt, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
- int dontfrag)
+ int dontfrag, const struct sockcm_cookie *sockc)
{
struct inet_cork_full cork;
struct inet6_cork v6_cork;
@@ -1779,7 +1782,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, dontfrag);
+ flags, dontfrag, sockc);
if (err) {
__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4449ad1..4ff4b29 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -407,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = rcu_dereference_protected(np->opt,
+ lockdep_sock_is_held(sk));
opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
@@ -471,6 +472,7 @@ sticky_done:
struct ipv6_txoptions *opt = NULL;
struct msghdr msg;
struct flowi6 fl6;
+ struct sockcm_cookie sockc_junk;
int junk;
memset(&fl6, 0, sizeof(fl6));
@@ -503,7 +505,7 @@ sticky_done:
msg.msg_control = (void *)(opt+1);
retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
- &junk, &junk);
+ &junk, &junk, &sockc_junk);
if (retv)
goto done;
update:
@@ -1123,7 +1125,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct ipv6_txoptions *opt;
lock_sock(sk);
- opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = rcu_dereference_protected(np->opt,
+ lockdep_sock_is_held(sk));
len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c382db7..da1cff7 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -62,6 +62,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
+ struct sockcm_cookie junk = {0};
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -144,7 +145,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
0, hlimit,
np->tclass, NULL, &fl6, rt,
- MSG_DONTWAIT, np->dontfrag);
+ MSG_DONTWAIT, np->dontfrag, &junk);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fa59dd7..b07ce21 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -745,6 +745,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct dst_entry *dst = NULL;
struct raw6_frag_vec rfv;
struct flowi6 fl6;
+ struct sockcm_cookie sockc;
int addr_len = msg->msg_namelen;
int hlimit = -1;
int tclass = -1;
@@ -821,13 +822,15 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (fl6.flowi6_oif == 0)
fl6.flowi6_oif = sk->sk_bound_dev_if;
+ sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag);
+ &hlimit, &tclass, &dontfrag,
+ &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -897,7 +900,7 @@ back_from_confirm:
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, &sockc);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 711d209..0e621bc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -234,7 +234,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -858,6 +858,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
return;
#ifdef CONFIG_TCP_MD5SIG
+ rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
@@ -875,16 +876,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
- return;
+ goto out;
- rcu_read_lock();
key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
if (!key)
- goto release_sk1;
+ goto out;
genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto release_sk1;
+ goto out;
}
#endif
@@ -898,11 +898,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
-release_sk1:
- if (sk1) {
- rcu_read_unlock();
- sock_put(sk1);
- }
+out:
+ rcu_read_unlock();
#endif
}
@@ -967,7 +964,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
&tcp_request_sock_ipv6_ops, sk, skb);
drop:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ tcp_listendrop(sk);
return 0; /* don't send reset */
}
@@ -1172,7 +1169,7 @@ out_overflow:
out_nonewsk:
dst_release(dst);
out:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ tcp_listendrop(sk);
return NULL;
}
@@ -1351,6 +1348,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
{
const struct tcphdr *th;
const struct ipv6hdr *hdr;
+ bool refcounted;
struct sock *sk;
int ret;
struct net *net = dev_net(skb->dev);
@@ -1381,7 +1379,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
- th->source, th->dest, inet6_iif(skb));
+ th->source, th->dest, inet6_iif(skb),
+ &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1404,6 +1403,7 @@ process:
goto lookup;
}
sock_hold(sk);
+ refcounted = true;
nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
@@ -1460,7 +1460,8 @@ process:
bh_unlock_sock(sk);
put_and_return:
- sock_put(sk);
+ if (refcounted)
+ sock_put(sk);
return ret ? -1 : 0;
no_tcp_socket:
@@ -1483,7 +1484,9 @@ discard_it:
return 0;
discard_and_relse:
- sock_put(sk);
+ sk_drops_add(sk, skb);
+ if (refcounted)
+ sock_put(sk);
goto discard_it;
do_time_wait:
@@ -1514,6 +1517,7 @@ do_time_wait:
inet_twsk_deschedule_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
+ refcounted = false;
goto process;
}
/* Fall through to ACK */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8125931..a050b70 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -213,37 +213,28 @@ static struct sock *udp6_lib_lookup2(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
- bool select_ok = true;
u32 hash = 0;
-begin:
result = NULL;
badness = -1;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- if (select_ok) {
- struct sock *sk2;
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- select_ok = false;
- goto found;
- }
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -251,27 +242,10 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
+/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
@@ -279,15 +253,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
- bool select_ok = true;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -309,34 +280,26 @@ struct sock *__udp6_lib_lookup(struct net *net,
&in6addr_any, hnum, dif,
hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = -1;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- if (select_ok) {
- struct sock *sk2;
-
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- select_ok = false;
- goto found;
- }
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -344,25 +307,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, saddr, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -382,12 +326,37 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
udptable, skb);
}
+struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct net_device *dev =
+ skb_dst(skb) ? skb_dst(skb)->dev : skb->dev;
+
+ return __udp6_lib_lookup(dev_net(dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+ &udp_table, skb);
+}
+EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
+
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
- return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
+ struct sock *sk;
+
+ sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+#endif
/*
* This should be easy, if there is something there we
@@ -401,7 +370,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
- int peeked, off = 0;
+ int peeked, peeking, off;
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
@@ -415,15 +384,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
+ peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err);
if (!skb)
- goto out;
+ return err;
- ulen = skb->len - sizeof(struct udphdr);
+ ulen = skb->len;
copied = len;
- if (copied > ulen)
- copied = ulen;
+ if (copied > ulen - off)
+ copied = ulen - off;
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
@@ -435,17 +405,16 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
}
if (checksum_valid || skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
- msg, copied);
+ err = skb_copy_datagram_msg(skb, off, msg, copied);
else {
- err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg);
+ err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL)
goto csum_copy_err;
}
@@ -462,7 +431,8 @@ try_again:
UDP_MIB_INERRORS,
is_udplite);
}
- goto out_free;
+ skb_free_datagram_locked(sk, skb);
+ return err;
}
if (!peeked) {
if (is_udp4)
@@ -510,9 +480,7 @@ try_again:
if (flags & MSG_TRUNC)
err = ulen;
-out_free:
- skb_free_datagram_locked(sk, skb);
-out:
+ __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
return err;
csum_copy_err:
@@ -585,7 +553,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -598,7 +566,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_incoming_cpu_update(sk);
}
- rc = sock_queue_rcv_skb(sk, skb);
+ rc = __sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -692,8 +660,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (rcu_access_pointer(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto csum_error;
+ if (sk_filter(sk, skb))
+ goto drop;
}
+ udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
@@ -747,33 +718,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
static void udp6_csum_zero_error(struct sk_buff *skb)
{
/* RFC 2460 section 8.1 says that we SHOULD log
@@ -792,15 +736,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
const struct in6_addr *saddr, const struct in6_addr *daddr,
struct udp_table *udptable, int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
+ struct sock *sk, *first = NULL;
const struct udphdr *uh = udp_hdr(skb);
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = inet6_iif(skb);
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ int dif = inet6_iif(skb);
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
@@ -811,27 +755,32 @@ start_lookup:
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_v6_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum) &&
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- (uh->check || udp_sk(sk)->no_check6_rx)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ continue;
+ if (!first) {
+ first = sk;
+ continue;
+ }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
}
- }
- spin_unlock(&hslot->lock);
+ if (udpv6_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -839,13 +788,13 @@ start_lookup:
goto start_lookup;
}
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udpv6_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -853,10 +802,10 @@ start_lookup:
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
- struct sock *sk;
struct udphdr *uh;
- const struct in6_addr *saddr, *daddr;
+ struct sock *sk;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -910,7 +859,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int ret;
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
- sock_put(sk);
udp6_csum_zero_error(skb);
goto csum_error;
}
@@ -920,7 +868,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input */
if (ret > 0)
@@ -1128,6 +1075,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int connected = 0;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+ struct sockcm_cookie sockc;
/* destination address check */
if (sin6) {
@@ -1247,6 +1195,7 @@ do_udp_sendmsg:
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
fl6.flowi6_mark = sk->sk_mark;
+ sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
@@ -1254,7 +1203,8 @@ do_udp_sendmsg:
opt->tot_len = sizeof(*opt);
err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag);
+ &hlimit, &tclass, &dontfrag,
+ &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1321,7 +1271,7 @@ back_from_confirm:
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
@@ -1348,7 +1298,8 @@ do_append_data:
err = ip6_append_data(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info *)dst,
- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag,
+ &sockc);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 2b0fbe6..5429f6b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -153,7 +153,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 1;
- return udp_gro_receive(head, skb, uh);
+ return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -173,7 +173,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
}
- return udp_gro_complete(skb, nhoff);
+ return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
static const struct net_offload udpv6_offload = {
@@ -184,7 +184,12 @@ static const struct net_offload udpv6_offload = {
},
};
-int __init udp_offload_init(void)
+int udpv6_offload_init(void)
{
return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
}
+
+int udpv6_offload_exit(void)
+{
+ return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+}