summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c53
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c10
-rw-r--r--net/ipv4/ping.c14
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_memcontrol.c87
9 files changed, 101 insertions, 88 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b7fe5b..e67da4e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1386,6 +1386,17 @@ out:
return pp;
}
+int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
+{
+ if (sk->sk_family == AF_INET)
+ return ip_recv_error(sk, msg, len, addr_len);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
+#endif
+ return -EINVAL;
+}
+
static int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 3e86101..1a7e979 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -528,6 +528,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
.validate = vti_tunnel_validate,
.newlink = vti_newlink,
.changelink = vti_changelink,
+ .dellink = ip_tunnel_dellink,
.get_size = vti_get_size,
.fill_info = vti_fill_info,
};
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index a054fe0..5c61328 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -56,11 +56,11 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static int ipv4_print_tuple(struct seq_file *s,
+static void ipv4_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
- return seq_printf(s, "src=%pI4 dst=%pI4 ",
- &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
}
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 4c48e43..a460a87 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -94,7 +94,7 @@ static void ct_seq_stop(struct seq_file *s, void *v)
}
#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
int ret;
u32 len;
@@ -102,17 +102,15 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
if (ret)
- return 0;
+ return;
- ret = seq_printf(s, "secctx=%s ", secctx);
+ seq_printf(s, "secctx=%s ", secctx);
security_release_secctx(secctx, len);
- return ret;
}
#else
-static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
- return 0;
}
#endif
@@ -141,47 +139,52 @@ static int ct_seq_show(struct seq_file *s, void *v)
NF_CT_ASSERT(l4proto);
ret = -ENOSPC;
- if (seq_printf(s, "%-8s %u %ld ",
- l4proto->name, nf_ct_protonum(ct),
- timer_pending(&ct->timeout)
- ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
- goto release;
+ seq_printf(s, "%-8s %u %ld ",
+ l4proto->name, nf_ct_protonum(ct),
+ timer_pending(&ct->timeout)
+ ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+
+ if (l4proto->print_conntrack)
+ l4proto->print_conntrack(s, ct);
- if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
+ if (seq_has_overflowed(s))
goto release;
- if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- l3proto, l4proto))
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l3proto, l4proto);
+
+ if (seq_has_overflowed(s))
goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
goto release;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
- if (seq_printf(s, "[UNREPLIED] "))
- goto release;
+ seq_printf(s, "[UNREPLIED] ");
- if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- l3proto, l4proto))
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ l3proto, l4proto);
+
+ if (seq_has_overflowed(s))
goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
if (test_bit(IPS_ASSURED_BIT, &ct->status))
- if (seq_printf(s, "[ASSURED] "))
- goto release;
+ seq_printf(s, "[ASSURED] ");
#ifdef CONFIG_NF_CONNTRACK_MARK
- if (seq_printf(s, "mark=%u ", ct->mark))
- goto release;
+ seq_printf(s, "mark=%u ", ct->mark);
#endif
- if (ct_show_secctx(s, ct))
- goto release;
+ ct_show_secctx(s, ct);
- if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+ seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
+
+ if (seq_has_overflowed(s))
goto release;
+
ret = 0;
release:
nf_ct_put(ct);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index b91b264..80d5554 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -72,13 +72,13 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
}
/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
+static void icmp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
- return seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
}
static unsigned int *icmp_get_timeouts(struct net *net)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 57f7c98..5d740cc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -217,6 +217,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
&ipv6_hdr(skb)->daddr))
continue;
#endif
+ } else {
+ continue;
}
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
@@ -853,16 +855,8 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (flags & MSG_ERRQUEUE) {
- if (family == AF_INET) {
- return ip_recv_error(sk, msg, len, addr_len);
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (family == AF_INET6) {
- return pingv6_ops.ipv6_recv_error(sk, msg, len,
- addr_len);
-#endif
- }
- }
+ if (flags & MSG_ERRQUEUE)
+ return inet_recv_error(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39ec0c3..38c2bcb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1598,7 +1598,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
u32 urg_hole = 0;
if (unlikely(flags & MSG_ERRQUEUE))
- return ip_recv_error(sk, msg, len, addr_len);
+ return inet_recv_error(sk, msg, len, addr_len);
if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9c7d762..147be20 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -598,7 +598,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
if (th->rst)
return;
- if (skb_rtable(skb)->rt_type != RTN_LOCAL)
+ /* If sk not NULL, it means we did a successful lookup and incoming
+ * route had to be correct. prequeue might have dropped our dst.
+ */
+ if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
return;
/* Swap the send and the receive. */
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 1d19135..27232713 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -9,13 +9,13 @@
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
/*
- * The root cgroup does not use res_counters, but rather,
+ * The root cgroup does not use page_counters, but rather,
* rely on the data already collected by the network
* subsystem
*/
- struct res_counter *res_parent = NULL;
- struct cg_proto *cg_proto, *parent_cg;
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+ struct page_counter *counter_parent = NULL;
+ struct cg_proto *cg_proto, *parent_cg;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
@@ -29,9 +29,9 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
parent_cg = tcp_prot.proto_cgroup(parent);
if (parent_cg)
- res_parent = &parent_cg->memory_allocated;
+ counter_parent = &parent_cg->memory_allocated;
- res_counter_init(&cg_proto->memory_allocated, res_parent);
+ page_counter_init(&cg_proto->memory_allocated, counter_parent);
percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
return 0;
@@ -50,7 +50,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
-static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
+static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
struct cg_proto *cg_proto;
int i;
@@ -60,20 +60,17 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
if (!cg_proto)
return -EINVAL;
- if (val > RES_COUNTER_MAX)
- val = RES_COUNTER_MAX;
-
- ret = res_counter_set_limit(&cg_proto->memory_allocated, val);
+ ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
if (ret)
return ret;
for (i = 0; i < 3; i++)
- cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT,
+ cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
sysctl_tcp_mem[i]);
- if (val == RES_COUNTER_MAX)
+ if (nr_pages == PAGE_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else if (val != RES_COUNTER_MAX) {
+ else {
/*
* The active bit needs to be written after the static_key
* update. This is what guarantees that the socket activation
@@ -102,11 +99,20 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
return 0;
}
+enum {
+ RES_USAGE,
+ RES_LIMIT,
+ RES_MAX_USAGE,
+ RES_FAILCNT,
+};
+
+static DEFINE_MUTEX(tcp_limit_mutex);
+
static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned long long val;
+ unsigned long nr_pages;
int ret = 0;
buf = strstrip(buf);
@@ -114,10 +120,12 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
switch (of_cft(of)->private) {
case RES_LIMIT:
/* see memcontrol.c */
- ret = res_counter_memparse_write_strategy(buf, &val);
+ ret = page_counter_memparse(buf, &nr_pages);
if (ret)
break;
- ret = tcp_update_limit(memcg, val);
+ mutex_lock(&tcp_limit_mutex);
+ ret = tcp_update_limit(memcg, nr_pages);
+ mutex_unlock(&tcp_limit_mutex);
break;
default:
ret = -EINVAL;
@@ -126,43 +134,36 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
return ret ?: nbytes;
}
-static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
-{
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return default_val;
-
- return res_counter_read_u64(&cg_proto->memory_allocated, type);
-}
-
-static u64 tcp_read_usage(struct mem_cgroup *memcg)
-{
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
-
- return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE);
-}
-
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
u64 val;
switch (cft->private) {
case RES_LIMIT:
- val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
+ if (!cg_proto)
+ return PAGE_COUNTER_MAX;
+ val = cg_proto->memory_allocated.limit;
+ val *= PAGE_SIZE;
break;
case RES_USAGE:
- val = tcp_read_usage(memcg);
+ if (!cg_proto)
+ val = atomic_long_read(&tcp_memory_allocated);
+ else
+ val = page_counter_read(&cg_proto->memory_allocated);
+ val *= PAGE_SIZE;
break;
case RES_FAILCNT:
+ if (!cg_proto)
+ return 0;
+ val = cg_proto->memory_allocated.failcnt;
+ break;
case RES_MAX_USAGE:
- val = tcp_read_stat(memcg, cft->private, 0);
+ if (!cg_proto)
+ return 0;
+ val = cg_proto->memory_allocated.watermark;
+ val *= PAGE_SIZE;
break;
default:
BUG();
@@ -183,10 +184,10 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
switch (of_cft(of)->private) {
case RES_MAX_USAGE:
- res_counter_reset_max(&cg_proto->memory_allocated);
+ page_counter_reset_watermark(&cg_proto->memory_allocated);
break;
case RES_FAILCNT:
- res_counter_reset_failcnt(&cg_proto->memory_allocated);
+ cg_proto->memory_allocated.failcnt = 0;
break;
}