diff options
Diffstat (limited to 'net')
71 files changed, 874 insertions, 430 deletions
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index af73bc3..410dd5e 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -101,11 +101,11 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) #define __get_rpn_parity(line) (((line) >> 3) & 0x7) +static DECLARE_WAIT_QUEUE_HEAD(rfcomm_wq); + static void rfcomm_schedule(void) { - if (!rfcomm_thread) - return; - wake_up_process(rfcomm_thread); + wake_up_all(&rfcomm_wq); } /* ---- RFCOMM FCS computation ---- */ @@ -2086,24 +2086,22 @@ static void rfcomm_kill_listener(void) static int rfcomm_run(void *unused) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG(""); set_user_nice(current, -10); rfcomm_add_listener(BDADDR_ANY); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - - if (kthread_should_stop()) - break; + add_wait_queue(&rfcomm_wq, &wait); + while (!kthread_should_stop()) { /* Process stuff */ rfcomm_process_sessions(); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - __set_current_state(TASK_RUNNING); + remove_wait_queue(&rfcomm_wq, &wait); rfcomm_kill_listener(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2ff9706..e5ec470 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -280,6 +280,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; diff --git a/net/core/dev.c b/net/core/dev.c index 945bbd0..3acff09 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7200,11 +7200,10 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) */ struct net *net; bool unregistering; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { - prepare_to_wait(&netdev_unregistering_wq, &wait, - TASK_UNINTERRUPTIBLE); unregistering = false; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { @@ -7216,9 +7215,10 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) if (!unregistering) break; __rtnl_unlock(); - schedule(); + + wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - finish_wait(&netdev_unregistering_wq, &wait); + remove_wait_queue(&netdev_unregistering_wq, &wait); } static void __net_exit default_device_exit_batch(struct list_head *net_list) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a688268..88e8de3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -365,11 +365,10 @@ static void rtnl_lock_unregistering_all(void) { struct net *net; bool unregistering; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { - prepare_to_wait(&netdev_unregistering_wq, &wait, - TASK_UNINTERRUPTIBLE); unregistering = false; rtnl_lock(); for_each_net(net) { @@ -381,9 +380,10 @@ static void rtnl_lock_unregistering_all(void) if (!unregistering) break; __rtnl_unlock(); - schedule(); + + wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - finish_wait(&netdev_unregistering_wq, &wait); + remove_wait_queue(&netdev_unregistering_wq, &wait); } /** @@ -1498,6 +1498,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); err = -EPERM; goto errout; } @@ -2685,13 +2686,20 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; - struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), - IFLA_EXT_MASK); - if (extfilt) - filter_mask = nla_get_u32(extfilt); + if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) { + struct nlattr *extfilt; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), + IFLA_EXT_MASK); + if (extfilt) { + if (nla_len(extfilt) < sizeof(filter_mask)) + return -EINVAL; + + filter_mask = nla_get_u32(extfilt); + } + } rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2798,6 +2806,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + have_flags = true; flags = nla_get_u16(attr); break; @@ -2868,6 +2879,9 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + have_flags = true; flags = nla_get_u16(attr); break; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b7fe5b..e67da4e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1386,6 +1386,17 @@ out: return pp; } +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +{ + if (sk->sk_family == AF_INET) + return ip_recv_error(sk, msg, len, addr_len); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); +#endif + return -EINVAL; +} + static int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3e86101..1a7e979 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -528,6 +528,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .validate = vti_tunnel_validate, .newlink = vti_newlink, .changelink = vti_changelink, + .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, }; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index a054fe0..5c61328 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -56,11 +56,11 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static int ipv4_print_tuple(struct seq_file *s, +static void ipv4_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%pI4 dst=%pI4 ", - &tuple->src.u3.ip, &tuple->dst.u3.ip); + seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 4c48e43..a460a87 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -94,7 +94,7 @@ static void ct_seq_stop(struct seq_file *s, void *v) } #ifdef CONFIG_NF_CONNTRACK_SECMARK -static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { int ret; u32 len; @@ -102,17 +102,15 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) ret = security_secid_to_secctx(ct->secmark, &secctx, &len); if (ret) - return 0; + return; - ret = seq_printf(s, "secctx=%s ", secctx); + seq_printf(s, "secctx=%s ", secctx); security_release_secctx(secctx, len); - return ret; } #else -static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { - return 0; } #endif @@ -141,47 +139,52 @@ static int ct_seq_show(struct seq_file *s, void *v) NF_CT_ASSERT(l4proto); ret = -ENOSPC; - if (seq_printf(s, "%-8s %u %ld ", - l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) - goto release; + seq_printf(s, "%-8s %u %ld ", + l4proto->name, nf_ct_protonum(ct), + timer_pending(&ct->timeout) + ? (long)(ct->timeout.expires - jiffies)/HZ : 0); + + if (l4proto->print_conntrack) + l4proto->print_conntrack(s, ct); - if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) + if (seq_has_overflowed(s)) goto release; - if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, l4proto)) + print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + l3proto, l4proto); + + if (seq_has_overflowed(s)) goto release; if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) goto release; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - if (seq_printf(s, "[UNREPLIED] ")) - goto release; + seq_printf(s, "[UNREPLIED] "); - if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, l4proto)) + print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + l3proto, l4proto); + + if (seq_has_overflowed(s)) goto release; if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; if (test_bit(IPS_ASSURED_BIT, &ct->status)) - if (seq_printf(s, "[ASSURED] ")) - goto release; + seq_printf(s, "[ASSURED] "); #ifdef CONFIG_NF_CONNTRACK_MARK - if (seq_printf(s, "mark=%u ", ct->mark)) - goto release; + seq_printf(s, "mark=%u ", ct->mark); #endif - if (ct_show_secctx(s, ct)) - goto release; + ct_show_secctx(s, ct); - if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); + + if (seq_has_overflowed(s)) goto release; + ret = 0; release: nf_ct_put(ct); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index b91b264..80d5554 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -72,13 +72,13 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int icmp_print_tuple(struct seq_file *s, +static void icmp_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); } static unsigned int *icmp_get_timeouts(struct net *net) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 57f7c98..5d740cc 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -217,6 +217,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) &ipv6_hdr(skb)->daddr)) continue; #endif + } else { + continue; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) @@ -853,16 +855,8 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (flags & MSG_ERRQUEUE) { - if (family == AF_INET) { - return ip_recv_error(sk, msg, len, addr_len); -#if IS_ENABLED(CONFIG_IPV6) - } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len, - addr_len); -#endif - } - } + if (flags & MSG_ERRQUEUE) + return inet_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 39ec0c3..38c2bcb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1598,7 +1598,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) - return ip_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len, addr_len); if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9c7d762..147be20 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -598,7 +598,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (th->rst) return; - if (skb_rtable(skb)->rt_type != RTN_LOCAL) + /* If sk not NULL, it means we did a successful lookup and incoming + * route had to be correct. prequeue might have dropped our dst. + */ + if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) return; /* Swap the send and the receive. */ diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 1d19135..27232713 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -9,13 +9,13 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* - * The root cgroup does not use res_counters, but rather, + * The root cgroup does not use page_counters, but rather, * rely on the data already collected by the network * subsystem */ - struct res_counter *res_parent = NULL; - struct cg_proto *cg_proto, *parent_cg; struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct page_counter *counter_parent = NULL; + struct cg_proto *cg_proto, *parent_cg; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) @@ -29,9 +29,9 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) parent_cg = tcp_prot.proto_cgroup(parent); if (parent_cg) - res_parent = &parent_cg->memory_allocated; + counter_parent = &parent_cg->memory_allocated; - res_counter_init(&cg_proto->memory_allocated, res_parent); + page_counter_init(&cg_proto->memory_allocated, counter_parent); percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; @@ -50,7 +50,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(tcp_destroy_cgroup); -static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) +static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) { struct cg_proto *cg_proto; int i; @@ -60,20 +60,17 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (!cg_proto) return -EINVAL; - if (val > RES_COUNTER_MAX) - val = RES_COUNTER_MAX; - - ret = res_counter_set_limit(&cg_proto->memory_allocated, val); + ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages); if (ret) return ret; for (i = 0; i < 3; i++) - cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT, + cg_proto->sysctl_mem[i] = min_t(long, nr_pages, sysctl_tcp_mem[i]); - if (val == RES_COUNTER_MAX) + if (nr_pages == PAGE_COUNTER_MAX) clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); - else if (val != RES_COUNTER_MAX) { + else { /* * The active bit needs to be written after the static_key * update. This is what guarantees that the socket activation @@ -102,11 +99,20 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) return 0; } +enum { + RES_USAGE, + RES_LIMIT, + RES_MAX_USAGE, + RES_FAILCNT, +}; + +static DEFINE_MUTEX(tcp_limit_mutex); + static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long long val; + unsigned long nr_pages; int ret = 0; buf = strstrip(buf); @@ -114,10 +120,12 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = res_counter_memparse_write_strategy(buf, &val); + ret = page_counter_memparse(buf, &nr_pages); if (ret) break; - ret = tcp_update_limit(memcg, val); + mutex_lock(&tcp_limit_mutex); + ret = tcp_update_limit(memcg, nr_pages); + mutex_unlock(&tcp_limit_mutex); break; default: ret = -EINVAL; @@ -126,43 +134,36 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, return ret ?: nbytes; } -static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) -{ - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) - return default_val; - - return res_counter_read_u64(&cg_proto->memory_allocated, type); -} - -static u64 tcp_read_usage(struct mem_cgroup *memcg) -{ - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) - return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; - - return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE); -} - static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg); u64 val; switch (cft->private) { case RES_LIMIT: - val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX); + if (!cg_proto) + return PAGE_COUNTER_MAX; + val = cg_proto->memory_allocated.limit; + val *= PAGE_SIZE; break; case RES_USAGE: - val = tcp_read_usage(memcg); + if (!cg_proto) + val = atomic_long_read(&tcp_memory_allocated); + else + val = page_counter_read(&cg_proto->memory_allocated); + val *= PAGE_SIZE; break; case RES_FAILCNT: + if (!cg_proto) + return 0; + val = cg_proto->memory_allocated.failcnt; + break; case RES_MAX_USAGE: - val = tcp_read_stat(memcg, cft->private, 0); + if (!cg_proto) + return 0; + val = cg_proto->memory_allocated.watermark; + val *= PAGE_SIZE; break; default: BUG(); @@ -183,10 +184,10 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, switch (of_cft(of)->private) { case RES_MAX_USAGE: - res_counter_reset_max(&cg_proto->memory_allocated); + page_counter_reset_watermark(&cg_proto->memory_allocated); break; case RES_FAILCNT: - res_counter_reset_failcnt(&cg_proto->memory_allocated); + cg_proto->memory_allocated.failcnt = 0; break; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4564e1f..0e32d2e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -502,11 +502,11 @@ static int ip6gre_rcv(struct sk_buff *skb) skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP + * - Change protocol to IPv6 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a071563..01e12d0 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,7 +69,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | + ~(SKB_GSO_TCPV4 | + SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b04ed72..8db6c98 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -79,15 +79,13 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, uh->source = src_port; uh->len = htons(skb->len); - uh->check = 0; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, - &sk->sk_v6_daddr, skb->len); + udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 31089d1..bcda14d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -905,6 +905,15 @@ static int vti6_newlink(struct net *src_net, struct net_device *dev, return vti6_tnl_create2(dev); } +static void vti6_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + if (dev != ip6n->fb_tnl_dev) + unregister_netdevice_queue(dev, head); +} + static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -980,6 +989,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .setup = vti6_dev_setup, .validate = vti6_validate, .newlink = vti6_newlink, + .dellink = vti6_dellink, .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, @@ -1020,6 +1030,7 @@ static int __net_init vti6_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &vti6_link_ops; err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4cbc6b2..b68d0e5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -60,11 +60,11 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static int ipv6_print_tuple(struct seq_file *s, +static void ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%pI6 dst=%pI6 ", - tuple->src.u3.ip6, tuple->dst.u3.ip6); + seq_printf(s, "src=%pI6 dst=%pI6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); } static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index b3807c5..90388d6 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -84,13 +84,13 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int icmpv6_print_tuple(struct seq_file *s, +static void icmpv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); } static unsigned int *icmpv6_get_timeouts(struct net *net) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ace29b6..dc495ae 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -903,7 +903,10 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (th->rst) return; - if (!ipv6_unicast_destination(skb)) + /* If sk not NULL, it means we did a successful lookup and incoming + * route had to be correct. prequeue might have dropped our dst. + */ + if (!sk && !ipv6_unicast_destination(skb)) return; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2c69975..5016a69 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,16 +611,12 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - - /* We have to check the DYING flag after unlink to prevent - * a race against nf_ct_get_next_corpse() possibly called from - * user context, else we insert an already 'dead' hash, blocking - * further use of that particular connection -JM. - */ - nf_ct_del_from_dying_or_unconfirmed_list(ct); + /* We have to check the DYING flag inside the lock to prevent + a race against nf_ct_get_next_corpse() possibly called from + user context, else we insert an already 'dead' hash, blocking + further use of that particular connection -JM */ if (unlikely(nf_ct_is_dying(ct))) { - nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_ACCEPT; @@ -640,6 +636,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; + nf_ct_del_from_dying_or_unconfirmed_list(ct); + /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index e7eb807..cf9ace7 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -49,10 +49,9 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static int generic_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void generic_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return 0; } static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index cb372f9..6dd995c 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -618,17 +618,17 @@ out_invalid: return -NF_ACCEPT; } -static int dccp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void dccp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.dccp.port), - ntohs(tuple->dst.u.dccp.port)); + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.dccp.port), + ntohs(tuple->dst.u.dccp.port)); } -static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) +static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { - return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); + seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 957c1db..60865f1 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -63,10 +63,9 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int generic_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void generic_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return 0; } static unsigned int *generic_get_timeouts(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index d566573..7648674 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -226,20 +226,20 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, } /* print gre part of tuple */ -static int gre_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void gre_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", - ntohs(tuple->src.u.gre.key), - ntohs(tuple->dst.u.gre.key)); + seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); } /* print private data for conntrack */ -static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) +static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { - return seq_printf(s, "timeout=%u, stream_timeout=%u ", - (ct->proto.gre.timeout / HZ), - (ct->proto.gre.stream_timeout / HZ)); + seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); } static unsigned int *gre_get_timeouts(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1314d33..b45da90 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -166,16 +166,16 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int sctp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void sctp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.sctp.port), - ntohs(tuple->dst.u.sctp.port)); + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.sctp.port), + ntohs(tuple->dst.u.sctp.port)); } /* Print out the private part of the conntrack. */ -static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) +static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum sctp_conntrack state; @@ -183,7 +183,7 @@ static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) state = ct->proto.sctp.state; spin_unlock_bh(&ct->lock); - return seq_printf(s, "%s ", sctp_conntrack_names[state]); + seq_printf(s, "%s ", sctp_conntrack_names[state]); } #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d87b642..5caa0c4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -302,16 +302,16 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int tcp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void tcp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.tcp.port), - ntohs(tuple->dst.u.tcp.port)); + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); } /* Print out the private part of the conntrack. */ -static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) +static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum tcp_conntrack state; @@ -319,7 +319,7 @@ static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) state = ct->proto.tcp.state; spin_unlock_bh(&ct->lock); - return seq_printf(s, "%s ", tcp_conntrack_names[state]); + seq_printf(s, "%s ", tcp_conntrack_names[state]); } static unsigned int get_conntrack_index(const struct tcphdr *tcph) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 9d7721c..6957281 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -63,12 +63,12 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int udp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void udp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); } static unsigned int *udp_get_timeouts(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 2750e6c..c5903d1 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -71,12 +71,12 @@ static bool udplite_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Print out the per-protocol part of the tuple. */ -static int udplite_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) +static void udplite_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); } static unsigned int *udplite_get_timeouts(struct net *net) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index cf65a1e..fc823fa 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -36,12 +36,13 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_NF_CONNTRACK_PROCFS -int +void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) { - return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple); + l3proto->print_tuple(s, tuple); + l4proto->print_tuple(s, tuple); } EXPORT_SYMBOL_GPL(print_tuple); @@ -119,7 +120,7 @@ static void ct_seq_stop(struct seq_file *s, void *v) } #ifdef CONFIG_NF_CONNTRACK_SECMARK -static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { int ret; u32 len; @@ -127,22 +128,20 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) ret = security_secid_to_secctx(ct->secmark, &secctx, &len); if (ret) - return 0; + return; - ret = seq_printf(s, "secctx=%s ", secctx); + seq_printf(s, "secctx=%s ", secctx); security_release_secctx(secctx, len); - return ret; } #else -static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { - return 0; } #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP -static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) +static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { struct ct_iter_state *st = s->private; struct nf_conn_tstamp *tstamp; @@ -156,16 +155,15 @@ static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) else delta_time = 0; - return seq_printf(s, "delta-time=%llu ", - (unsigned long long)delta_time); + seq_printf(s, "delta-time=%llu ", + (unsigned long long)delta_time); } - return 0; + return; } #else -static inline int +static inline void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { - return 0; } #endif @@ -192,55 +190,54 @@ static int ct_seq_show(struct seq_file *s, void *v) NF_CT_ASSERT(l4proto); ret = -ENOSPC; - if (seq_printf(s, "%-8s %u %-8s %u %ld ", - l3proto->name, nf_ct_l3num(ct), - l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) - goto release; + seq_printf(s, "%-8s %u %-8s %u %ld ", + l3proto->name, nf_ct_l3num(ct), + l4proto->name, nf_ct_protonum(ct), + timer_pending(&ct->timeout) + ? (long)(ct->timeout.expires - jiffies)/HZ : 0); - if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) - goto release; + if (l4proto->print_conntrack) + l4proto->print_conntrack(s, ct); + + print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + l3proto, l4proto); - if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, l4proto)) + if (seq_has_overflowed(s)) goto release; if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) goto release; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - if (seq_printf(s, "[UNREPLIED] ")) - goto release; + seq_printf(s, "[UNREPLIED] "); - if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, l4proto)) - goto release; + print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + l3proto, l4proto); if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; if (test_bit(IPS_ASSURED_BIT, &ct->status)) - if (seq_printf(s, "[ASSURED] ")) - goto release; + seq_printf(s, "[ASSURED] "); -#if defined(CONFIG_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%u ", ct->mark)) + if (seq_has_overflowed(s)) goto release; + +#if defined(CONFIG_NF_CONNTRACK_MARK) + seq_printf(s, "mark=%u ", ct->mark); #endif - if (ct_show_secctx(s, ct)) - goto release; + ct_show_secctx(s, ct); #ifdef CONFIG_NF_CONNTRACK_ZONES - if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) - goto release; + seq_printf(s, "zone=%u ", nf_ct_zone(ct)); #endif - if (ct_show_delta_time(s, ct)) - goto release; + ct_show_delta_time(s, ct); + + seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); - if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + if (seq_has_overflowed(s)) goto release; ret = 0; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index d719764..6e3b911 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -294,19 +294,19 @@ static int seq_show(struct seq_file *s, void *v) { loff_t *pos = v; const struct nf_logger *logger; - int i, ret; + int i; struct net *net = seq_file_net(s); logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], lockdep_is_held(&nf_log_mutex)); if (!logger) - ret = seq_printf(s, "%2lld NONE (", *pos); + seq_printf(s, "%2lld NONE (", *pos); else - ret = seq_printf(s, "%2lld %s (", *pos, logger->name); + seq_printf(s, "%2lld %s (", *pos, logger->name); - if (ret < 0) - return ret; + if (seq_has_overflowed(s)) + return -ENOSPC; for (i = 0; i < NF_LOG_TYPE_MAX; i++) { if (loggers[*pos][i] == NULL) @@ -314,17 +314,19 @@ static int seq_show(struct seq_file *s, void *v) logger = rcu_dereference_protected(loggers[*pos][i], lockdep_is_held(&nf_log_mutex)); - ret = seq_printf(s, "%s", logger->name); - if (ret < 0) - return ret; - if (i == 0 && loggers[*pos][i + 1] != NULL) { - ret = seq_printf(s, ","); - if (ret < 0) - return ret; - } + seq_printf(s, "%s", logger->name); + if (i == 0 && loggers[*pos][i + 1] != NULL) + seq_printf(s, ","); + + if (seq_has_overflowed(s)) + return -ENOSPC; } - return seq_printf(s, ")\n"); + seq_printf(s, ")\n"); + + if (seq_has_overflowed(s)) + return -ENOSPC; + return 0; } static const struct seq_operations nflog_seq_ops = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 7c60ccd..0db8515 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -1242,12 +1242,13 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfqnl_instance *inst = v; - return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", - inst->queue_num, - inst->peer_portid, inst->queue_total, - inst->copy_mode, inst->copy_range, - inst->queue_dropped, inst->queue_user_dropped, - inst->id_sequence, 1); + seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + inst->queue_num, + inst->peer_portid, inst->queue_total, + inst->copy_mode, inst->copy_range, + inst->queue_dropped, inst->queue_user_dropped, + inst->id_sequence, 1); + return seq_has_overflowed(s); } static const struct seq_operations nfqnl_seq_ops = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 133eb47..51a459c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -947,9 +947,10 @@ static int xt_table_seq_show(struct seq_file *seq, void *v) { struct xt_table *table = list_entry(v, struct xt_table, list); - if (strlen(table->name)) - return seq_printf(seq, "%s\n", table->name); - else + if (strlen(table->name)) { + seq_printf(seq, "%s\n", table->name); + return seq_has_overflowed(seq); + } else return 0; } @@ -1086,8 +1087,10 @@ static int xt_match_seq_show(struct seq_file *seq, void *v) if (trav->curr == trav->head) return 0; match = list_entry(trav->curr, struct xt_match, list); - return (*match->name == '\0') ? 0 : - seq_printf(seq, "%s\n", match->name); + if (*match->name == '\0') + return 0; + seq_printf(seq, "%s\n", match->name); + return seq_has_overflowed(seq); } return 0; } @@ -1139,8 +1142,10 @@ static int xt_target_seq_show(struct seq_file *seq, void *v) if (trav->curr == trav->head) return 0; target = list_entry(trav->curr, struct xt_target, list); - return (*target->name == '\0') ? 0 : - seq_printf(seq, "%s\n", target->name); + if (*target->name == '\0') + return 0; + seq_printf(seq, "%s\n", target->name); + return seq_has_overflowed(seq); } return 0; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 05fbc2a..1786968 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -789,7 +789,6 @@ static void dl_seq_stop(struct seq_file *s, void *v) static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - int res; const struct xt_hashlimit_htable *ht = s->private; spin_lock(&ent->lock); @@ -798,33 +797,32 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, switch (family) { case NFPROTO_IPV4: - res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", - (long)(ent->expires - jiffies)/HZ, - &ent->dst.ip.src, - ntohs(ent->dst.src_port), - &ent->dst.ip.dst, - ntohs(ent->dst.dst_port), - ent->rateinfo.credit, ent->rateinfo.credit_cap, - ent->rateinfo.cost); + seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", + (long)(ent->expires - jiffies)/HZ, + &ent->dst.ip.src, + ntohs(ent->dst.src_port), + &ent->dst.ip.dst, + ntohs(ent->dst.dst_port), + ent->rateinfo.credit, ent->rateinfo.credit_cap, + ent->rateinfo.cost); break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: - res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", - (long)(ent->expires - jiffies)/HZ, - &ent->dst.ip6.src, - ntohs(ent->dst.src_port), - &ent->dst.ip6.dst, - ntohs(ent->dst.dst_port), - ent->rateinfo.credit, ent->rateinfo.credit_cap, - ent->rateinfo.cost); + seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", + (long)(ent->expires - jiffies)/HZ, + &ent->dst.ip6.src, + ntohs(ent->dst.src_port), + &ent->dst.ip6.dst, + ntohs(ent->dst.dst_port), + ent->rateinfo.credit, ent->rateinfo.credit_cap, + ent->rateinfo.cost); break; #endif default: BUG(); - res = 0; } spin_unlock(&ent->lock); - return res; + return seq_has_overflowed(s); } static int dl_seq_show(struct seq_file *s, void *v) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 87d20f4..07c04a8 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -378,7 +378,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync) __unregister_prot_hook(sk, sync); } -static inline __pure struct page *pgv_to_page(void *addr) +static inline struct page * __pure pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 0f62326..2a47179 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -63,6 +63,15 @@ static const struct rfkill_ops rfkill_gpio_ops = { .set_block = rfkill_gpio_set_power, }; +static const struct acpi_gpio_params reset_gpios = { 0, 0, false }; +static const struct acpi_gpio_params shutdown_gpios = { 1, 0, false }; + +static const struct acpi_gpio_mapping acpi_rfkill_default_gpios[] = { + { "reset-gpios", &reset_gpios, 1 }, + { "shutdown-gpios", &shutdown_gpios, 1 }, + { }, +}; + static int rfkill_gpio_acpi_probe(struct device *dev, struct rfkill_gpio_data *rfkill) { @@ -75,7 +84,8 @@ static int rfkill_gpio_acpi_probe(struct device *dev, rfkill->name = dev_name(dev); rfkill->type = (unsigned)id->driver_data; - return 0; + return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), + acpi_rfkill_default_gpios); } static int rfkill_gpio_probe(struct platform_device *pdev) @@ -102,7 +112,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->clk = devm_clk_get(&pdev->dev, NULL); - gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0); + gpio = devm_gpiod_get(&pdev->dev, "reset"); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -110,7 +120,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->reset_gpio = gpio; } - gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1); + gpio = devm_gpiod_get(&pdev->dev, "shutdown"); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -150,6 +160,8 @@ static int rfkill_gpio_remove(struct platform_device *pdev) rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); + acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev)); + return 0; } diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 0754d0f..fb78117 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -35,6 +35,7 @@ config RPCSEC_GSS_KRB5 config SUNRPC_DEBUG bool "RPC: Enable dprintk debugging" depends on SUNRPC && SYSCTL + select DEBUG_FS help This option enables a sysctl-based debugging interface that is be used by the 'rpcdebug' utility to turn on or off diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index e5a7a1c..15e6f6c 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -14,6 +14,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 383eb91..47f38be 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -16,7 +16,7 @@ #include <linux/sunrpc/gss_api.h> #include <linux/spinlock.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -646,7 +646,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_auth = auth; cred->cr_ops = ops; cred->cr_expire = jiffies; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) cred->cr_magic = RPCAUTH_CRED_MAGIC; #endif cred->cr_uid = acred->uid; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 6f6b829..41248b1 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -14,7 +14,7 @@ #include <linux/sunrpc/debug.h> #include <linux/sunrpc/sched.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 53ed8d3..dace13d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -66,7 +66,7 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; #define GSS_KEY_EXPIRE_TIMEO 240 static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index c586e92..254defe 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -38,7 +38,7 @@ #include <linux/sunrpc/gss_asn1.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index f5ed9f6..b5408e8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -45,7 +45,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/xdr.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 24589bd..234fa8d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -61,7 +61,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/lcm.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0d3c158..28db442 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -45,7 +45,7 @@ #include <linux/crypto.h> #include <linux/sunrpc/gss_krb5_enctypes.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 42768e5..1d74d65 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -64,7 +64,7 @@ #include <linux/random.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 62ac90c..20d55c7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -35,7 +35,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 6c981dd..dcf9515 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -62,7 +62,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 4b614c6..ca7e92a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -35,7 +35,7 @@ #include <linux/pagemap.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 92d5ab9..7063d85 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -46,7 +46,7 @@ #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/clnt.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h index 685a688..9d88c62 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.h +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -25,7 +25,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprtsock.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c548ab2..de856dd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -51,7 +51,7 @@ #include "gss_rpc_upcall.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 712c123..c2a2b58 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -138,7 +138,7 @@ struct rpc_cred null_cred = { .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) .cr_magic = RPCAUTH_CRED_MAGIC, #endif }; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index d5d6923..4feda2d 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -25,7 +25,7 @@ struct unx_cred { #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9761a0d..651f49a 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -27,7 +27,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <linux/export.h> #include <linux/sunrpc/bc_xprt.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #define RPCDBG_FACILITY RPCDBG_TRANS #endif diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9acd6ce..05da12a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -42,7 +42,7 @@ #include "sunrpc.h" #include "netns.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_CALL #endif @@ -305,6 +305,10 @@ static int rpc_client_register(struct rpc_clnt *clnt, struct super_block *pipefs_sb; int err; + err = rpc_clnt_debugfs_register(clnt); + if (err) + return err; + pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { err = rpc_setup_pipedir(pipefs_sb, clnt); @@ -331,6 +335,7 @@ err_auth: out: if (pipefs_sb) rpc_put_sb_net(net); + rpc_clnt_debugfs_unregister(clnt); return err; } @@ -670,6 +675,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); + rpc_clnt_debugfs_unregister(clnt); /* * A new transport was created. "clnt" therefore @@ -771,6 +777,7 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; + rpc_clnt_debugfs_unregister(clnt); rpc_clnt_remove_pipedir(clnt); rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); @@ -1396,8 +1403,9 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); -#ifdef RPC_DEBUG -static const char *rpc_proc_name(const struct rpc_task *task) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +const char +*rpc_proc_name(const struct rpc_task *task) { const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -2421,7 +2429,7 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int } EXPORT_SYMBOL_GPL(rpc_call_null); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { printk(KERN_INFO "-pid- flgs status -client- --rqstp- " diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c new file mode 100644 index 0000000..e811f39 --- /dev/null +++ b/net/sunrpc/debugfs.c @@ -0,0 +1,292 @@ +/** + * debugfs interface for sunrpc + * + * (c) 2014 Jeff Layton <jlayton@primarydata.com> + */ + +#include <linux/debugfs.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/clnt.h> +#include "netns.h" + +static struct dentry *topdir; +static struct dentry *rpc_clnt_dir; +static struct dentry *rpc_xprt_dir; + +struct rpc_clnt_iter { + struct rpc_clnt *clnt; + loff_t pos; +}; + +static int +tasks_show(struct seq_file *f, void *v) +{ + u32 xid = 0; + struct rpc_task *task = v; + struct rpc_clnt *clnt = task->tk_client; + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + if (task->tk_rqstp) + xid = be32_to_cpu(task->tk_rqstp->rq_xid); + + seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt->cl_clid, xid, task->tk_timeout, task->tk_ops, + clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), + task->tk_action, rpc_waitq); + return 0; +} + +static void * +tasks_start(struct seq_file *f, loff_t *ppos) + __acquires(&clnt->cl_lock) +{ + struct rpc_clnt_iter *iter = f->private; + loff_t pos = *ppos; + struct rpc_clnt *clnt = iter->clnt; + struct rpc_task *task; + + iter->pos = pos + 1; + spin_lock(&clnt->cl_lock); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) + if (pos-- == 0) + return task; + return NULL; +} + +static void * +tasks_next(struct seq_file *f, void *v, loff_t *pos) +{ + struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = iter->clnt; + struct rpc_task *task = v; + struct list_head *next = task->tk_task.next; + + ++iter->pos; + ++*pos; + + /* If there's another task on list, return it */ + if (next == &clnt->cl_tasks) + return NULL; + return list_entry(next, struct rpc_task, tk_task); +} + +static void +tasks_stop(struct seq_file *f, void *v) + __releases(&clnt->cl_lock) +{ + struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = iter->clnt; + + spin_unlock(&clnt->cl_lock); +} + +static const struct seq_operations tasks_seq_operations = { + .start = tasks_start, + .next = tasks_next, + .stop = tasks_stop, + .show = tasks_show, +}; + +static int tasks_open(struct inode *inode, struct file *filp) +{ + int ret = seq_open_private(filp, &tasks_seq_operations, + sizeof(struct rpc_clnt_iter)); + + if (!ret) { + struct seq_file *seq = filp->private_data; + struct rpc_clnt_iter *iter = seq->private; + + iter->clnt = inode->i_private; + + if (!atomic_inc_not_zero(&iter->clnt->cl_count)) { + seq_release_private(inode, filp); + ret = -EINVAL; + } + } + + return ret; +} + +static int +tasks_release(struct inode *inode, struct file *filp) +{ + struct seq_file *seq = filp->private_data; + struct rpc_clnt_iter *iter = seq->private; + + rpc_release_client(iter->clnt); + return seq_release_private(inode, filp); +} + +static const struct file_operations tasks_fops = { + .owner = THIS_MODULE, + .open = tasks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tasks_release, +}; + +int +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + int len, err; + char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ + + /* Already registered? */ + if (clnt->cl_debugfs) + return 0; + + len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); + if (len >= sizeof(name)) + return -EINVAL; + + /* make the per-client dir */ + clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir); + if (!clnt->cl_debugfs) + return -ENOMEM; + + /* make tasks file */ + err = -ENOMEM; + if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, + clnt, &tasks_fops)) + goto out_err; + + err = -EINVAL; + rcu_read_lock(); + len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", + rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name); + rcu_read_unlock(); + if (len >= sizeof(name)) + goto out_err; + + err = -ENOMEM; + if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name)) + goto out_err; + + return 0; +out_err: + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; + return err; +} + +void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; +} + +static int +xprt_info_show(struct seq_file *f, void *v) +{ + struct rpc_xprt *xprt = f->private; + + seq_printf(f, "netid: %s\n", xprt->address_strings[RPC_DISPLAY_NETID]); + seq_printf(f, "addr: %s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); + seq_printf(f, "port: %s\n", xprt->address_strings[RPC_DISPLAY_PORT]); + seq_printf(f, "state: 0x%lx\n", xprt->state); + return 0; +} + +static int +xprt_info_open(struct inode *inode, struct file *filp) +{ + int ret; + struct rpc_xprt *xprt = inode->i_private; + + ret = single_open(filp, xprt_info_show, xprt); + + if (!ret) { + if (!xprt_get(xprt)) { + single_release(inode, filp); + ret = -EINVAL; + } + } + return ret; +} + +static int +xprt_info_release(struct inode *inode, struct file *filp) +{ + struct rpc_xprt *xprt = inode->i_private; + + xprt_put(xprt); + return single_release(inode, filp); +} + +static const struct file_operations xprt_info_fops = { + .owner = THIS_MODULE, + .open = xprt_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = xprt_info_release, +}; + +int +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + int len, id; + static atomic_t cur_id; + char name[9]; /* 8 hex digits + NULL term */ + + id = (unsigned int)atomic_inc_return(&cur_id); + + len = snprintf(name, sizeof(name), "%x", id); + if (len >= sizeof(name)) + return -EINVAL; + + /* make the per-client dir */ + xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir); + if (!xprt->debugfs) + return -ENOMEM; + + /* make tasks file */ + if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, + xprt, &xprt_info_fops)) { + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; + return -ENOMEM; + } + + return 0; +} + +void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; +} + +void __exit +sunrpc_debugfs_exit(void) +{ + debugfs_remove_recursive(topdir); +} + +int __init +sunrpc_debugfs_init(void) +{ + topdir = debugfs_create_dir("sunrpc", NULL); + if (!topdir) + goto out; + + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); + if (!rpc_clnt_dir) + goto out_remove; + + rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir); + if (!rpc_xprt_dir) + goto out_remove; + + return 0; +out_remove: + debugfs_remove_recursive(topdir); + topdir = NULL; +out: + return -ENOMEM; +} diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 1891a10..0520201 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,7 +32,7 @@ #include "netns.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_BIND #endif diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fe3441a..d20f232 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -24,7 +24,7 @@ #include "sunrpc.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #define RPCDBG_FACILITY RPCDBG_SCHED #endif @@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key) return 0; } -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 5453049..9711a15 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -116,7 +116,15 @@ EXPORT_SYMBOL_GPL(svc_seq_show); */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { - return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); + struct rpc_iostats *stats; + int i; + + stats = kcalloc(clnt->cl_maxproc, sizeof(*stats), GFP_KERNEL); + if (stats) { + for (i = 0; i < clnt->cl_maxproc; i++) + spin_lock_init(&stats[i].om_lock); + } + return stats; } EXPORT_SYMBOL_GPL(rpc_alloc_iostats); @@ -135,20 +143,21 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task * @stats: array of stat structures - * - * Relies on the caller for serialization. */ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_iostats *op_metrics; - ktime_t delta; + ktime_t delta, now; if (!stats || !req) return; + now = ktime_get(); op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; + spin_lock(&op_metrics->om_lock); + op_metrics->om_ops++; op_metrics->om_ntrans += req->rq_ntrans; op_metrics->om_timeouts += task->tk_timeouts; @@ -161,8 +170,10 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); - delta = ktime_sub(ktime_get(), task->tk_start); + delta = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); + + spin_unlock(&op_metrics->om_lock); } EXPORT_SYMBOL_GPL(rpc_count_iostats); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index cd30120..e37fbed 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -97,13 +97,20 @@ init_sunrpc(void) err = register_rpc_pipefs(); if (err) goto out4; -#ifdef RPC_DEBUG + + err = sunrpc_debugfs_init(); + if (err) + goto out5; + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; +out5: + unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: @@ -120,10 +127,11 @@ cleanup_sunrpc(void) rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); + sunrpc_debugfs_exit(); unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_unregister_sysctl(); #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ca8a795..2783fd8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -28,6 +28,8 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/bc_xprt.h> +#include <trace/events/sunrpc.h> + #define RPCDBG_FACILITY RPCDBG_SVCDSP static void svc_unregister(const struct svc_serv *serv, struct net *net); @@ -1040,7 +1042,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1314,24 +1316,25 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; - rqstp->rq_xid = svc_getu32(argv); - dir = svc_getnl(argv); if (dir != 0) { /* direction != CALL */ svc_printk(rqstp, "bad direction %d, dropping request\n", dir); serv->sv_stats->rpcbadfmt++; - svc_drop(rqstp); - return 0; + goto out_drop; } /* Returns 1 for send, 0 for drop */ - if (svc_process_common(rqstp, argv, resv)) - return svc_send(rqstp); - else { - svc_drop(rqstp); - return 0; + if (likely(svc_process_common(rqstp, argv, resv))) { + int ret = svc_send(rqstp); + + trace_svc_process(rqstp, ret); + return ret; } +out_drop: + trace_svc_process(rqstp, 0); + svc_drop(rqstp); + return 0; } #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c179ca2..bbb3b04 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -15,6 +15,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprt.h> #include <linux/module.h> +#include <trace/events/sunrpc.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -773,35 +774,43 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) err = svc_alloc_arg(rqstp); if (err) - return err; + goto out; try_to_freeze(); cond_resched(); + err = -EINTR; if (signalled() || kthread_should_stop()) - return -EINTR; + goto out; xprt = svc_get_next_xprt(rqstp, timeout); - if (IS_ERR(xprt)) - return PTR_ERR(xprt); + if (IS_ERR(xprt)) { + err = PTR_ERR(xprt); + goto out; + } len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ + err = -EAGAIN; if (len <= 0) - goto out; + goto out_release; clear_bit(XPT_OLD, &xprt->xpt_flags); rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; + rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); if (serv->sv_stats) serv->sv_stats->netcnt++; + trace_svc_recv(rqstp, len); return len; -out: +out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); - return -EAGAIN; +out: + trace_svc_recv(rqstp, err); + return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -821,12 +830,12 @@ EXPORT_SYMBOL_GPL(svc_drop); int svc_send(struct svc_rqst *rqstp) { struct svc_xprt *xprt; - int len; + int len = -EFAULT; struct xdr_buf *xb; xprt = rqstp->rq_xprt; if (!xprt) - return -EFAULT; + goto out; /* release the receive skb before sending the reply */ rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); @@ -849,7 +858,9 @@ int svc_send(struct svc_rqst *rqstp) svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - return 0; + len = 0; +out: + trace_svc_send(rqstp, len); return len; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3f959c6..f9c052d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) xid = *p++; calldir = *p; - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, ntohl(xid)); + if (!bc_xprt) return -EAGAIN; - } + spin_lock_bh(&bc_xprt->transport_lock); + req = xprt_lookup_rqst(bc_xprt, xid); + if (!req) + goto unlock_notfound; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); /* @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) dst = &req->rq_private_buf.head[0]; src = &rqstp->rq_arg.head[0]; if (dst->iov_len < src->iov_len) - return -EAGAIN; /* whatever; just giving up. */ + goto unlock_eagain; /* whatever; just giving up. */ memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; + spin_unlock_bh(&bc_xprt->transport_lock); return 0; +unlock_notfound: + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, ntohl(xid)); +unlock_eagain: + spin_unlock_bh(&bc_xprt->transport_lock); + return -EAGAIN; } static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index c99c58e..887f018 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(nfsd_debug); unsigned int nlm_debug; EXPORT_SYMBOL_GPL(nlm_debug); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static struct ctl_table_header *sunrpc_table_header; static struct ctl_table sunrpc_table[]; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 56e4e15..ebbefad 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -49,13 +49,15 @@ #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> +#include <trace/events/sunrpc.h> + #include "sunrpc.h" /* * Local variables */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_XPRT #endif @@ -772,11 +774,14 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *entry; list_for_each_entry(entry, &xprt->recv, rq_list) - if (entry->rq_xid == xid) + if (entry->rq_xid == xid) { + trace_xprt_lookup_rqst(xprt, xid, 0); return entry; + } dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", ntohl(xid)); + trace_xprt_lookup_rqst(xprt, xid, -ENOENT); xprt->stat.bad_xids++; return NULL; } @@ -810,6 +815,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); + trace_xprt_complete_rqst(xprt, req->rq_xid, copied); xprt->stat.recvs++; req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); @@ -926,6 +932,7 @@ void xprt_transmit(struct rpc_task *task) req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); + trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { task->tk_status = status; return; @@ -1296,6 +1303,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { + int err; struct rpc_xprt *xprt; struct xprt_class *t; @@ -1336,6 +1344,12 @@ found: return ERR_PTR(-ENOMEM); } + err = rpc_xprt_debugfs_register(xprt); + if (err) { + xprt_destroy(xprt); + return ERR_PTR(err); + } + dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); out: @@ -1352,6 +1366,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) dprintk("RPC: destroying transport %p\n", xprt); del_timer_sync(&xprt->timer); + rpc_xprt_debugfs_unregister(xprt); rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); rpc_destroy_wait_queue(&xprt->sending); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 6166c98..df01d12 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -49,11 +49,11 @@ #include <linux/highmem.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char transfertypes[][12] = { "pure inline", /* no chunks */ " read chunk", /* some argument via rdma read */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6a4615d..bbd6155 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -55,7 +55,7 @@ #include "xprt_rdma.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -73,9 +73,9 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 0; + int xprt_rdma_pad_optimize = 1; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; @@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_niovs == 0) rc = rpcrdma_marshal_req(rqst); - else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) rc = rpcrdma_marshal_chunks(rqst, 0); if (rc < 0) goto failed_marshal; @@ -705,7 +705,7 @@ static void __exit xprt_rdma_cleanup(void) int rc; dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; @@ -736,7 +736,7 @@ static int __init xprt_rdma_init(void) dprintk("\tPadding %d\n\tMemreg %d\n", xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 61c4129..c98e406 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -57,11 +57,12 @@ * Globals/Macros */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); +static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); /* * internal functions @@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data) static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); +static const char * const async_event[] = { + "CQ error", + "QP fatal error", + "QP request error", + "QP access error", + "communication established", + "send queue drained", + "path migration successful", + "path mig error", + "device fatal error", + "port active", + "port error", + "LID change", + "P_key change", + "SM change", + "SRQ error", + "SRQ limit reached", + "last WQE reached", + "client reregister", + "GID change", +}; + +#define ASYNC_MSG(status) \ + ((status) < ARRAY_SIZE(async_event) ? \ + async_event[(status)] : "unknown async error") + +static void +rpcrdma_schedule_tasklet(struct list_head *sched_list) +{ + unsigned long flags; + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_splice_tail(sched_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); +} + static void rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - dprintk("RPC: %s: QP error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); + pr_err("RPC: %s: %s on device %s ep %p\n", + __func__, ASYNC_MSG(event->event), + event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; ep->rep_func(ep); @@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - dprintk("RPC: %s: CQ error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); + pr_err("RPC: %s: %s on device %s ep %p\n", + __func__, ASYNC_MSG(event->event), + event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; ep->rep_func(ep); @@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) struct list_head sched_list; struct ib_wc *wcs; int budget, count, rc; - unsigned long flags; INIT_LIST_HEAD(&sched_list); budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; @@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) rc = 0; out_schedule: - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_splice_tail(&sched_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); + rpcrdma_schedule_tasklet(&sched_list); return rc; } @@ -309,11 +345,18 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep) { - rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep); - rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep); + struct ib_wc wc; + LIST_HEAD(sched_list); + + while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) + rpcrdma_recvcq_process_wc(&wc, &sched_list); + if (!list_empty(&sched_list)) + rpcrdma_schedule_tasklet(&sched_list); + while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0) + rpcrdma_sendcq_process_wc(&wc); } -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char * const conn[] = { "address resolved", "address error", @@ -344,7 +387,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; #endif struct ib_qp_attr attr; @@ -408,7 +451,7 @@ connected: break; } -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (connstate == 1) { int ird = attr.max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; @@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; - if (ep->rep_cqinit <= 2) + if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) + ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; + else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); ep->rep_ia = ia; @@ -866,8 +911,19 @@ retry: rpcrdma_ep_disconnect(ep, ia); rpcrdma_flush_cqs(ep); - if (ia->ri_memreg_strategy == RPCRDMA_FRMR) + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: rpcrdma_reset_frmrs(ia); + break; + case RPCRDMA_MTHCAFMR: + rpcrdma_reset_fmrs(ia); + break; + case RPCRDMA_ALLPHYSICAL: + break; + default: + rc = -EIO; + goto out; + } xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); id = rpcrdma_create_id(xprt, ia, @@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } +/* After a disconnect, unmap all FMRs. + * + * This is invoked only in the transport connect worker in order + * to serialize with rpcrdma_register_fmr_external(). + */ +static void +rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) +{ + struct rpcrdma_xprt *r_xprt = + container_of(ia, struct rpcrdma_xprt, rx_ia); + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct list_head *pos; + struct rpcrdma_mw *r; + LIST_HEAD(l); + int rc; + + list_for_each(pos, &buf->rb_all) { + r = list_entry(pos, struct rpcrdma_mw, mw_all); + + INIT_LIST_HEAD(&l); + list_add(&r->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + if (rc) + dprintk("RPC: %s: ib_unmap_fmr failed %i\n", + __func__, rc); + } +} + /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in * an unusable state. Find FRMRs in this state and dereg / reg * each. FRMRs that are VALID and attached to an rpcrdma_req are @@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, break; default: - return -1; + return -EIO; } if (rc) - return -1; + return rc; return nsegs; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ac7fc9a..b799041 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -97,6 +97,12 @@ struct rpcrdma_ep { struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; }; +/* + * Force a signaled SEND Work Request every so often, + * in case the provider needs to do some housekeeping. + */ +#define RPCRDMA_MAX_UNSIGNALED_SENDS (32) + #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3b305ab..87ce7e8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -75,7 +75,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; * someone else's file names! */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; @@ -186,7 +186,7 @@ static struct ctl_table sunrpc_table[] = { */ #define XS_IDLE_DISC_TO (5U * 60 * HZ) -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -216,65 +216,6 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif -struct sock_xprt { - struct rpc_xprt xprt; - - /* - * Network layer - */ - struct socket * sock; - struct sock * inet; - - /* - * State of TCP reply receive - */ - __be32 tcp_fraghdr, - tcp_xid, - tcp_calldir; - - u32 tcp_offset, - tcp_reclen; - - unsigned long tcp_copied, - tcp_flags; - - /* - * Connection of transports - */ - struct delayed_work connect_worker; - struct sockaddr_storage srcaddr; - unsigned short srcport; - - /* - * UDP socket buffer size parameters - */ - size_t rcvsize, - sndsize; - - /* - * Saved socket callback addresses - */ - void (*old_data_ready)(struct sock *); - void (*old_state_change)(struct sock *); - void (*old_write_space)(struct sock *); - void (*old_error_report)(struct sock *); -}; - -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - -/* - * TCP RPC flags - */ -#define TCP_RPC_REPLY (1UL << 6) - static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) { return (struct rpc_xprt *) sk->sk_user_data; @@ -1415,6 +1356,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns dprintk("RPC: xs_tcp_data_recv started\n"); do { + trace_xs_tcp_data_recv(transport); /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) { @@ -1439,6 +1381,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /* Skip over any trailing bytes on short reads */ xs_tcp_read_discard(transport, &desc); } while (desc.count); + trace_xs_tcp_data_recv(transport); dprintk("RPC: xs_tcp_data_recv done\n"); return len - desc.count; } @@ -1454,12 +1397,15 @@ static void xs_tcp_data_ready(struct sock *sk) struct rpc_xprt *xprt; read_descriptor_t rd_desc; int read; + unsigned long total = 0; dprintk("RPC: xs_tcp_data_ready...\n"); read_lock_bh(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) + if (!(xprt = xprt_from_sock(sk))) { + read = 0; goto out; + } /* Any data means we had a useful conversation, so * the we don't need to delay the next reconnect */ @@ -1471,8 +1417,11 @@ static void xs_tcp_data_ready(struct sock *sk) do { rd_desc.count = 65536; read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + if (read > 0) + total += read; } while (read > 0); out: + trace_xs_tcp_data_ready(xprt, read, total); read_unlock_bh(&sk->sk_callback_lock); } @@ -3042,7 +2991,7 @@ static struct xprt_class xs_bc_tcp_transport = { */ int init_socket_xprt(void) { -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif @@ -3061,7 +3010,7 @@ int init_socket_xprt(void) */ void cleanup_socket_xprt(void) { -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; |