summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-02-18 02:48:51 (GMT)
committerDavid S. Miller <davem@davemloft.net>2015-02-18 02:48:51 (GMT)
commitfece13ca005a5f559147e9424321f4b5e01272b4 (patch)
tree54e762e70afb664d14152e6bcf89a48be3fb9c13 /net
parent855e7e7174bade3f2b63077a81eea5aab525dbf6 (diff)
parentf5af19d10d151c5a2afae3306578f485c244db25 (diff)
downloadlinux-fece13ca005a5f559147e9424321f4b5e01272b4.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_netfilter.c7
-rw-r--r--net/core/dev.c3
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/net-sysfs.c28
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/dsa/slave.c9
-rw-r--r--net/ipv4/cipso_ipv4.c51
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fou.c42
-rw-r--r--net/ipv4/tcp_fastopen.c32
-rw-r--r--net/ipv4/tcp_memcontrol.c4
-rw-r--r--net/ipv4/udp_offload.c13
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/udp_offload.c6
-rw-r--r--net/netfilter/nft_compat.c63
-rw-r--r--net/netfilter/nft_lookup.c1
-rw-r--r--net/netlabel/netlabel_kapi.c15
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/openvswitch/flow_netlink.c4
-rw-r--r--net/rds/cong.c16
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--net/sunrpc/svc_xprt.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c16
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c244
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c46
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c47
30 files changed, 434 insertions, 247 deletions
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 65728e0..0ee453f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -987,15 +987,12 @@ static int __init br_netfilter_init(void)
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
- ret = -ENOMEM;
- goto err1;
+ nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ return -ENOMEM;
}
#endif
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
-err1:
- nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- return ret;
}
static void __exit br_netfilter_fini(void)
diff --git a/net/core/dev.c b/net/core/dev.c
index d030575..8f9710c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4024,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
+ NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
@@ -5335,7 +5336,7 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
/**
* netdev_bonding_info_change - Dispatch event about slave change
* @dev: device
- * @netdev_bonding_info: info to dispatch
+ * @bonding_info: info to dispatch
*
* Send NETDEV_BONDING_INFO to netdev notifiers with info.
* The caller must hold the RTNL lock.
diff --git a/net/core/filter.c b/net/core/filter.c
index ec9baea..f6bdc2b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -531,7 +531,7 @@ do_pass:
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
break;
- /* Unkown instruction. */
+ /* Unknown instruction. */
default:
goto err;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9993412..f2aa73b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -614,8 +614,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
{
struct rps_map *map;
cpumask_var_t mask;
- size_t len = 0;
- int i;
+ int i, len;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
@@ -626,17 +625,11 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
for (i = 0; i < map->len; i++)
cpumask_set_cpu(map->cpus[i], mask);
- len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
- if (PAGE_SIZE - len < 3) {
- rcu_read_unlock();
- free_cpumask_var(mask);
- return -EINVAL;
- }
+ len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
rcu_read_unlock();
-
free_cpumask_var(mask);
- len += sprintf(buf + len, "\n");
- return len;
+
+ return len < PAGE_SIZE ? len : -EINVAL;
}
static ssize_t store_rps_map(struct netdev_rx_queue *queue,
@@ -1090,8 +1083,7 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
struct xps_dev_maps *dev_maps;
cpumask_var_t mask;
unsigned long index;
- size_t len = 0;
- int i;
+ int i, len;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
@@ -1117,15 +1109,9 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
}
rcu_read_unlock();
- len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
- if (PAGE_SIZE - len < 3) {
- free_cpumask_var(mask);
- return -EINVAL;
- }
-
+ len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
free_cpumask_var(mask);
- len += sprintf(buf + len, "\n");
- return len;
+ return len < PAGE_SIZE ? len : -EINVAL;
}
static ssize_t store_xps_map(struct netdev_queue *queue,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9fa25b0..b4899f5b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -97,7 +97,7 @@
* New xmit() return, do_div and misc clean up by Stephen Hemminger
* <shemminger@osdl.org> 040923
*
- * Randy Dunlap fixed u64 printk compiler waring
+ * Randy Dunlap fixed u64 printk compiler warning
*
* Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org>
* New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5be499b..ab293a3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2162,7 +2162,14 @@ replay:
}
err = rtnl_configure_link(dev, ifm);
if (err < 0) {
- unregister_netdevice(dev);
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
+
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
+ }
goto out;
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index eaa51dd..4334248 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -155,7 +155,7 @@ write_unlock:
rcu_read_unlock();
len = min(sizeof(kbuf) - 1, *lenp);
- len = cpumask_scnprintf(kbuf, len, mask);
+ len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
if (!len) {
*lenp = 0;
goto done;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d104ae1..f23dead 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -521,10 +521,13 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
struct device_node *phy_dn, *port_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
- int ret;
+ int mode, ret;
port_dn = cd->port_dn[p->port];
- p->phy_interface = of_get_phy_mode(port_dn);
+ mode = of_get_phy_mode(port_dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ p->phy_interface = mode;
phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
if (of_phy_is_fixed_link(port_dn)) {
@@ -559,6 +562,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
if (!p->phy)
return -ENODEV;
+ /* Use already configured phy mode */
+ p->phy_interface = p->phy->interface;
phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
p->phy_interface);
} else {
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 5160c71..e361ea6 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -378,20 +378,18 @@ static int cipso_v4_cache_check(const unsigned char *key,
* negative values on failure.
*
*/
-int cipso_v4_cache_add(const struct sk_buff *skb,
+int cipso_v4_cache_add(const unsigned char *cipso_ptr,
const struct netlbl_lsm_secattr *secattr)
{
int ret_val = -EPERM;
u32 bkt;
struct cipso_v4_map_cache_entry *entry = NULL;
struct cipso_v4_map_cache_entry *old_entry = NULL;
- unsigned char *cipso_ptr;
u32 cipso_ptr_len;
if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
return 0;
- cipso_ptr = CIPSO_V4_OPTPTR(skb);
cipso_ptr_len = cipso_ptr[1];
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
@@ -1579,6 +1577,33 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
}
/**
+ * cipso_v4_optptr - Find the CIPSO option in the packet
+ * @skb: the packet
+ *
+ * Description:
+ * Parse the packet's IP header looking for a CIPSO option. Returns a pointer
+ * to the start of the CIPSO option on success, NULL if one if not found.
+ *
+ */
+unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ unsigned char *optptr = (unsigned char *)&(ip_hdr(skb)[1]);
+ int optlen;
+ int taglen;
+
+ for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
+ if (optptr[0] == IPOPT_CIPSO)
+ return optptr;
+ taglen = optptr[1];
+ optlen -= taglen;
+ optptr += taglen;
+ }
+
+ return NULL;
+}
+
+/**
* cipso_v4_validate - Validate a CIPSO option
* @option: the start of the option, on error it is set to point to the error
*
@@ -2119,8 +2144,8 @@ void cipso_v4_req_delattr(struct request_sock *req)
* on success and negative values on failure.
*
*/
-static int cipso_v4_getattr(const unsigned char *cipso,
- struct netlbl_lsm_secattr *secattr)
+int cipso_v4_getattr(const unsigned char *cipso,
+ struct netlbl_lsm_secattr *secattr)
{
int ret_val = -ENOMSG;
u32 doi;
@@ -2305,22 +2330,6 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb)
return 0;
}
-/**
- * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
- * @skb: the packet
- * @secattr: the security attributes
- *
- * Description:
- * Parse the given packet's CIPSO option and return the security attributes.
- * Returns zero on success and negative values on failure.
- *
- */
-int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
- struct netlbl_lsm_secattr *secattr)
-{
- return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr);
-}
-
/*
* Setup Functions
*/
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f0b4a31d..3a8985c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1186,7 +1186,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
no_in_dev:
/* Not loopback addresses on loopback should be preferred
- in this case. It is importnat that lo is the first interface
+ in this case. It is important that lo is the first interface
in dev_base list.
*/
for_each_netdev_rcu(net, dev) {
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 92ddea1..ff069f6 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -22,14 +22,18 @@ static LIST_HEAD(fou_list);
struct fou {
struct socket *sock;
u8 protocol;
+ u8 flags;
u16 port;
struct udp_offload udp_offloads;
struct list_head list;
};
+#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
+
struct fou_cfg {
u16 type;
u8 protocol;
+ u8 flags;
struct udp_port_cfg udp_config;
};
@@ -64,24 +68,20 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
}
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
- void *data, size_t hdrlen, u8 ipproto)
+ void *data, size_t hdrlen, u8 ipproto,
+ bool nopartial)
{
__be16 *pd = data;
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
- if (skb->remcsum_offload) {
- /* Already processed in GRO path */
- skb->remcsum_offload = 0;
- return guehdr;
- }
-
if (!pskb_may_pull(skb, plen))
return NULL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset);
+ skb_remcsum_process(skb, (void *)guehdr + hdrlen,
+ start, offset, nopartial);
return guehdr;
}
@@ -142,7 +142,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (flags & GUE_PFLAG_REMCSUM) {
guehdr = gue_remcsum(skb, guehdr, data + doffset,
- hdrlen, guehdr->proto_ctype);
+ hdrlen, guehdr->proto_ctype,
+ !!(fou->flags &
+ FOU_F_REMCSUM_NOPARTIAL));
if (!guehdr)
goto drop;
@@ -214,7 +216,8 @@ out_unlock:
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
struct guehdr *guehdr, void *data,
- size_t hdrlen, u8 ipproto)
+ size_t hdrlen, u8 ipproto,
+ struct gro_remcsum *grc, bool nopartial)
{
__be16 *pd = data;
size_t start = ntohs(pd[0]);
@@ -222,7 +225,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
if (skb->remcsum_offload)
- return guehdr;
+ return NULL;
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
@@ -234,7 +237,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
return NULL;
}
- skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset);
+ skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
+ start, offset, grc, nopartial);
skb->remcsum_offload = 1;
@@ -254,6 +258,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
void *data;
u16 doffset = 0;
int flush = 1;
+ struct fou *fou = container_of(uoff, struct fou, udp_offloads);
+ struct gro_remcsum grc;
+
+ skb_gro_remcsum_init(&grc);
off = skb_gro_offset(skb);
len = off + sizeof(*guehdr);
@@ -295,7 +303,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
if (flags & GUE_PFLAG_REMCSUM) {
guehdr = gue_gro_remcsum(skb, off, guehdr,
data + doffset, hdrlen,
- guehdr->proto_ctype);
+ guehdr->proto_ctype, &grc,
+ !!(fou->flags &
+ FOU_F_REMCSUM_NOPARTIAL));
if (!guehdr)
goto out;
@@ -345,6 +355,7 @@ out_unlock:
rcu_read_unlock();
out:
NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_remcsum_cleanup(skb, &grc);
return pp;
}
@@ -455,6 +466,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk = sock->sk;
+ fou->flags = cfg->flags;
fou->port = cfg->udp_config.local_udp_port;
/* Initial for fou type */
@@ -541,6 +553,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_AF] = { .type = NLA_U8, },
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
[FOU_ATTR_TYPE] = { .type = NLA_U8, },
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
};
static int parse_nl_config(struct genl_info *info,
@@ -571,6 +584,9 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[FOU_ATTR_TYPE])
cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
+ if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
+ cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
+
return 0;
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 53db2c3..ea82fd4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -134,6 +134,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
struct sock *child;
+ u32 end_seq;
req->num_retrans = 0;
req->num_timeout = 0;
@@ -185,20 +186,35 @@ static bool tcp_fastopen_create_child(struct sock *sk,
/* Queue the data carried in the SYN packet. We need to first
* bump skb's refcnt because the caller will attempt to free it.
+ * Note that IPv6 might also have used skb_get() trick
+ * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
+ * So we need to eventually get a clone of the packet,
+ * before inserting it in sk_receive_queue.
*
* XXX (TFO) - we honor a zero-payload TFO request for now,
* (any reason not to?) but no need to queue the skb since
* there is no data. How about SYN+FIN?
*/
- if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
- skb = skb_get(skb);
- skb_dst_drop(skb);
- __skb_pull(skb, tcp_hdr(skb)->doff * 4);
- skb_set_owner_r(skb, child);
- __skb_queue_tail(&child->sk_receive_queue, skb);
- tp->syn_data_acked = 1;
+ end_seq = TCP_SKB_CB(skb)->end_seq;
+ if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
+ struct sk_buff *skb2;
+
+ if (unlikely(skb_shared(skb)))
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ else
+ skb2 = skb_get(skb);
+
+ if (likely(skb2)) {
+ skb_dst_drop(skb2);
+ __skb_pull(skb2, tcp_hdrlen(skb));
+ skb_set_owner_r(skb2, child);
+ __skb_queue_tail(&child->sk_receive_queue, skb2);
+ tp->syn_data_acked = 1;
+ } else {
+ end_seq = TCP_SKB_CB(skb)->seq + 1;
+ }
}
- tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
sk->sk_data_ready(sk);
bh_unlock_sock(child);
sock_put(child);
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index c2a75c6..2379c1b 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -47,6 +47,10 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
return;
percpu_counter_destroy(&cg_proto->sockets_allocated);
+
+ if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
+ static_key_slow_dec(&memcg_socket_limit_enabled);
+
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d10f6f4..4915d82 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -402,6 +402,13 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
}
rcu_read_unlock();
+
+ if (skb->remcsum_offload)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+
+ skb->encapsulation = 1;
+ skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));
+
return err;
}
@@ -410,9 +417,13 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check)
+ if (uh->check) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
+ } else {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ }
return udp_gro_complete(skb, nhoff);
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 2f780cb..f45d6db 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -172,7 +172,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
{
int i;
- spin_lock(&ip6_fl_lock);
+ spin_lock_bh(&ip6_fl_lock);
for (i = 0; i <= FL_HASH_MASK; i++) {
struct ip6_flowlabel *fl;
struct ip6_flowlabel __rcu **flp;
@@ -190,7 +190,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
flp = &fl->next;
}
}
- spin_unlock(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
}
static struct ip6_flowlabel *fl_intern(struct net *net,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d33df4c..7deebf1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1273,7 +1273,7 @@ emsgsize:
/* If this is the first and only packet and device
* supports checksum offloading, let's use it.
*/
- if (!skb &&
+ if (!skb && sk->sk_protocol == IPPROTO_UDP &&
length + fragheaderlen < mtu &&
rt->dst.dev->features & NETIF_F_V6_CSUM &&
!exthdrlen)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 98565ce..4688bd4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -141,7 +141,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
u32 *p = NULL;
if (!(rt->dst.flags & DST_HOST))
- return NULL;
+ return dst_cow_metrics_generic(dst, old);
peer = rt6_get_peer_create(rt);
if (peer) {
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a562769..ab889bb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -161,9 +161,13 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check)
+ if (uh->check) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);
+ } else {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ }
return udp_gro_complete(skb, nhoff);
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 265e190..c598f74 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,6 +19,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
#include <net/netfilter/nf_tables.h>
static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -40,6 +41,7 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
union nft_entry {
struct ipt_entry e4;
struct ip6t_entry e6;
+ struct ebt_entry ebt;
};
static inline void
@@ -50,9 +52,9 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
par->hotdrop = false;
}
-static void nft_target_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+static void nft_target_eval_xt(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
struct xt_target *target = expr->ops->data;
@@ -66,7 +68,7 @@ static void nft_target_eval(const struct nft_expr *expr,
if (pkt->xt.hotdrop)
ret = NF_DROP;
- switch(ret) {
+ switch (ret) {
case XT_CONTINUE:
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
break;
@@ -74,7 +76,41 @@ static void nft_target_eval(const struct nft_expr *expr,
data[NFT_REG_VERDICT].verdict = ret;
break;
}
- return;
+}
+
+static void nft_target_eval_bridge(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_target *target = expr->ops->data;
+ struct sk_buff *skb = pkt->skb;
+ int ret;
+
+ nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+
+ ret = target->target(skb, &pkt->xt);
+
+ if (pkt->xt.hotdrop)
+ ret = NF_DROP;
+
+ switch (ret) {
+ case EBT_ACCEPT:
+ data[NFT_REG_VERDICT].verdict = NF_ACCEPT;
+ break;
+ case EBT_DROP:
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+ break;
+ case EBT_CONTINUE:
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ break;
+ case EBT_RETURN:
+ data[NFT_REG_VERDICT].verdict = NFT_RETURN;
+ break;
+ default:
+ data[NFT_REG_VERDICT].verdict = ret;
+ break;
+ }
}
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
@@ -100,6 +136,10 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->e6.ipv6.proto = proto;
entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
break;
+ case NFPROTO_BRIDGE:
+ entry->ebt.ethproto = proto;
+ entry->ebt.invflags = inv ? EBT_IPROTO : 0;
+ break;
}
par->entryinfo = entry;
par->target = target;
@@ -307,6 +347,10 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->e6.ipv6.proto = proto;
entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
break;
+ case NFPROTO_BRIDGE:
+ entry->ebt.ethproto = proto;
+ entry->ebt.invflags = inv ? EBT_IPROTO : 0;
+ break;
}
par->entryinfo = entry;
par->match = match;
@@ -490,6 +534,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
case AF_INET6:
fmt = "ip6t_%s";
break;
+ case NFPROTO_BRIDGE:
+ fmt = "ebt_%s";
+ break;
default:
pr_err("nft_compat: unsupported protocol %d\n",
nfmsg->nfgen_family);
@@ -663,13 +710,17 @@ nft_target_select_ops(const struct nft_ctx *ctx,
nft_target->ops.type = &nft_target_type;
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
- nft_target->ops.eval = nft_target_eval;
nft_target->ops.init = nft_target_init;
nft_target->ops.destroy = nft_target_destroy;
nft_target->ops.dump = nft_target_dump;
nft_target->ops.validate = nft_target_validate;
nft_target->ops.data = target;
+ if (family == NFPROTO_BRIDGE)
+ nft_target->ops.eval = nft_target_eval_bridge;
+ else
+ nft_target->ops.eval = nft_target_eval_xt;
+
list_add(&nft_target->head, &nft_target_list);
return &nft_target->ops;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 6404a72..9615b8b 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -39,6 +39,7 @@ static void nft_lookup_eval(const struct nft_expr *expr,
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET] = { .type = NLA_STRING },
+ [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
};
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index a845cd4..28cddc8 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1065,10 +1065,12 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
u16 family,
struct netlbl_lsm_secattr *secattr)
{
+ unsigned char *ptr;
+
switch (family) {
case AF_INET:
- if (CIPSO_V4_OPTEXIST(skb) &&
- cipso_v4_skbuff_getattr(skb, secattr) == 0)
+ ptr = cipso_v4_optptr(skb);
+ if (ptr && cipso_v4_getattr(ptr, secattr) == 0)
return 0;
break;
#if IS_ENABLED(CONFIG_IPV6)
@@ -1094,7 +1096,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
*/
void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway)
{
- if (CIPSO_V4_OPTEXIST(skb))
+ if (cipso_v4_optptr(skb))
cipso_v4_error(skb, error, gateway);
}
@@ -1126,11 +1128,14 @@ void netlbl_cache_invalidate(void)
int netlbl_cache_add(const struct sk_buff *skb,
const struct netlbl_lsm_secattr *secattr)
{
+ unsigned char *ptr;
+
if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0)
return -ENOMSG;
- if (CIPSO_V4_OPTEXIST(skb))
- return cipso_v4_cache_add(skb, secattr);
+ ptr = cipso_v4_optptr(skb);
+ if (ptr)
+ return cipso_v4_cache_add(ptr, secattr);
return -ENOMSG;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e2c348b..50ec42f 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -717,6 +717,8 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr,
{
int err;
+ memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
+
/* Extract metadata from netlink attributes. */
err = ovs_nla_get_flow_metadata(attr, key, log);
if (err)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 993281e..216f20b 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1516,7 +1516,7 @@ int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
{
- return ovs_nla_put_key(&flow->mask->key, &flow->key,
+ return ovs_nla_put_key(&flow->key, &flow->key,
OVS_FLOW_ATTR_KEY, false, skb);
}
@@ -1746,7 +1746,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_key key;
struct ovs_tunnel_info *tun_info;
struct nlattr *a;
- int err, start, opts_type;
+ int err = 0, start, opts_type;
ovs_match_init(&match, &key, NULL);
opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
diff --git a/net/rds/cong.c b/net/rds/cong.c
index e5b65ac..e6144b8 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -221,7 +221,21 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
if (!test_and_set_bit(0, &conn->c_map_queued)) {
rds_stats_inc(s_cong_update_queued);
- rds_send_xmit(conn);
+ /* We cannot inline the call to rds_send_xmit() here
+ * for two reasons (both pertaining to a TCP transport):
+ * 1. When we get here from the receive path, we
+ * are already holding the sock_lock (held by
+ * tcp_v4_rcv()). So inlining calls to
+ * tcp_setsockopt and/or tcp_sendmsg will deadlock
+ * when it tries to get the sock_lock())
+ * 2. Interrupts are masked so that we can mark the
+ * the port congested from both send and recv paths.
+ * (See comment around declaration of rdc_cong_lock).
+ * An attempt to get the sock_lock() here will
+ * therefore trigger warnings.
+ * Defer the xmit to rds_send_worker() instead.
+ */
+ queue_delayed_work(rds_wq, &conn->c_send_w, 0);
}
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 91eaef1..78974e4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
EXPORT_SYMBOL_GPL(svc_set_num_threads);
/*
- * Called from a server thread as it's exiting. Caller must hold the BKL or
- * the "service mutex", whichever is appropriate for the service.
+ * Called from a server thread as it's exiting. Caller must hold the "service
+ * mutex" for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c69358b..163ac45 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list);
* svc_pool->sp_lock protects most of the fields of that pool.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
- * BKL protects svc_serv->sv_nrthread.
+ * The "service mutex" protects svc_serv->sv_nrthread.
* svc_sock->sk_lock protects the svc_sock->sk_deferred list
* and the ->sk_info_authunix cache.
*
@@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list);
* that no other thread will be using the transport or will
* try to set XPT_DEAD.
*/
-
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
{
struct svc_xprt_class *cl;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 65b1462..b681855 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
}
/*
- * Determine number of chunks and total bytes in chunk list. The chunk
- * list has already been verified to fit within the RPCRDMA header.
- */
-void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
- int *ch_count, int *byte_count)
-{
- /* compute the number of bytes represented by read chunks */
- *byte_count = 0;
- *ch_count = 0;
- for (; ch->rc_discrim != 0; ch++) {
- *byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
- *ch_count = *ch_count + 1;
- }
-}
-
-/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e011027..f9f13a3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -43,7 +43,6 @@
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
-#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt,
u32 byte_count)
{
+ struct rpcrdma_msg *rmsgp;
struct page *page;
u32 bc;
int sge_no;
@@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc;
rqstp->rq_arg.page_base = 0;
- rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
+ /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+ rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+ if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+ rqstp->rq_arg.pages = &rqstp->rq_pages[0];
+ else
+ rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
sge_no = 1;
while (bc && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
@@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* We should never run out of SGE because the limit is defined to
- * support the max allowed RPC data length
- */
- BUG_ON(bc && (sge_no == ctxt->count));
- BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
- != byte_count);
- BUG_ON(rqstp->rq_arg.len != byte_count);
-
/* If not all pages were used from the SGL, free the remaining ones */
bc = sge_no;
while (sge_no < ctxt->count) {
@@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
return min_t(int, sge_count, xprt->sc_max_sge);
}
-typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- int *page_no,
- u32 *page_offset,
- u32 rs_handle,
- u32 rs_length,
- u64 rs_offset,
- int last);
-
/* Issue an RDMA_READ using the local lkey to map the data sink */
-static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- int *page_no,
- u32 *page_offset,
- u32 rs_handle,
- u32 rs_length,
- u64 rs_offset,
- int last)
+int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ bool last)
{
struct ib_send_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
@@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
}
/* Issue an RDMA_READ using an FRMR to map the data sink */
-static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- int *page_no,
- u32 *page_offset,
- u32 rs_handle,
- u32 rs_length,
- u64 rs_offset,
- int last)
+int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ bool last)
{
struct ib_send_wr read_wr;
struct ib_send_wr inv_wr;
@@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
return ret;
}
+static unsigned int
+rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
+{
+ unsigned int count;
+
+ for (count = 0; ch->rc_discrim != xdr_zero; ch++)
+ count++;
+ return count;
+}
+
+/* If there was additional inline content, append it to the end of arg.pages.
+ * Tail copy has to be done after the reader function has determined how many
+ * pages are needed for RDMA READ.
+ */
+static int
+rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
+ u32 position, u32 byte_count, u32 page_offset, int page_no)
+{
+ char *srcp, *destp;
+ int ret;
+
+ ret = 0;
+ srcp = head->arg.head[0].iov_base + position;
+ byte_count = head->arg.head[0].iov_len - position;
+ if (byte_count > PAGE_SIZE) {
+ dprintk("svcrdma: large tail unsupported\n");
+ return 0;
+ }
+
+ /* Fit as much of the tail on the current page as possible */
+ if (page_offset != PAGE_SIZE) {
+ destp = page_address(rqstp->rq_arg.pages[page_no]);
+ destp += page_offset;
+ while (byte_count--) {
+ *destp++ = *srcp++;
+ page_offset++;
+ if (page_offset == PAGE_SIZE && byte_count)
+ goto more;
+ }
+ goto done;
+ }
+
+more:
+ /* Fit the rest on the next page */
+ page_no++;
+ destp = page_address(rqstp->rq_arg.pages[page_no]);
+ while (byte_count--)
+ *destp++ = *srcp++;
+
+ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+done:
+ byte_count = head->arg.head[0].iov_len - position;
+ head->arg.page_len += byte_count;
+ head->arg.len += byte_count;
+ head->arg.buflen += byte_count;
+ return 1;
+}
+
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
- int page_no, ch_count, ret;
+ int page_no, ret;
struct rpcrdma_read_chunk *ch;
- u32 page_offset, byte_count;
+ u32 handle, page_offset, byte_count;
+ u32 position;
u64 rs_offset;
- rdma_reader_fn reader;
+ bool last;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
- svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
- if (ch_count > RPCSVC_MAXPAGES)
+ if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
return -EINVAL;
/* The request is completed when the RDMA_READs complete. The
@@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
*/
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
head->hdr_count = head->count;
head->arg.page_base = 0;
head->arg.page_len = 0;
head->arg.len = rqstp->rq_arg.len;
head->arg.buflen = rqstp->rq_arg.buflen;
- /* Use FRMR if supported */
- if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
- reader = rdma_read_chunk_frmr;
- else
- reader = rdma_read_chunk_lcl;
+ ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+ position = be32_to_cpu(ch->rc_position);
+
+ /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+ if (position == 0) {
+ head->arg.pages = &head->pages[0];
+ page_offset = head->byte_len;
+ } else {
+ head->arg.pages = &head->pages[head->count];
+ page_offset = 0;
+ }
- page_no = 0; page_offset = 0;
- for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch->rc_discrim != 0; ch++) {
+ ret = 0;
+ page_no = 0;
+ for (; ch->rc_discrim != xdr_zero; ch++) {
+ if (be32_to_cpu(ch->rc_position) != position)
+ goto err;
+ handle = be32_to_cpu(ch->rc_target.rs_handle),
+ byte_count = be32_to_cpu(ch->rc_target.rs_length);
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
- byte_count = ntohl(ch->rc_target.rs_length);
while (byte_count > 0) {
- ret = reader(xprt, rqstp, head,
- &page_no, &page_offset,
- ntohl(ch->rc_target.rs_handle),
- byte_count, rs_offset,
- ((ch+1)->rc_discrim == 0) /* last */
- );
+ last = (ch + 1)->rc_discrim == xdr_zero;
+ ret = xprt->sc_reader(xprt, rqstp, head,
+ &page_no, &page_offset,
+ handle, byte_count,
+ rs_offset, last);
if (ret < 0)
goto err;
byte_count -= ret;
@@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
head->arg.buflen += ret;
}
}
+
+ /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
+ if (page_offset & 3) {
+ u32 pad = 4 - (page_offset & 3);
+
+ head->arg.page_len += pad;
+ head->arg.len += pad;
+ head->arg.buflen += pad;
+ page_offset += pad;
+ }
+
ret = 1;
+ if (position && position < head->arg.head[0].iov_len)
+ ret = rdma_copy_tail(rqstp, head, position,
+ byte_count, page_offset, page_no);
+ head->arg.head[0].iov_len = position;
+ head->position = position;
+
err:
/* Detach arg pages. svc_recv will replenish them */
for (page_no = 0;
@@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
return ret;
}
-/*
- * To avoid a separate RDMA READ just for a handful of zero bytes,
- * RFC 5666 section 3.7 allows the client to omit the XDR zero pad
- * in chunk lists.
- */
-static void
-rdma_fix_xdr_pad(struct xdr_buf *buf)
-{
- unsigned int page_len = buf->page_len;
- unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
- unsigned int offset, pg_no;
- char *p;
-
- if (size == 0)
- return;
-
- pg_no = page_len >> PAGE_SHIFT;
- offset = page_len & ~PAGE_MASK;
- p = page_address(buf->pages[pg_no]);
- memset(p + offset, 0, size);
-
- buf->page_len += size;
- buf->buflen += size;
- buf->len += size;
-}
-
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no;
int ret;
- BUG_ON(!head);
-
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->pages[page_no];
}
+
+ /* Adjustments made for RDMA_NOMSG type requests */
+ if (head->position == 0) {
+ if (head->arg.len <= head->sge[0].length) {
+ head->arg.head[0].iov_len = head->arg.len -
+ head->byte_len;
+ head->arg.page_len = 0;
+ } else {
+ head->arg.head[0].iov_len = head->sge[0].length -
+ head->byte_len;
+ head->arg.page_len = head->arg.len -
+ head->sge[0].length;
+ }
+ }
+
/* Point rq_arg.pages past header */
- rdma_fix_xdr_pad(&head->arg);
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
@@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
- dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
- "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
+ dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, "
+ "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n",
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
@@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
- BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
@@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
+ rqstp->rq_arg.tail[0].iov_len;
svc_rdma_put_context(ctxt, 0);
out:
- dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
- "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
+ dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
+ "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
ret, rqstp->rq_arg.len,
rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9f1b506..7de33d1a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
u32 page_off;
int page_no;
- BUG_ON(xdr->len !=
- (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
+ if (xdr->len !=
+ (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
+ pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+ return -EIO;
+ }
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
@@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
int bc;
struct svc_rdma_op_ctxt *ctxt;
- BUG_ON(vec->count > RPCSVC_MAXPAGES);
+ if (vec->count > RPCSVC_MAXPAGES) {
+ pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
+ return -EIO;
+ }
+
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off,
@@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_off = 0;
sge_no++;
xdr_sge_no++;
- BUG_ON(xdr_sge_no > vec->count);
+ if (xdr_sge_no > vec->count) {
+ pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
+ goto err;
+ }
bc -= sge_bytes;
if (sge_no == xprt->sc_max_sge)
break;
@@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
- BUG_ON(byte_count != 0);
+ if (byte_count != 0) {
+ pr_err("svcrdma: Could not map %d bytes\n", byte_count);
+ goto err;
+ }
/* Save all respages in the ctxt and remove them from the
* respages array. They are our pages until the I/O
@@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
- BUG_ON(sge_no > rdma->sc_max_sge);
+ if (sge_no > rdma->sc_max_sge) {
+ pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+ goto err;
+ }
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
@@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
-/*
- * Return the start of an xdr buffer.
- */
-static void *xdr_start(struct xdr_buf *xdr)
-{
- return xdr->head[0].iov_base -
- (xdr->len -
- xdr->page_len -
- xdr->tail[0].iov_len -
- xdr->head[0].iov_len);
-}
-
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
@@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
- /* Get the RDMA request header. */
- rdma_argp = xdr_start(&rqstp->rq_arg);
+ /* Get the RDMA request header. The receive logic always
+ * places this at the start of page 0.
+ */
+ rdma_argp = page_address(rqstp->rq_pages[0]);
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4e61880..f609c1c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
struct svcxprt_rdma *xprt;
int i;
- BUG_ON(!ctxt);
xprt = ctxt->xprt;
if (free_pages)
for (i = 0; i < ctxt->count; i++)
@@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) {
case IB_WR_SEND:
- BUG_ON(ctxt->frmr);
+ if (ctxt->frmr)
+ pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
- BUG_ON(ctxt->frmr);
+ if (ctxt->frmr)
+ pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
svc_rdma_put_context(ctxt, 0);
break;
@@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt,
svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- BUG_ON(!read_hdr);
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ if (read_hdr) {
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ } else {
+ pr_err("svcrdma: ctxt->read_hdr == NULL\n");
+ }
svc_xprt_enqueue(&xprt->sc_xprt);
}
svc_rdma_put_context(ctxt, 0);
break;
default:
- BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n",
ctxt->wr_op);
@@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
buflen = 0;
ctxt->direction = DMA_FROM_DEVICE;
for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
- BUG_ON(sge_no >= xprt->sc_max_sge);
+ if (sge_no >= xprt->sc_max_sge) {
+ pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+ goto err_put_ctxt;
+ }
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
@@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
{
struct rdma_cm_id *listen_id;
struct svcxprt_rdma *cma_xprt;
- struct svc_xprt *xprt;
int ret;
dprintk("svcrdma: Creating RDMA socket\n");
@@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
cma_xprt = rdma_create_xprt(serv, 1);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
- xprt = &cma_xprt->sc_xprt;
listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
IB_QPT_RC);
@@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) {
frmr_unmap_dma(rdma, frmr);
spin_lock_bh(&rdma->sc_frmr_q_lock);
- BUG_ON(!list_empty(&frmr->frmr_list));
+ WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
spin_unlock_bh(&rdma->sc_frmr_q_lock);
}
@@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
* NB: iWARP requires remote write access for the data sink
* of an RDMA_READ. IB does not.
*/
+ newxprt->sc_reader = rdma_read_chunk_lcl;
if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
newxprt->sc_frmr_pg_list_len =
devattr.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+ newxprt->sc_reader = rdma_read_chunk_frmr;
}
/*
@@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work)
dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
/* We should only be called from kref_put */
- BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+ if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+ pr_err("svcrdma: sc_xprt still in use? (%d)\n",
+ atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
/*
* Destroy queued, but not processed read completions. Note
@@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work)
}
/* Warn if we leaked a resource or under-referenced */
- WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
- WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
+ if (atomic_read(&rdma->sc_ctxt_used) != 0)
+ pr_err("svcrdma: ctxt still in use? (%d)\n",
+ atomic_read(&rdma->sc_ctxt_used));
+ if (atomic_read(&rdma->sc_dma_used) != 0)
+ pr_err("svcrdma: dma still in use? (%d)\n",
+ atomic_read(&rdma->sc_dma_used));
/* De-allocate fastreg mr */
rdma_dealloc_frmr_q(rdma);
@@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
return -ENOTCONN;
- BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
wr_count = 1;
for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
wr_count++;