summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/6lowpan.c13
-rw-r--r--net/core/dev.c32
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/fib_rules.c33
-rw-r--r--net/core/gen_estimator.c24
-rw-r--r--net/core/gen_stats.c35
-rw-r--r--net/core/skbuff.c46
-rw-r--r--net/dsa/Makefile2
-rw-r--r--net/dsa/dsa.c253
-rw-r--r--net/dsa/dsa2.c690
-rw-r--r--net/dsa/dsa_priv.h9
-rw-r--r--net/dsa/slave.c80
-rw-r--r--net/dsa/tag_brcm.c4
-rw-r--r--net/dsa/tag_dsa.c10
-rw-r--r--net/dsa/tag_edsa.c10
-rw-r--r--net/dsa/tag_trailer.c4
-rw-r--r--net/ieee802154/6lowpan/core.c15
-rw-r--r--net/ipv4/fib_rules.c6
-rw-r--r--net/ipv4/fou.c81
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/tcp_input.c26
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/ila/ila.h3
-rw-r--r--net/ipv6/ila/ila_common.c6
-rw-r--r--net/ipv6/ila/ila_lwt.c4
-rw-r--r--net/ipv6/ila/ila_xlat.c8
-rw-r--r--net/ipv6/ip6_input.c1
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/sit.c3
-rw-r--r--net/l2tp/l2tp_eth.c4
-rw-r--r--net/l3mdev/l3mdev.c38
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netlink/af_netlink.h14
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/rxrpc/af_rxrpc.c195
-rw-r--r--net/rxrpc/ar-accept.c2
-rw-r--r--net/rxrpc/ar-ack.c2
-rw-r--r--net/rxrpc/ar-call.c170
-rw-r--r--net/rxrpc/ar-connection.c19
-rw-r--r--net/rxrpc/ar-connevent.c2
-rw-r--r--net/rxrpc/ar-input.c2
-rw-r--r--net/rxrpc/ar-internal.h52
-rw-r--r--net/rxrpc/ar-key.c4
-rw-r--r--net/rxrpc/ar-local.c2
-rw-r--r--net/rxrpc/ar-output.c188
-rw-r--r--net/rxrpc/ar-peer.c2
-rw-r--r--net/rxrpc/ar-recvmsg.c4
-rw-r--r--net/rxrpc/ar-skbuff.c2
-rw-r--r--net/rxrpc/ar-transport.c2
-rw-r--r--net/rxrpc/rxkad.c2
-rw-r--r--net/sched/act_api.c24
-rw-r--r--net/sched/act_bpf.c8
-rw-r--r--net/sched/act_connmark.c6
-rw-r--r--net/sched/act_csum.c7
-rw-r--r--net/sched/act_gact.c7
-rw-r--r--net/sched/act_ife.c10
-rw-r--r--net/sched/act_ipt.c14
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/act_nat.c7
-rw-r--r--net/sched/act_pedit.c8
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/act_simple.c7
-rw-r--r--net/sched/act_skbedit.c7
-rw-r--r--net/sched/act_vlan.c9
-rw-r--r--net/sched/cls_api.c11
-rw-r--r--net/sched/cls_flower.c31
-rw-r--r--net/sched/sch_api.c21
-rw-r--r--net/sched/sch_atm.c18
-rw-r--r--net/sched/sch_cbq.c294
-rw-r--r--net/sched/sch_choke.c17
-rw-r--r--net/sched/sch_drr.c31
-rw-r--r--net/sched/sch_dsmark.c18
-rw-r--r--net/sched/sch_fifo.c7
-rw-r--r--net/sched/sch_fq_codel.c25
-rw-r--r--net/sched/sch_generic.c10
-rw-r--r--net/sched/sch_gred.c35
-rw-r--r--net/sched/sch_hfsc.c37
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c37
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c11
-rw-r--r--net/sched/sch_multiq.c25
-rw-r--r--net/sched/sch_netem.c34
-rw-r--r--net/sched/sch_plug.c2
-rw-r--r--net/sched/sch_prio.c23
-rw-r--r--net/sched/sch_qfq.c56
-rw-r--r--net/sched/sch_red.c21
-rw-r--r--net/sched/sch_sfq.c1
-rw-r--r--net/sched/sch_tbf.c18
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/input.c57
-rw-r--r--net/sctp/inqueue.c78
-rw-r--r--net/sctp/offload.c98
-rw-r--r--net/sctp/output.c366
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/node.c22
102 files changed, 2120 insertions, 1561 deletions
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 780089d..d020299 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -627,20 +627,9 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
return err < 0 ? NET_XMIT_DROP : err;
}
-static struct lock_class_key bt_tx_busylock;
-static struct lock_class_key bt_netdev_xmit_lock_key;
-
-static void bt_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &bt_netdev_xmit_lock_key);
-}
-
static int bt_dev_init(struct net_device *dev)
{
- netdev_for_each_tx_queue(dev, bt_set_lockdep_class_one, NULL);
- dev->qdisc_tx_busylock = &bt_tx_busylock;
+ netdev_lockdep_set_classes(dev);
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 904ff43..c43c9d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -139,6 +139,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>
+#include <linux/crash_dump.h>
#include "net-sysfs.h"
@@ -2249,11 +2250,12 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
*/
int netif_get_num_default_rss_queues(void)
{
- return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+ return is_kdump_kernel() ?
+ 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
-static inline void __netif_reschedule(struct Qdisc *q)
+static void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
unsigned long flags;
@@ -3075,7 +3077,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
@@ -3898,22 +3900,14 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- qdisc_run(q);
- spin_unlock(root_lock);
- } else {
- if (!test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state)) {
- __netif_reschedule(q);
- } else {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- }
- }
+ spin_lock(root_lock);
+ /* We need to make sure head->next_sched is read
+ * before clearing __QDISC_STATE_SCHED
+ */
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+ qdisc_run(q);
+ spin_unlock(root_lock);
}
}
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f403481..9774898 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 840aceb..98298b1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -173,7 +173,8 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
EXPORT_SYMBOL_GPL(fib_rules_unregister);
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
- struct flowi *fl, int flags)
+ struct flowi *fl, int flags,
+ struct fib_lookup_arg *arg)
{
int ret = 0;
@@ -189,6 +190,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
goto out;
+ if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
+ goto out;
+
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -204,7 +208,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
- if (!fib_rule_match(rule, ops, fl, flags))
+ if (!fib_rule_match(rule, ops, fl, flags, arg))
continue;
if (rule->action == FR_ACT_GOTO) {
@@ -265,7 +269,7 @@ errout:
return err;
}
-static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -336,6 +340,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (tb[FRA_TUN_ID])
rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+ if (tb[FRA_L3MDEV]) {
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
+ if (rule->l3mdev != 1)
+#endif
+ goto errout_free;
+ }
+
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
@@ -371,6 +383,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
+ if (rule->l3mdev && rule->table)
+ goto errout_free;
+
err = ops->configure(rule, skb, frh, tb);
if (err < 0)
goto errout_free;
@@ -424,8 +439,9 @@ errout:
rules_ops_put(ops);
return err;
}
+EXPORT_SYMBOL_GPL(fib_nl_newrule);
-static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -483,6 +499,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
(rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
continue;
+ if (tb[FRA_L3MDEV] &&
+ (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -536,6 +556,7 @@ errout:
rules_ops_put(ops);
return err;
}
+EXPORT_SYMBOL_GPL(fib_nl_delrule);
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
struct fib_rule *rule)
@@ -607,7 +628,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->target &&
nla_put_u32(skb, FRA_GOTO, rule->target)) ||
(rule->tun_id &&
- nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)))
+ nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
+ (rule->l3mdev &&
+ nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 4573d81..cad8e79 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -84,6 +84,7 @@ struct gen_estimator
struct gnet_stats_basic_packed *bstats;
struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
+ seqcount_t *running;
int ewma_log;
u32 last_packets;
unsigned long avpps;
@@ -121,26 +122,28 @@ static void est_timer(unsigned long arg)
unsigned long rate;
u64 brate;
- spin_lock(e->stats_lock);
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
read_lock(&est_lock);
if (e->bstats == NULL)
goto skip;
- __gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
+ __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats);
brate = (b.bytes - e->last_bytes)<<(7 - idx);
e->last_bytes = b.bytes;
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
- e->rate_est->bps = (e->avbps+0xF)>>5;
+ WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5);
rate = b.packets - e->last_packets;
rate <<= (7 - idx);
e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
- e->rate_est->pps = (e->avpps + 0xF) >> 5;
+ WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5);
skip:
read_unlock(&est_lock);
- spin_unlock(e->stats_lock);
+ if (e->stats_lock)
+ spin_unlock(e->stats_lock);
}
if (!list_empty(&elist[idx].list))
@@ -194,6 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
+ * @running: qdisc running seqcount
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
@@ -209,6 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
+ seqcount_t *running,
struct nlattr *opt)
{
struct gen_estimator *est;
@@ -226,12 +231,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (est == NULL)
return -ENOBUFS;
- __gnet_stats_copy_basic(&b, cpu_bstats, bstats);
+ __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats);
idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
+ est->running = running;
est->ewma_log = parm->ewma_log;
est->last_bytes = b.bytes;
est->avbps = rate_est->bps<<5;
@@ -291,6 +297,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
+ * @running: qdisc running seqcount (might be NULL)
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
@@ -301,10 +308,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
- spinlock_t *stats_lock, struct nlattr *opt)
+ spinlock_t *stats_lock,
+ seqcount_t *running, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
- return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
+ return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index be873e4..508e051 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -32,10 +32,11 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
return 0;
nla_put_failure:
+ if (d->lock)
+ spin_unlock_bh(d->lock);
kfree(d->xstats);
d->xstats = NULL;
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return -1;
}
@@ -66,15 +67,16 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
{
memset(d, 0, sizeof(*d));
- spin_lock_bh(lock);
- d->lock = lock;
if (type)
d->tail = (struct nlattr *)skb_tail_pointer(skb);
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
d->padattr = padattr;
-
+ if (lock) {
+ d->lock = lock;
+ spin_lock_bh(lock);
+ }
if (d->tail)
return gnet_stats_copy(d, type, NULL, 0, padattr);
@@ -128,21 +130,29 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
}
void
-__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+__gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
+ unsigned int seq;
+
if (cpu) {
__gnet_stats_copy_basic_cpu(bstats, cpu);
- } else {
+ return;
+ }
+ do {
+ if (running)
+ seq = read_seqcount_begin(running);
bstats->bytes = b->bytes;
bstats->packets = b->packets;
- }
+ } while (running && read_seqcount_retry(running, seq));
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
+ * @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
@@ -154,13 +164,14 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic(struct gnet_dump *d,
+gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
struct gnet_stats_basic_packed bstats = {0};
- __gnet_stats_copy_basic(&bstats, cpu, b);
+ __gnet_stats_copy_basic(running, &bstats, cpu, b);
if (d->compat_tc_stats) {
d->tc_stats.bytes = bstats.bytes;
@@ -330,8 +341,9 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
return 0;
err_out:
+ if (d->lock)
+ spin_unlock_bh(d->lock);
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return -1;
}
EXPORT_SYMBOL(gnet_stats_copy_app);
@@ -365,10 +377,11 @@ gnet_stats_finish_copy(struct gnet_dump *d)
return -1;
}
+ if (d->lock)
+ spin_unlock_bh(d->lock);
kfree(d->xstats);
d->xstats = NULL;
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return 0;
}
EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f2b77e5..e7ec6d3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
+#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
@@ -3116,9 +3117,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int hsize;
int size;
- len = head_skb->len - offset;
- if (len > mss)
- len = mss;
+ if (unlikely(mss == GSO_BY_FRAGS)) {
+ len = list_skb->len;
+ } else {
+ len = head_skb->len - offset;
+ if (len > mss)
+ len = mss;
+ }
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
@@ -3438,6 +3443,7 @@ done:
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
+EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
@@ -4378,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
+ } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+ thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
@@ -4387,6 +4395,38 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+/**
+ * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ *
+ * @skb: GSO skb
+ * @mtu: MTU to validate against
+ *
+ * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
+ * once split.
+ */
+bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ const struct sk_buff *iter;
+ unsigned int hlen;
+
+ hlen = skb_gso_network_seglen(skb);
+
+ if (shinfo->gso_size != GSO_BY_FRAGS)
+ return hlen <= mtu;
+
+ /* Undo this so we can re-use header sizes */
+ hlen -= GSO_BY_FRAGS;
+
+ skb_walk_frags(skb, iter) {
+ if (hlen + skb_headlen(iter) > mtu)
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
if (skb_cow(skb, skb_headroom(skb)) < 0) {
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index da06ed1..8af4ded 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o
+dsa_core-y += dsa.o slave.o dsa2.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index eff5dfc..766d2a5 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -29,6 +29,33 @@
char dsa_driver_version[] = "0.1";
+static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Just return the original SKB */
+ return skb;
+}
+
+static const struct dsa_device_ops none_ops = {
+ .xmit = dsa_slave_notag_xmit,
+ .rcv = NULL,
+};
+
+const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
+#ifdef CONFIG_NET_DSA_TAG_DSA
+ [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+ [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+ [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#endif
+ [DSA_TAG_PROTO_NONE] = &none_ops,
+};
/* switch driver registration ***********************************************/
static DEFINE_MUTEX(dsa_switch_drivers_mutex);
@@ -180,41 +207,100 @@ __ATTRIBUTE_GROUPS(dsa_hwmon);
#endif /* CONFIG_NET_DSA_HWMON */
/* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct net_device *master)
+int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
+ struct device_node *port_dn, int port)
{
- struct dsa_chip_data *cd = ds->cd;
- struct device_node *port_dn;
struct phy_device *phydev;
- int ret, port, mode;
+ int ret, mode;
+
+ if (of_phy_is_fixed_link(port_dn)) {
+ ret = of_phy_register_fixed_link(port_dn);
+ if (ret) {
+ dev_err(dev, "failed to register fixed PHY\n");
+ return ret;
+ }
+ phydev = of_phy_find_device(port_dn);
+
+ mode = of_get_phy_mode(port_dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
+
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
+ if (ds->drv->adjust_link)
+ ds->drv->adjust_link(ds, port, phydev);
+ }
+
+ return 0;
+}
+
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+{
+ struct device_node *port_dn;
+ int ret, port;
for (port = 0; port < DSA_MAX_PORTS; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- port_dn = cd->port_dn[port];
- if (of_phy_is_fixed_link(port_dn)) {
- ret = of_phy_register_fixed_link(port_dn);
- if (ret) {
- netdev_err(master,
- "failed to register fixed PHY\n");
- return ret;
- }
- phydev = of_phy_find_device(port_dn);
+ port_dn = ds->ports[port].dn;
+ ret = dsa_cpu_dsa_setup(ds, dev, port_dn, port);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
- mode = of_get_phy_mode(port_dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
+const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
+{
+ const struct dsa_device_ops *ops;
+
+ if (tag_protocol >= DSA_TAG_LAST)
+ return ERR_PTR(-EINVAL);
+ ops = dsa_device_ops[tag_protocol];
+
+ if (!ops)
+ return ERR_PTR(-ENOPROTOOPT);
+
+ return ops;
+}
+
+int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds)
+{
+ struct net_device *master;
+ struct ethtool_ops *cpu_ops;
+
+ master = ds->dst->master_netdev;
+ if (ds->master_netdev)
+ master = ds->master_netdev;
+
+ cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
+ if (!cpu_ops)
+ return -ENOMEM;
+
+ memcpy(&ds->dst->master_ethtool_ops, master->ethtool_ops,
+ sizeof(struct ethtool_ops));
+ ds->dst->master_orig_ethtool_ops = master->ethtool_ops;
+ memcpy(cpu_ops, &ds->dst->master_ethtool_ops,
+ sizeof(struct ethtool_ops));
+ dsa_cpu_port_ethtool_init(cpu_ops);
+ master->ethtool_ops = cpu_ops;
- genphy_config_init(phydev);
- genphy_read_status(phydev);
- if (ds->drv->adjust_link)
- ds->drv->adjust_link(ds, port, phydev);
- }
- }
return 0;
}
+void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
+{
+ struct net_device *master;
+
+ master = ds->dst->master_netdev;
+ if (ds->master_netdev)
+ master = ds->master_netdev;
+
+ master->ethtool_ops = ds->dst->master_orig_ethtool_ops;
+}
+
static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
{
struct dsa_switch_driver *drv = ds->drv;
@@ -243,6 +329,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
}
dst->cpu_switch = index;
dst->cpu_port = i;
+ ds->cpu_port_mask |= 1 << i;
} else if (!strcmp(name, "dsa")) {
ds->dsa_port_mask |= 1 << i;
} else {
@@ -267,37 +354,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
* switch.
*/
if (dst->cpu_switch == index) {
- switch (drv->tag_protocol) {
-#ifdef CONFIG_NET_DSA_TAG_DSA
- case DSA_TAG_PROTO_DSA:
- dst->rcv = dsa_netdev_ops.rcv;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
- case DSA_TAG_PROTO_EDSA:
- dst->rcv = edsa_netdev_ops.rcv;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- case DSA_TAG_PROTO_TRAILER:
- dst->rcv = trailer_netdev_ops.rcv;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
- case DSA_TAG_PROTO_BRCM:
- dst->rcv = brcm_netdev_ops.rcv;
- break;
-#endif
- case DSA_TAG_PROTO_NONE:
- break;
- default:
- ret = -ENOPROTOOPT;
+ dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol);
+ if (IS_ERR(dst->tag_ops)) {
+ ret = PTR_ERR(dst->tag_ops);
goto out;
}
- dst->tag_protocol = drv->tag_protocol;
+ dst->rcv = dst->tag_ops->rcv;
}
+ memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
+
/*
* Do basic register setup.
*/
@@ -309,22 +376,25 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
if (ret < 0)
goto out;
- ds->slave_mii_bus = devm_mdiobus_alloc(parent);
- if (ds->slave_mii_bus == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- dsa_slave_mii_bus_init(ds);
-
- ret = mdiobus_register(ds->slave_mii_bus);
- if (ret < 0)
- goto out;
+ if (!ds->slave_mii_bus && drv->phy_read) {
+ ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+ if (!ds->slave_mii_bus) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dsa_slave_mii_bus_init(ds);
+ ret = mdiobus_register(ds->slave_mii_bus);
+ if (ret < 0)
+ goto out;
+ }
/*
* Create network devices for physical switch ports.
*/
for (i = 0; i < DSA_MAX_PORTS; i++) {
+ ds->ports[i].dn = cd->port_dn[i];
+
if (!(ds->enabled_port_mask & (1 << i)))
continue;
@@ -337,13 +407,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
}
/* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setup(ds, dst->master_netdev);
+ ret = dsa_cpu_dsa_setups(ds, parent);
if (ret < 0) {
netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
index);
ret = 0;
}
+ ret = dsa_cpu_port_ethtool_setup(ds);
+ if (ret)
+ return ret;
+
#ifdef CONFIG_NET_DSA_HWMON
/* If the switch provides a temperature sensor,
* register with hardware monitoring subsystem.
@@ -420,11 +494,21 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
return ds;
}
-static void dsa_switch_destroy(struct dsa_switch *ds)
+void dsa_cpu_dsa_destroy(struct device_node *port_dn)
{
- struct device_node *port_dn;
struct phy_device *phydev;
- struct dsa_chip_data *cd = ds->cd;
+
+ if (of_phy_is_fixed_link(port_dn)) {
+ phydev = of_phy_find_device(port_dn);
+ if (phydev) {
+ phy_device_free(phydev);
+ fixed_phy_unregister(phydev);
+ }
+ }
+}
+
+static void dsa_switch_destroy(struct dsa_switch *ds)
+{
int port;
#ifdef CONFIG_NET_DSA_HWMON
@@ -437,26 +521,25 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
if (!(ds->enabled_port_mask & (1 << port)))
continue;
- if (!ds->ports[port])
+ if (!ds->ports[port].netdev)
continue;
- dsa_slave_destroy(ds->ports[port]);
+ dsa_slave_destroy(ds->ports[port].netdev);
}
- /* Remove any fixed link PHYs */
+ /* Disable configuration of the CPU and DSA ports */
for (port = 0; port < DSA_MAX_PORTS; port++) {
- port_dn = cd->port_dn[port];
- if (of_phy_is_fixed_link(port_dn)) {
- phydev = of_phy_find_device(port_dn);
- if (phydev) {
- phy_device_free(phydev);
- of_node_put(port_dn);
- fixed_phy_unregister(phydev);
- }
- }
+ if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+ continue;
+ dsa_cpu_dsa_destroy(ds->ports[port].dn);
+
+ /* Clearing a bit which is not set does no harm */
+ ds->cpu_port_mask |= ~(1 << port);
+ ds->dsa_port_mask |= ~(1 << port);
}
- mdiobus_unregister(ds->slave_mii_bus);
+ if (ds->slave_mii_bus && ds->drv->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
}
#ifdef CONFIG_PM_SLEEP
@@ -469,7 +552,7 @@ static int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i]);
+ ret = dsa_slave_suspend(ds->ports[i].netdev);
if (ret)
return ret;
}
@@ -495,7 +578,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i]);
+ ret = dsa_slave_resume(ds->ports[i].netdev);
if (ret)
return ret;
}
@@ -587,17 +670,6 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
if (link_sw_addr >= pd->nr_chips)
return -EINVAL;
- /* First time routing table allocation */
- if (!cd->rtable) {
- cd->rtable = kmalloc_array(pd->nr_chips, sizeof(s8),
- GFP_KERNEL);
- if (!cd->rtable)
- return -ENOMEM;
-
- /* default to no valid uplink/downlink */
- memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
- }
-
cd->rtable[link_sw_addr] = port_index;
return 0;
@@ -639,7 +711,6 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
kfree(pd->chip[i].port_names[port_index]);
port_index++;
}
- kfree(pd->chip[i].rtable);
/* Drop our reference to the MDIO bus device */
if (pd->chip[i].host_dev)
@@ -931,6 +1002,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
+ dsa_cpu_port_ethtool_restore(dst->ds[0]);
+
dev_put(dst->master_netdev);
}
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
new file mode 100644
index 0000000..83b95fc
--- /dev/null
+++ b/net/dsa/dsa2.c
@@ -0,0 +1,690 @@
+/*
+ * net/dsa/dsa2.c - Hardware switch handling, binding version 2
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <net/dsa.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include "dsa_priv.h"
+
+static LIST_HEAD(dsa_switch_trees);
+static DEFINE_MUTEX(dsa2_mutex);
+
+static struct dsa_switch_tree *dsa_get_dst(u32 tree)
+{
+ struct dsa_switch_tree *dst;
+
+ list_for_each_entry(dst, &dsa_switch_trees, list)
+ if (dst->tree == tree)
+ return dst;
+ return NULL;
+}
+
+static void dsa_free_dst(struct kref *ref)
+{
+ struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree,
+ refcount);
+
+ list_del(&dst->list);
+ kfree(dst);
+}
+
+static void dsa_put_dst(struct dsa_switch_tree *dst)
+{
+ kref_put(&dst->refcount, dsa_free_dst);
+}
+
+static struct dsa_switch_tree *dsa_add_dst(u32 tree)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return NULL;
+ dst->tree = tree;
+ dst->cpu_switch = -1;
+ INIT_LIST_HEAD(&dst->list);
+ list_add_tail(&dsa_switch_trees, &dst->list);
+ kref_init(&dst->refcount);
+
+ return dst;
+}
+
+static void dsa_dst_add_ds(struct dsa_switch_tree *dst,
+ struct dsa_switch *ds, u32 index)
+{
+ kref_get(&dst->refcount);
+ dst->ds[index] = ds;
+}
+
+static void dsa_dst_del_ds(struct dsa_switch_tree *dst,
+ struct dsa_switch *ds, u32 index)
+{
+ dst->ds[index] = NULL;
+ kref_put(&dst->refcount, dsa_free_dst);
+}
+
+static bool dsa_port_is_dsa(struct device_node *port)
+{
+ const char *name;
+
+ name = of_get_property(port, "label", NULL);
+ if (!name)
+ return false;
+
+ if (!strcmp(name, "dsa"))
+ return true;
+
+ return false;
+}
+
+static bool dsa_port_is_cpu(struct device_node *port)
+{
+ const char *name;
+
+ name = of_get_property(port, "label", NULL);
+ if (!name)
+ return false;
+
+ if (!strcmp(name, "cpu"))
+ return true;
+
+ return false;
+}
+
+static bool dsa_ds_find_port(struct dsa_switch *ds,
+ struct device_node *port)
+{
+ u32 index;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++)
+ if (ds->ports[index].dn == port)
+ return true;
+ return false;
+}
+
+static struct dsa_switch *dsa_dst_find_port(struct dsa_switch_tree *dst,
+ struct device_node *port)
+{
+ struct dsa_switch *ds;
+ u32 index;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ if (dsa_ds_find_port(ds, port))
+ return ds;
+ }
+
+ return NULL;
+}
+
+static int dsa_port_complete(struct dsa_switch_tree *dst,
+ struct dsa_switch *src_ds,
+ struct device_node *port,
+ u32 src_port)
+{
+ struct device_node *link;
+ int index;
+ struct dsa_switch *dst_ds;
+
+ for (index = 0;; index++) {
+ link = of_parse_phandle(port, "link", index);
+ if (!link)
+ break;
+
+ dst_ds = dsa_dst_find_port(dst, link);
+ of_node_put(link);
+
+ if (!dst_ds)
+ return 1;
+
+ src_ds->rtable[dst_ds->index] = src_port;
+ }
+
+ return 0;
+}
+
+/* A switch is complete if all the DSA ports phandles point to ports
+ * known in the tree. A return value of 1 means the tree is not
+ * complete. This is not an error condition. A value of 0 is
+ * success.
+ */
+static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (!dsa_port_is_dsa(port))
+ continue;
+
+ err = dsa_port_complete(dst, ds, port, index);
+ if (err != 0)
+ return err;
+
+ ds->dsa_port_mask |= BIT(index);
+ }
+
+ return 0;
+}
+
+/* A tree is complete if all the DSA ports phandles point to ports
+ * known in the tree. A return value of 1 means the tree is not
+ * complete. This is not an error condition. A value of 0 is
+ * success.
+ */
+static int dsa_dst_complete(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ err = dsa_ds_complete(dst, ds);
+ if (err != 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static int dsa_dsa_port_apply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ int err;
+
+ err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+ if (err) {
+ dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
+ index, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_dsa_port_unapply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ dsa_cpu_dsa_destroy(port);
+}
+
+static int dsa_cpu_port_apply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ int err;
+
+ err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+ if (err) {
+ dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
+ index, err);
+ return err;
+ }
+
+ ds->cpu_port_mask |= BIT(index);
+
+ return 0;
+}
+
+static void dsa_cpu_port_unapply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ dsa_cpu_dsa_destroy(port);
+ ds->cpu_port_mask &= ~BIT(index);
+
+}
+
+static int dsa_user_port_apply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ const char *name;
+ int err;
+
+ name = of_get_property(port, "label", NULL);
+
+ err = dsa_slave_create(ds, ds->dev, index, name);
+ if (err) {
+ dev_warn(ds->dev, "Failed to create slave %d: %d\n",
+ index, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_user_port_unapply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ if (ds->ports[index].netdev) {
+ dsa_slave_destroy(ds->ports[index].netdev);
+ ds->ports[index].netdev = NULL;
+ ds->enabled_port_mask &= ~(1 << index);
+ }
+}
+
+static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+ int err;
+
+ /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
+ * driver and before drv->setup() has run, since the switch drivers and
+ * the slave MDIO bus driver rely on these values for probing PHY
+ * devices or not
+ */
+ ds->phys_mii_mask = ds->enabled_port_mask;
+
+ err = ds->drv->setup(ds);
+ if (err < 0)
+ return err;
+
+ err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+ if (err < 0)
+ return err;
+
+ err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+ if (err < 0)
+ return err;
+
+ if (!ds->slave_mii_bus && ds->drv->phy_read) {
+ ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ if (!ds->slave_mii_bus)
+ return -ENOMEM;
+
+ dsa_slave_mii_bus_init(ds);
+
+ err = mdiobus_register(ds->slave_mii_bus);
+ if (err < 0)
+ return err;
+ }
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (dsa_port_is_dsa(port)) {
+ err = dsa_dsa_port_apply(port, index, ds);
+ if (err)
+ return err;
+ continue;
+ }
+
+ if (dsa_port_is_cpu(port)) {
+ err = dsa_cpu_port_apply(port, index, ds);
+ if (err)
+ return err;
+ continue;
+ }
+
+ err = dsa_user_port_apply(port, index, ds);
+ if (err)
+ continue;
+ }
+
+ return 0;
+}
+
+static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (dsa_port_is_dsa(port)) {
+ dsa_dsa_port_unapply(port, index, ds);
+ continue;
+ }
+
+ if (dsa_port_is_cpu(port)) {
+ dsa_cpu_port_unapply(port, index, ds);
+ continue;
+ }
+
+ dsa_user_port_unapply(port, index, ds);
+ }
+
+ if (ds->slave_mii_bus && ds->drv->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
+}
+
+static int dsa_dst_apply(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ err = dsa_ds_apply(dst, ds);
+ if (err)
+ return err;
+ }
+
+ err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
+ if (err)
+ return err;
+
+ /* If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+ dst->master_netdev->dsa_ptr = (void *)dst;
+ dst->applied = true;
+
+ return 0;
+}
+
+static void dsa_dst_unapply(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+
+ if (!dst->applied)
+ return;
+
+ dst->master_netdev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ dsa_ds_unapply(dst, ds);
+ }
+
+ dsa_cpu_port_ethtool_restore(dst->ds[0]);
+
+ pr_info("DSA: tree %d unapplied\n", dst->tree);
+ dst->applied = false;
+}
+
+static int dsa_cpu_parse(struct device_node *port, u32 index,
+ struct dsa_switch_tree *dst,
+ struct dsa_switch *ds)
+{
+ struct net_device *ethernet_dev;
+ struct device_node *ethernet;
+
+ ethernet = of_parse_phandle(port, "ethernet", 0);
+ if (!ethernet)
+ return -EINVAL;
+
+ ethernet_dev = of_find_net_device_by_node(ethernet);
+ if (!ethernet_dev)
+ return -EPROBE_DEFER;
+
+ if (!ds->master_netdev)
+ ds->master_netdev = ethernet_dev;
+
+ if (!dst->master_netdev)
+ dst->master_netdev = ethernet_dev;
+
+ if (dst->cpu_switch == -1) {
+ dst->cpu_switch = ds->index;
+ dst->cpu_port = index;
+ }
+
+ dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol);
+ if (IS_ERR(dst->tag_ops)) {
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ return PTR_ERR(dst->tag_ops);
+ }
+
+ dst->rcv = dst->tag_ops->rcv;
+
+ return 0;
+}
+
+static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (dsa_port_is_cpu(port)) {
+ err = dsa_cpu_parse(port, index, dst, ds);
+ if (err)
+ return err;
+ }
+ }
+
+ pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
+
+ return 0;
+}
+
+static int dsa_dst_parse(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ err = dsa_ds_parse(dst, ds);
+ if (err)
+ return err;
+ }
+
+ if (!dst->master_netdev) {
+ pr_warn("Tree has no master device\n");
+ return -EINVAL;
+ }
+
+ pr_info("DSA: tree %d parsed\n", dst->tree);
+
+ return 0;
+}
+
+static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ int err;
+ u32 reg;
+
+ for_each_available_child_of_node(ports, port) {
+ err = of_property_read_u32(port, "reg", &reg);
+ if (err)
+ return err;
+
+ if (reg >= DSA_MAX_PORTS)
+ return -EINVAL;
+
+ ds->ports[reg].dn = port;
+
+ /* Initialize enabled_port_mask now for drv->setup()
+ * to have access to a correct value, just like what
+ * net/dsa/dsa.c::dsa_switch_setup_one does.
+ */
+ if (!dsa_port_is_cpu(port))
+ ds->enabled_port_mask |= 1 << reg;
+ }
+
+ return 0;
+}
+
+static int dsa_parse_member(struct device_node *np, u32 *tree, u32 *index)
+{
+ int err;
+
+ *tree = *index = 0;
+
+ err = of_property_read_u32_index(np, "dsa,member", 0, tree);
+ if (err) {
+ /* Does not exist, but it is optional */
+ if (err == -EINVAL)
+ return 0;
+ return err;
+ }
+
+ err = of_property_read_u32_index(np, "dsa,member", 1, index);
+ if (err)
+ return err;
+
+ if (*index >= DSA_MAX_SWITCHES)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct device_node *dsa_get_ports(struct dsa_switch *ds,
+ struct device_node *np)
+{
+ struct device_node *ports;
+
+ ports = of_get_child_by_name(np, "ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return ports;
+}
+
+static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
+{
+ struct device_node *ports = dsa_get_ports(ds, np);
+ struct dsa_switch_tree *dst;
+ u32 tree, index;
+ int err;
+
+ err = dsa_parse_member(np, &tree, &index);
+ if (err)
+ return err;
+
+ if (IS_ERR(ports))
+ return PTR_ERR(ports);
+
+ err = dsa_parse_ports_dn(ports, ds);
+ if (err)
+ return err;
+
+ dst = dsa_get_dst(tree);
+ if (!dst) {
+ dst = dsa_add_dst(tree);
+ if (!dst)
+ return -ENOMEM;
+ }
+
+ if (dst->ds[index]) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ ds->dst = dst;
+ ds->index = index;
+ dsa_dst_add_ds(dst, ds, index);
+
+ err = dsa_dst_complete(dst);
+ if (err < 0)
+ goto out_del_dst;
+
+ if (err == 1) {
+ /* Not all switches registered yet */
+ err = 0;
+ goto out;
+ }
+
+ if (dst->applied) {
+ pr_info("DSA: Disjoint trees?\n");
+ return -EINVAL;
+ }
+
+ err = dsa_dst_parse(dst);
+ if (err)
+ goto out_del_dst;
+
+ err = dsa_dst_apply(dst);
+ if (err) {
+ dsa_dst_unapply(dst);
+ goto out_del_dst;
+ }
+
+ dsa_put_dst(dst);
+ return 0;
+
+out_del_dst:
+ dsa_dst_del_ds(dst, ds, ds->index);
+out:
+ dsa_put_dst(dst);
+
+ return err;
+}
+
+int dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
+{
+ int err;
+
+ mutex_lock(&dsa2_mutex);
+ err = _dsa_register_switch(ds, np);
+ mutex_unlock(&dsa2_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(dsa_register_switch);
+
+void _dsa_unregister_switch(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+
+ dsa_dst_unapply(dst);
+
+ dsa_dst_del_ds(dst, ds, ds->index);
+}
+
+void dsa_unregister_switch(struct dsa_switch *ds)
+{
+ mutex_lock(&dsa2_mutex);
+ _dsa_unregister_switch(ds);
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index dfa3377..00077a9 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -50,12 +50,19 @@ struct dsa_slave_priv {
/* dsa.c */
extern char dsa_driver_version[];
+int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
+ struct device_node *port_dn, int port);
+void dsa_cpu_dsa_destroy(struct device_node *port_dn);
+const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
+int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
+void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
+void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, char *name);
+ int port, const char *name);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 152436c..7236eb2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -49,8 +49,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
ds->slave_mii_bus->name = "dsa slave smi";
ds->slave_mii_bus->read = dsa_slave_phy_read;
ds->slave_mii_bus->write = dsa_slave_phy_write;
- snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
- ds->index, ds->cd->sw_addr);
+ snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
+ ds->dst->tree, ds->index);
ds->slave_mii_bus->parent = ds->dev;
ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
}
@@ -522,14 +522,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- /* Just return the original SKB */
- return skb;
-}
-
-
/* ethtool operations *******************************************************/
static int
dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
@@ -615,7 +607,7 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->cd->eeprom_len)
+ if (ds->cd && ds->cd->eeprom_len)
return ds->cd->eeprom_len;
if (ds->drv->get_eeprom_len)
@@ -873,6 +865,13 @@ static void dsa_slave_poll_controller(struct net_device *dev)
}
#endif
+void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
+{
+ ops->get_sset_count = dsa_cpu_port_get_sset_count;
+ ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
+ ops->get_strings = dsa_cpu_port_get_strings;
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
@@ -893,8 +892,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eee = dsa_slave_get_eee,
};
-static struct ethtool_ops dsa_cpu_port_ethtool_ops;
-
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
@@ -999,13 +996,12 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
struct net_device *slave_dev)
{
struct dsa_switch *ds = p->parent;
- struct dsa_chip_data *cd = ds->cd;
struct device_node *phy_dn, *port_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
int mode, ret;
- port_dn = cd->port_dn[p->port];
+ port_dn = ds->ports[p->port].dn;
mode = of_get_phy_mode(port_dn);
if (mode < 0)
mode = PHY_INTERFACE_MODE_NA;
@@ -1109,14 +1105,18 @@ int dsa_slave_resume(struct net_device *slave_dev)
}
int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, char *name)
+ int port, const char *name)
{
- struct net_device *master = ds->dst->master_netdev;
struct dsa_switch_tree *dst = ds->dst;
+ struct net_device *master;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
int ret;
+ master = ds->dst->master_netdev;
+ if (ds->master_netdev)
+ master = ds->master_netdev;
+
slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
NET_NAME_UNKNOWN, ether_setup);
if (slave_dev == NULL)
@@ -1124,19 +1124,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->features = master->vlan_features;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
- if (master->ethtool_ops != &dsa_cpu_port_ethtool_ops) {
- memcpy(&dst->master_ethtool_ops, master->ethtool_ops,
- sizeof(struct ethtool_ops));
- memcpy(&dsa_cpu_port_ethtool_ops, &dst->master_ethtool_ops,
- sizeof(struct ethtool_ops));
- dsa_cpu_port_ethtool_ops.get_sset_count =
- dsa_cpu_port_get_sset_count;
- dsa_cpu_port_ethtool_ops.get_ethtool_stats =
- dsa_cpu_port_get_ethtool_stats;
- dsa_cpu_port_ethtool_ops.get_strings =
- dsa_cpu_port_get_strings;
- master->ethtool_ops = &dsa_cpu_port_ethtool_ops;
- }
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
@@ -1147,49 +1134,24 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
NULL);
SET_NETDEV_DEV(slave_dev, parent);
- slave_dev->dev.of_node = ds->cd->port_dn[port];
+ slave_dev->dev.of_node = ds->ports[port].dn;
slave_dev->vlan_features = master->vlan_features;
p = netdev_priv(slave_dev);
p->parent = ds;
p->port = port;
-
- switch (ds->dst->tag_protocol) {
-#ifdef CONFIG_NET_DSA_TAG_DSA
- case DSA_TAG_PROTO_DSA:
- p->xmit = dsa_netdev_ops.xmit;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
- case DSA_TAG_PROTO_EDSA:
- p->xmit = edsa_netdev_ops.xmit;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- case DSA_TAG_PROTO_TRAILER:
- p->xmit = trailer_netdev_ops.xmit;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
- case DSA_TAG_PROTO_BRCM:
- p->xmit = brcm_netdev_ops.xmit;
- break;
-#endif
- default:
- p->xmit = dsa_slave_notag_xmit;
- break;
- }
+ p->xmit = dst->tag_ops->xmit;
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- ds->ports[port] = slave_dev;
+ ds->ports[port].netdev = slave_dev;
ret = register_netdev(slave_dev);
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
ret, slave_dev->name);
- ds->ports[port] = NULL;
+ ds->ports[port].netdev = NULL;
free_netdev(slave_dev);
return ret;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e2aadb7..21bffde 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -127,7 +127,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
/* Validate port against switch setup, either the port is totally */
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
/* Remove Broadcom tag and update checksum */
@@ -140,7 +140,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb->protocol = eth_type_trans(skb, skb->dev);
skb->dev->stats.rx_packets++;
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index aa780e4..bce79ff 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -107,10 +107,14 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
* Check that the source device exists and that the source
* port is a registered DSA port.
*/
- if (source_device >= dst->pd->nr_chips)
+ if (source_device >= DSA_MAX_SWITCHES)
goto out_drop;
+
ds = dst->ds[source_device];
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (!ds)
+ goto out_drop;
+
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
/*
@@ -159,7 +163,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 2288c80..6c1720e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -120,10 +120,14 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
* Check that the source device exists and that the source
* port is a registered DSA port.
*/
- if (source_device >= dst->pd->nr_chips)
+ if (source_device >= DSA_MAX_SWITCHES)
goto out_drop;
+
ds = dst->ds[source_device];
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (!ds)
+ goto out_drop;
+
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
/*
@@ -178,7 +182,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b6ca089..5e3903e 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -82,12 +82,12 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
goto out_drop;
source_port = trailer[1] & 7;
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
pskb_trim_rcsum(skb, skb->len - 4);
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index dd085db..4e2b308 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -58,21 +58,10 @@ static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
-static struct lock_class_key lowpan_tx_busylock;
-static struct lock_class_key lowpan_netdev_xmit_lock_key;
-
-static void lowpan_set_lockdep_class_one(struct net_device *ldev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock,
- &lowpan_netdev_xmit_lock_key);
-}
-
static int lowpan_dev_init(struct net_device *ldev)
{
- netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL);
- ldev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ netdev_lockdep_set_classes(ldev);
+
return 0;
}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f2bda9e..6e9ea69 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -76,6 +76,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
{
int err = -EAGAIN;
struct fib_table *tbl;
+ u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -94,7 +95,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
rcu_read_lock();
- tbl = fib_get_table(rule->fr_net, rule->table);
+ tb_id = fib_rule_get_table(rule, arg);
+ tbl = fib_get_table(rule->fr_net, tb_id);
if (tbl)
err = fib_table_lookup(tbl, &flp->u.ip4,
(struct fib_result *)arg->result,
@@ -180,7 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (err)
goto errout;
- if (rule->table == RT_TABLE_UNSPEC) {
+ if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) {
if (rule->action == FR_ACT_TO_TBL) {
struct fib_table *table;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 5f9207c..321d57f 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -129,6 +129,36 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+ switch (guehdr->version) {
+ case 0: /* Full GUE header present */
+ break;
+
+ case 1: {
+ /* Direct encasulation of IPv4 or IPv6 */
+
+ int prot;
+
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ prot = IPPROTO_IPIP;
+ break;
+ case 6:
+ prot = IPPROTO_IPV6;
+ break;
+ default:
+ goto drop;
+ }
+
+ if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
+ goto drop;
+
+ return -prot;
+ }
+
+ default: /* Undefined version */
+ goto drop;
+ }
+
optlen = guehdr->hlen << 2;
len += optlen;
@@ -289,6 +319,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
int flush = 1;
struct fou *fou = fou_from_sock(sk);
struct gro_remcsum grc;
+ u8 proto;
skb_gro_remcsum_init(&grc);
@@ -302,6 +333,25 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
goto out;
}
+ switch (guehdr->version) {
+ case 0:
+ break;
+ case 1:
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ proto = IPPROTO_IPIP;
+ break;
+ case 6:
+ proto = IPPROTO_IPV6;
+ break;
+ default:
+ goto out;
+ }
+ goto next_proto;
+ default:
+ goto out;
+ }
+
optlen = guehdr->hlen << 2;
len += optlen;
@@ -370,6 +420,10 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
}
}
+ proto = guehdr->proto_ctype;
+
+next_proto:
+
/* We can clear the encap_mark for GUE as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
* header to the outer L3 tunnel header, or we are are simply
@@ -383,7 +437,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[guehdr->proto_ctype]);
+ ops = rcu_dereference(offloads[proto]);
if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
goto out_unlock;
@@ -404,13 +458,30 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
const struct net_offload **offloads;
struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
const struct net_offload *ops;
- unsigned int guehlen;
+ unsigned int guehlen = 0;
u8 proto;
int err = -ENOENT;
- proto = guehdr->proto_ctype;
-
- guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ switch (guehdr->version) {
+ case 0:
+ proto = guehdr->proto_ctype;
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ break;
+ case 1:
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ proto = IPPROTO_IPIP;
+ break;
+ case 6:
+ proto = IPPROTO_IPV6;
+ break;
+ default:
+ return err;
+ }
+ break;
+ default:
+ return err;
+ }
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3a88b0c..b5e9317 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -355,7 +355,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
{
struct inet_frag_queue *q;
- if (frag_mem_limit(nf) > nf->high_thresh) {
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
inet_frag_schedule_worker(f);
return NULL;
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index cbfb180..9f0a7b9 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -54,7 +54,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 124bf0a..cbac493 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -225,7 +225,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
- skb_gso_network_seglen(skb) <= mtu)
+ skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6c8f4cd0..89dd8d8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5159,6 +5159,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
struct tcp_sock *tp = tcp_sk(sk);
+ bool rst_seq_match = false;
/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
@@ -5195,13 +5196,32 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
/* Step 2: check RST bit */
if (th->rst) {
- /* RFC 5961 3.2 :
- * If sequence number exactly matches RCV.NXT, then
+ /* RFC 5961 3.2 (extend to match against SACK too if available):
+ * If seq num matches RCV.NXT or the right-most SACK block,
+ * then
* RESET the connection
* else
* Send a challenge ACK
*/
- if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
+ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+ rst_seq_match = true;
+ } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
+ struct tcp_sack_block *sp = &tp->selective_acks[0];
+ int max_sack = sp[0].end_seq;
+ int this_sack;
+
+ for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
+ ++this_sack) {
+ max_sack = after(sp[this_sack].end_seq,
+ max_sack) ?
+ sp[this_sack].end_seq : max_sack;
+ }
+
+ if (TCP_SKB_CB(skb)->seq == max_sack)
+ rst_seq_match = true;
+ }
+
+ if (rst_seq_match)
tcp_reset(sk);
else
tcp_send_challenge_ack(sk, skb);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index bfa86f0..2076c21 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -92,6 +92,12 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
+bool ipv6_mod_enabled(void)
+{
+ return disable_ipv6_mod == 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
+
static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ed33abf..5857c1f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
int err = 0;
+ u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto discard_pkt;
}
- table = fib6_get_table(net, rule->table);
+ tb_id = fib_rule_get_table(rule, arg);
+ table = fib6_get_table(net, tb_id);
if (!table) {
err = -EAGAIN;
goto out;
@@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
- if (rule->action == FR_ACT_TO_TBL) {
+ if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC)
goto errout;
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index d08fd2d..e0170f6 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -109,7 +109,8 @@ static inline bool ila_csum_neutral_set(struct ila_identifier ident)
return !!(ident.csum_neutral);
}
-void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
+ bool set_csum_neutral);
void ila_init_saved_csum(struct ila_params *p);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 0e94042..b3d00be 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -103,7 +103,8 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
iaddr->loc = p->locator;
}
-void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
+ bool set_csum_neutral)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
@@ -114,7 +115,8 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
* is a locator being translated to a SIR address.
* Perform (receiver) checksum-neutral translation.
*/
- ila_csum_do_neutral(iaddr, p);
+ if (!set_csum_neutral)
+ ila_csum_do_neutral(iaddr, p);
} else {
switch (p->csum_mode) {
case ILA_CSUM_ADJUST_TRANSPORT:
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 1dfb641..c8314c6 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true);
return dst->lwtstate->orig_output(net, sk, skb);
@@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
return dst->lwtstate->orig_input(skb);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index a90e572..e6eca5f 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -210,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb);
+static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
static unsigned int
ila_nf_input(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- ila_xlat_addr(skb);
+ ila_xlat_addr(skb, false);
return NF_ACCEPT;
}
@@ -597,7 +597,7 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb)
+static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -616,7 +616,7 @@ static int ila_xlat_addr(struct sk_buff *skb)
ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- ila_update_ipv6_locator(skb, &ila->xp.ip);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
rcu_read_unlock();
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 94611e4..aacfb4b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -323,6 +323,7 @@ int ip6_input(struct sk_buff *skb)
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_input_finish);
}
+EXPORT_SYMBOL_GPL(ip6_input);
int ip6_mc_input(struct sk_buff *skb)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 635b8d3..fd32175 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -368,7 +368,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0a5a255..d9f2bd6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -825,9 +825,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
u8 protocol = IPPROTO_IPV6;
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- if (skb->protocol != htons(ETH_P_IPV6))
- goto tx_error;
-
if (tos == 1)
tos = ipv6_get_dsfield(iph6);
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index e253c26..57fc5a4 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -67,7 +67,6 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
return net_generic(net, l2tp_eth_net_id);
}
-static struct lock_class_key l2tp_eth_tx_busylock;
static int l2tp_eth_dev_init(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
@@ -75,7 +74,8 @@ static int l2tp_eth_dev_init(struct net_device *dev)
priv->dev = dev;
eth_hw_addr_random(dev);
eth_broadcast_addr(dev->broadcast);
- dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock;
+ netdev_lockdep_set_classes(dev);
+
return 0;
}
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 6651a78..7da9780 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -10,6 +10,7 @@
*/
#include <linux/netdevice.h>
+#include <net/fib_rules.h>
#include <net/l3mdev.h>
/**
@@ -160,3 +161,40 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
return rc;
}
EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
+
+/**
+ * l3mdev_fib_rule_match - Determine if flowi references an
+ * L3 master device
+ * @net: network namespace for device index lookup
+ * @fl: flow struct
+ */
+
+int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
+ struct fib_lookup_arg *arg)
+{
+ struct net_device *dev;
+ int rc = 0;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_fib_table) {
+ arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ rc = 1;
+ goto out;
+ }
+
+ dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_fib_table) {
+ arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ rc = 1;
+ goto out;
+ }
+
+out:
+ rcu_read_unlock();
+
+ return rc;
+}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0b80a71..7a4aa34 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -91,7 +91,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 604df6f..515131f 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -137,7 +137,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.ewma_log = info->ewma_log;
ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
- &est->lock, &cfg.opt);
+ &est->lock, NULL, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index e68ef9c..3cfd6cc 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -8,20 +8,6 @@
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
-struct netlink_ring {
- void **pg_vec;
- unsigned int head;
- unsigned int frames_per_block;
- unsigned int frame_size;
- unsigned int frame_max;
-
- unsigned int pg_vec_order;
- unsigned int pg_vec_pages;
- unsigned int pg_vec_len;
-
- atomic_t pending;
-};
-
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct sock sk;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 2ee48e4..434e04c 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -195,7 +195,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
}
vport->dev = alloc_netdev(sizeof(struct internal_dev),
- parms->name, NET_NAME_UNKNOWN, do_setup);
+ parms->name, NET_NAME_USER, do_setup);
if (!vport->dev) {
err = -ENOMEM;
goto error_free_vport;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index e45e94c..38512a2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/net.h>
@@ -137,33 +139,33 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
lock_sock(&rx->sk);
- if (rx->sk.sk_state != RXRPC_UNCONNECTED) {
+ if (rx->sk.sk_state != RXRPC_UNBOUND) {
ret = -EINVAL;
goto error_unlock;
}
memcpy(&rx->srx, srx, sizeof(rx->srx));
- /* Find or create a local transport endpoint to use */
local = rxrpc_lookup_local(&rx->srx);
if (IS_ERR(local)) {
ret = PTR_ERR(local);
goto error_unlock;
}
- rx->local = local;
- if (srx->srx_service) {
+ if (rx->srx.srx_service) {
write_lock_bh(&local->services_lock);
list_for_each_entry(prx, &local->services, listen_link) {
- if (prx->srx.srx_service == srx->srx_service)
+ if (prx->srx.srx_service == rx->srx.srx_service)
goto service_in_use;
}
+ rx->local = local;
list_add_tail(&rx->listen_link, &local->services);
write_unlock_bh(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
} else {
+ rx->local = local;
rx->sk.sk_state = RXRPC_CLIENT_BOUND;
}
@@ -172,8 +174,9 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
return 0;
service_in_use:
- ret = -EADDRINUSE;
write_unlock_bh(&local->services_lock);
+ rxrpc_put_local(local);
+ ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
error:
@@ -195,11 +198,11 @@ static int rxrpc_listen(struct socket *sock, int backlog)
lock_sock(&rx->sk);
switch (rx->sk.sk_state) {
- case RXRPC_UNCONNECTED:
+ case RXRPC_UNBOUND:
ret = -EADDRNOTAVAIL;
break;
+ case RXRPC_CLIENT_UNBOUND:
case RXRPC_CLIENT_BOUND:
- case RXRPC_CLIENT_CONNECTED:
default:
ret = -EBUSY;
break;
@@ -219,20 +222,18 @@ static int rxrpc_listen(struct socket *sock, int backlog)
/*
* find a transport by address
*/
-static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
- struct sockaddr *addr,
- int addr_len, int flags,
- gfp_t gfp)
+struct rxrpc_transport *rxrpc_name_to_transport(struct rxrpc_sock *rx,
+ struct sockaddr *addr,
+ int addr_len, int flags,
+ gfp_t gfp)
{
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
struct rxrpc_transport *trans;
- struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
struct rxrpc_peer *peer;
_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
ASSERT(rx->local != NULL);
- ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
if (rx->srx.transport_type != srx->transport_type)
return ERR_PTR(-ESOCKTNOSUPPORT);
@@ -254,7 +255,7 @@ static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
/**
* rxrpc_kernel_begin_call - Allow a kernel service to begin a call
* @sock: The socket on which to make the call
- * @srx: The address of the peer to contact (defaults to socket setting)
+ * @srx: The address of the peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
*
@@ -280,25 +281,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
lock_sock(&rx->sk);
- if (srx) {
- trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
- sizeof(*srx), 0, gfp);
- if (IS_ERR(trans)) {
- call = ERR_CAST(trans);
- trans = NULL;
- goto out_notrans;
- }
- } else {
- trans = rx->trans;
- if (!trans) {
- call = ERR_PTR(-ENOTCONN);
- goto out_notrans;
- }
- atomic_inc(&trans->usage);
+ trans = rxrpc_name_to_transport(rx, (struct sockaddr *)srx,
+ sizeof(*srx), 0, gfp);
+ if (IS_ERR(trans)) {
+ call = ERR_CAST(trans);
+ trans = NULL;
+ goto out_notrans;
}
- if (!srx)
- srx = &rx->srx;
if (!key)
key = rx->key;
if (key && !key->payload.data[0])
@@ -310,8 +300,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
goto out;
}
- call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
- gfp);
+ call = rxrpc_new_client_call(rx, trans, bundle, user_call_ID, gfp);
rxrpc_put_bundle(trans, bundle);
out:
rxrpc_put_transport(trans);
@@ -367,11 +356,8 @@ EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
int addr_len, int flags)
{
- struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
- struct sock *sk = sock->sk;
- struct rxrpc_transport *trans;
- struct rxrpc_local *local;
- struct rxrpc_sock *rx = rxrpc_sk(sk);
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)addr;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
@@ -384,45 +370,28 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
lock_sock(&rx->sk);
+ ret = -EISCONN;
+ if (test_bit(RXRPC_SOCK_CONNECTED, &rx->flags))
+ goto error;
+
switch (rx->sk.sk_state) {
- case RXRPC_UNCONNECTED:
- /* find a local transport endpoint if we don't have one already */
- ASSERTCMP(rx->local, ==, NULL);
- rx->srx.srx_family = AF_RXRPC;
- rx->srx.srx_service = 0;
- rx->srx.transport_type = srx->transport_type;
- rx->srx.transport_len = sizeof(sa_family_t);
- rx->srx.transport.family = srx->transport.family;
- local = rxrpc_lookup_local(&rx->srx);
- if (IS_ERR(local)) {
- release_sock(&rx->sk);
- return PTR_ERR(local);
- }
- rx->local = local;
- rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ case RXRPC_UNBOUND:
+ rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+ case RXRPC_CLIENT_UNBOUND:
case RXRPC_CLIENT_BOUND:
break;
- case RXRPC_CLIENT_CONNECTED:
- release_sock(&rx->sk);
- return -EISCONN;
default:
- release_sock(&rx->sk);
- return -EBUSY; /* server sockets can't connect as well */
- }
-
- trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
- GFP_KERNEL);
- if (IS_ERR(trans)) {
- release_sock(&rx->sk);
- _leave(" = %ld", PTR_ERR(trans));
- return PTR_ERR(trans);
+ ret = -EBUSY;
+ goto error;
}
- rx->trans = trans;
- rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
+ rx->connect_srx = *srx;
+ set_bit(RXRPC_SOCK_CONNECTED, &rx->flags);
+ ret = 0;
+error:
release_sock(&rx->sk);
- return 0;
+ return ret;
}
/*
@@ -436,7 +405,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
*/
static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
- struct rxrpc_transport *trans;
+ struct rxrpc_local *local;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
@@ -453,48 +422,38 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
}
}
- trans = NULL;
lock_sock(&rx->sk);
- if (m->msg_name) {
- ret = -EISCONN;
- trans = rxrpc_name_to_transport(sock, m->msg_name,
- m->msg_namelen, 0, GFP_KERNEL);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
- } else {
- trans = rx->trans;
- if (trans)
- atomic_inc(&trans->usage);
- }
-
switch (rx->sk.sk_state) {
- case RXRPC_SERVER_LISTENING:
- if (!m->msg_name) {
- ret = rxrpc_server_sendmsg(rx, m, len);
- break;
+ case RXRPC_UNBOUND:
+ local = rxrpc_lookup_local(&rx->srx);
+ if (IS_ERR(local)) {
+ ret = PTR_ERR(local);
+ goto error_unlock;
}
- case RXRPC_SERVER_BOUND:
+
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+ /* Fall through */
+
+ case RXRPC_CLIENT_UNBOUND:
case RXRPC_CLIENT_BOUND:
- if (!m->msg_name) {
- ret = -ENOTCONN;
- break;
+ if (!m->msg_name &&
+ test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) {
+ m->msg_name = &rx->connect_srx;
+ m->msg_namelen = sizeof(rx->connect_srx);
}
- case RXRPC_CLIENT_CONNECTED:
- ret = rxrpc_client_sendmsg(rx, trans, m, len);
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_LISTENING:
+ ret = rxrpc_do_sendmsg(rx, m, len);
break;
default:
- ret = -ENOTCONN;
+ ret = -EINVAL;
break;
}
-out:
+error_unlock:
release_sock(&rx->sk);
- if (trans)
- rxrpc_put_transport(trans);
_leave(" = %d", ret);
return ret;
}
@@ -521,7 +480,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (optlen != 0)
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
goto success;
@@ -531,7 +490,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (rx->key)
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
ret = rxrpc_request_key(rx, optval, optlen);
goto error;
@@ -541,7 +500,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (rx->key)
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
ret = rxrpc_server_keyring(rx, optval, optlen);
goto error;
@@ -551,7 +510,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(unsigned int))
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
ret = get_user(min_sec_level,
(unsigned int __user *) optval);
@@ -630,7 +589,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
return -ENOMEM;
sock_init_data(sock, sk);
- sk->sk_state = RXRPC_UNCONNECTED;
+ sk->sk_state = RXRPC_UNBOUND;
sk->sk_write_space = rxrpc_write_space;
sk->sk_max_ack_backlog = sysctl_rxrpc_max_qlen;
sk->sk_destruct = rxrpc_sock_destructor;
@@ -703,14 +662,6 @@ static int rxrpc_release_sock(struct sock *sk)
rx->conn = NULL;
}
- if (rx->bundle) {
- rxrpc_put_bundle(rx->trans, rx->bundle);
- rx->bundle = NULL;
- }
- if (rx->trans) {
- rxrpc_put_transport(rx->trans);
- rx->trans = NULL;
- }
if (rx->local) {
rxrpc_put_local(rx->local);
rx->local = NULL;
@@ -796,49 +747,49 @@ static int __init af_rxrpc_init(void)
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!rxrpc_call_jar) {
- printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
+ pr_notice("Failed to allocate call jar\n");
goto error_call_jar;
}
rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1);
if (!rxrpc_workqueue) {
- printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
+ pr_notice("Failed to allocate work queue\n");
goto error_work_queue;
}
ret = rxrpc_init_security();
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot initialise security\n");
+ pr_crit("Cannot initialise security\n");
goto error_security;
}
ret = proto_register(&rxrpc_proto, 1);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
+ pr_crit("Cannot register protocol\n");
goto error_proto;
}
ret = sock_register(&rxrpc_family_ops);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
+ pr_crit("Cannot register socket family\n");
goto error_sock;
}
ret = register_key_type(&key_type_rxrpc);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
+ pr_crit("Cannot register client key type\n");
goto error_key_type;
}
ret = register_key_type(&key_type_rxrpc_s);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
+ pr_crit("Cannot register server key type\n");
goto error_key_type_s;
}
ret = rxrpc_sysctl_init();
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register sysctls\n");
+ pr_crit("Cannot register sysctls\n");
goto error_sysctls;
}
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
index e7a7f05..eea5f4a 100644
--- a/net/rxrpc/ar-accept.c
+++ b/net/rxrpc/ar-accept.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index 374478e..1838178 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/circ_buf.h>
#include <linux/net.h>
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index 571a41f..68125dc 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
@@ -194,6 +196,43 @@ struct rxrpc_call *rxrpc_find_call_hash(
}
/*
+ * find an extant server call
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+ struct rb_node *p;
+
+ _enter("%p,%lx", rx, user_call_ID);
+
+ read_lock(&rx->call_lock);
+
+ p = rx->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ p = p->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ p = p->rb_right;
+ else
+ goto found_extant_call;
+ }
+
+ read_unlock(&rx->call_lock);
+ _leave(" = NULL");
+ return NULL;
+
+found_extant_call:
+ rxrpc_get_call(call);
+ read_unlock(&rx->call_lock);
+ _leave(" = %p [%d]", call, atomic_read(&call->usage));
+ return call;
+}
+
+/*
* allocate a new call
*/
static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
@@ -309,51 +348,27 @@ static struct rxrpc_call *rxrpc_alloc_client_call(
* set up a call for the given data
* - called in process context with IRQs enabled
*/
-struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
+struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_transport *trans,
struct rxrpc_conn_bundle *bundle,
unsigned long user_call_ID,
- int create,
gfp_t gfp)
{
- struct rxrpc_call *call, *candidate;
- struct rb_node *p, *parent, **pp;
-
- _enter("%p,%d,%d,%lx,%d",
- rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
- user_call_ID, create);
-
- /* search the extant calls first for one that matches the specified
- * user ID */
- read_lock(&rx->call_lock);
-
- p = rx->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, sock_node);
-
- if (user_call_ID < call->user_call_ID)
- p = p->rb_left;
- else if (user_call_ID > call->user_call_ID)
- p = p->rb_right;
- else
- goto found_extant_call;
- }
-
- read_unlock(&rx->call_lock);
+ struct rxrpc_call *call, *xcall;
+ struct rb_node *parent, **pp;
- if (!create || !trans)
- return ERR_PTR(-EBADSLT);
+ _enter("%p,%d,%d,%lx",
+ rx, trans->debug_id, bundle ? bundle->debug_id : -1,
+ user_call_ID);
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
- if (IS_ERR(candidate)) {
- _leave(" = %ld", PTR_ERR(candidate));
- return candidate;
+ call = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
+ if (IS_ERR(call)) {
+ _leave(" = %ld", PTR_ERR(call));
+ return call;
}
- candidate->user_call_ID = user_call_ID;
- __set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
+ call->user_call_ID = user_call_ID;
+ __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
write_lock(&rx->call_lock);
@@ -361,19 +376,16 @@ struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
parent = NULL;
while (*pp) {
parent = *pp;
- call = rb_entry(parent, struct rxrpc_call, sock_node);
+ xcall = rb_entry(parent, struct rxrpc_call, sock_node);
- if (user_call_ID < call->user_call_ID)
+ if (user_call_ID < xcall->user_call_ID)
pp = &(*pp)->rb_left;
- else if (user_call_ID > call->user_call_ID)
+ else if (user_call_ID > xcall->user_call_ID)
pp = &(*pp)->rb_right;
else
- goto found_extant_second;
+ goto found_user_ID_now_present;
}
- /* second search also failed; add the new call */
- call = candidate;
- candidate = NULL;
rxrpc_get_call(call);
rb_link_node(&call->sock_node, parent, pp);
@@ -389,20 +401,16 @@ struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
_leave(" = %p [new]", call);
return call;
- /* we found the call in the list immediately */
-found_extant_call:
- rxrpc_get_call(call);
- read_unlock(&rx->call_lock);
- _leave(" = %p [extant %d]", call, atomic_read(&call->usage));
- return call;
-
- /* we found the call on the second time through the list */
-found_extant_second:
- rxrpc_get_call(call);
+ /* We unexpectedly found the user ID in the list after taking
+ * the call_lock. This shouldn't happen unless the user races
+ * with itself and tries to add the same user ID twice at the
+ * same time in different threads.
+ */
+found_user_ID_now_present:
write_unlock(&rx->call_lock);
- rxrpc_put_call(candidate);
- _leave(" = %p [second %d]", call, atomic_read(&call->usage));
- return call;
+ rxrpc_put_call(call);
+ _leave(" = -EEXIST [%p]", call);
+ return ERR_PTR(-EEXIST);
}
/*
@@ -564,46 +572,6 @@ old_call:
}
/*
- * find an extant server call
- * - called in process context with IRQs enabled
- */
-struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
- unsigned long user_call_ID)
-{
- struct rxrpc_call *call;
- struct rb_node *p;
-
- _enter("%p,%lx", rx, user_call_ID);
-
- /* search the extant calls for one that matches the specified user
- * ID */
- read_lock(&rx->call_lock);
-
- p = rx->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, sock_node);
-
- if (user_call_ID < call->user_call_ID)
- p = p->rb_left;
- else if (user_call_ID > call->user_call_ID)
- p = p->rb_right;
- else
- goto found_extant_call;
- }
-
- read_unlock(&rx->call_lock);
- _leave(" = NULL");
- return NULL;
-
- /* we found the call in the list immediately */
-found_extant_call:
- rxrpc_get_call(call);
- read_unlock(&rx->call_lock);
- _leave(" = %p [%d]", call, atomic_read(&call->usage));
- return call;
-}
-
-/*
* detach a call from a socket and set up for release
*/
void rxrpc_release_call(struct rxrpc_call *call)
@@ -669,8 +637,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
conn->channels[3] == NULL);
break;
default:
- printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
- conn->avail_calls);
+ pr_err("conn->avail_calls=%d\n", conn->avail_calls);
BUG();
}
}
@@ -935,16 +902,15 @@ void __exit rxrpc_destroy_all_calls(void)
if (call->state != RXRPC_CALL_DEAD)
break;
default:
- printk(KERN_ERR "RXRPC:"
- " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
+ pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
call, atomic_read(&call->usage),
atomic_read(&call->ackr_not_idle),
rxrpc_call_states[call->state],
call->flags, call->events);
if (!skb_queue_empty(&call->rx_queue))
- printk(KERN_ERR"RXRPC: Rx queue occupied\n");
+ pr_err("Rx queue occupied\n");
if (!skb_queue_empty(&call->rx_oos_queue))
- printk(KERN_ERR"RXRPC: OOS queue occupied\n");
+ pr_err("OOS queue occupied\n");
break;
}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 97f4fae..8ecde4b 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/net.h>
@@ -78,11 +80,6 @@ struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
_enter("%p{%x},%x,%hx,",
rx, key_serial(key), trans->debug_id, service_id);
- if (rx->trans == trans && rx->bundle) {
- atomic_inc(&rx->bundle->usage);
- return rx->bundle;
- }
-
/* search the extant bundles first for one that matches the specified
* user ID */
spin_lock(&trans->client_lock);
@@ -136,10 +133,6 @@ struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
rb_insert_color(&bundle->node, &trans->bundles);
spin_unlock(&trans->client_lock);
_net("BUNDLE new on trans %d", trans->debug_id);
- if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
- atomic_inc(&bundle->usage);
- rx->bundle = bundle;
- }
_leave(" = %p [new]", bundle);
return bundle;
@@ -148,10 +141,6 @@ found_extant_bundle:
atomic_inc(&bundle->usage);
spin_unlock(&trans->client_lock);
_net("BUNDLE old on trans %d", trans->debug_id);
- if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
- atomic_inc(&bundle->usage);
- rx->bundle = bundle;
- }
_leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
return bundle;
@@ -161,10 +150,6 @@ found_extant_second:
spin_unlock(&trans->client_lock);
kfree(candidate);
_net("BUNDLE old2 on trans %d", trans->debug_id);
- if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
- atomic_inc(&bundle->usage);
- rx->bundle = bundle;
- }
_leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
return bundle;
}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
index 5f95639..8bdd692 100644
--- a/net/rxrpc/ar-connevent.c
+++ b/net/rxrpc/ar-connevent.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 6ff9741..d7c2a0b 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index f0b807a..b89dcdc 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -39,9 +39,9 @@ struct rxrpc_crypt {
* sk_state for RxRPC sockets
*/
enum {
- RXRPC_UNCONNECTED = 0,
+ RXRPC_UNBOUND = 0,
+ RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */
RXRPC_CLIENT_BOUND, /* client local address bound */
- RXRPC_CLIENT_CONNECTED, /* client is connected */
RXRPC_SERVER_BOUND, /* server local address bound */
RXRPC_SERVER_LISTENING, /* server listening for connections */
RXRPC_CLOSE, /* socket is being closed */
@@ -55,8 +55,6 @@ struct rxrpc_sock {
struct sock sk;
rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */
struct rxrpc_local *local; /* local endpoint */
- struct rxrpc_transport *trans; /* transport handler */
- struct rxrpc_conn_bundle *bundle; /* virtual connection bundle */
struct rxrpc_connection *conn; /* exclusive virtual connection */
struct list_head listen_link; /* link in the local endpoint's listen list */
struct list_head secureq; /* calls awaiting connection security clearance */
@@ -65,11 +63,13 @@ struct rxrpc_sock {
struct key *securities; /* list of server security descriptors */
struct rb_root calls; /* outstanding calls on this socket */
unsigned long flags;
+#define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */
#define RXRPC_SOCK_EXCLUSIVE_CONN 1 /* exclusive connection for a client socket */
rwlock_t call_lock; /* lock for calls */
u32 min_sec_level; /* minimum security level */
#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
struct sockaddr_rxrpc srx; /* local address */
+ struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */
sa_family_t proto; /* protocol created with */
};
@@ -477,6 +477,10 @@ extern u32 rxrpc_epoch;
extern atomic_t rxrpc_debug_id;
extern struct workqueue_struct *rxrpc_workqueue;
+extern struct rxrpc_transport *rxrpc_name_to_transport(struct rxrpc_sock *,
+ struct sockaddr *,
+ int, int, gfp_t);
+
/*
* ar-accept.c
*/
@@ -502,14 +506,14 @@ extern rwlock_t rxrpc_call_lock;
struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
void *, sa_family_t, const void *);
-struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
+struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_transport *,
struct rxrpc_conn_bundle *,
- unsigned long, int, gfp_t);
+ unsigned long, gfp_t);
struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
struct rxrpc_connection *,
struct rxrpc_host_header *);
-struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long);
void rxrpc_release_call(struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
void __rxrpc_put_call(struct rxrpc_call *);
@@ -581,9 +585,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
extern unsigned int rxrpc_resend_timeout;
int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
-int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
- struct msghdr *, size_t);
-int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
/*
* ar-peer.c
@@ -744,21 +746,18 @@ do { \
#define ASSERT(X) \
do { \
if (unlikely(!(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
#define ASSERTCMP(X, OP, Y) \
do { \
- if (unlikely(!((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
- printk(KERN_ERR "%lu " #OP " %lu is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
- printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
+ unsigned long _x = (unsigned long)(X); \
+ unsigned long _y = (unsigned long)(Y); \
+ if (unlikely(!(_x OP _y))) { \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ _x, _x, #OP, _y, _y); \
BUG(); \
} \
} while (0)
@@ -766,21 +765,18 @@ do { \
#define ASSERTIF(C, X) \
do { \
if (unlikely((C) && !(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
#define ASSERTIFCMP(C, X, OP, Y) \
do { \
- if (unlikely((C) && !((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
- printk(KERN_ERR "%lu " #OP " %lu is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
- printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
+ unsigned long _x = (unsigned long)(X); \
+ unsigned long _y = (unsigned long)(Y); \
+ if (unlikely((C) && !(_x OP _y))) { \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ _x, _x, #OP, _y, _y); \
BUG(); \
} \
} while (0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 1021b4c..4ad56fa 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -12,6 +12,8 @@
* "afs@CAMBRIDGE.REDHAT.COM>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/net.h>
@@ -800,7 +802,7 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token)
rxrpc_rxk5_free(token->k5);
break;
default:
- printk(KERN_ERR "Unknown token type %x on rxrpc key\n",
+ pr_err("Unknown token type %x on rxrpc key\n",
token->security_index);
BUG();
}
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index 4e1e6db..701c42b 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 51cb100..2e3c406 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
@@ -30,13 +32,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
/*
* extract control messages from the sendmsg() control buffer
*/
-static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
unsigned long *user_call_ID,
enum rxrpc_command *command,
- u32 *abort_code,
- bool server)
+ u32 *abort_code)
{
struct cmsghdr *cmsg;
+ bool got_user_ID = false;
int len;
*command = RXRPC_CMD_SEND_DATA;
@@ -68,6 +70,7 @@ static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
CMSG_DATA(cmsg);
}
_debug("User Call ID %lx", *user_call_ID);
+ got_user_ID = true;
break;
case RXRPC_ABORT:
@@ -88,8 +91,6 @@ static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
*command = RXRPC_CMD_ACCEPT;
if (len != 0)
return -EINVAL;
- if (!server)
- return -EISCONN;
break;
default:
@@ -97,6 +98,8 @@ static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
}
}
+ if (!got_user_ID)
+ return -EINVAL;
_leave(" = 0");
return 0;
}
@@ -124,55 +127,96 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
}
/*
+ * Create a new client call for sendmsg().
+ */
+static struct rxrpc_call *
+rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_conn_bundle *bundle;
+ struct rxrpc_transport *trans;
+ struct rxrpc_call *call;
+ struct key *key;
+ long ret;
+
+ DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+
+ _enter("");
+
+ if (!msg->msg_name)
+ return ERR_PTR(-EDESTADDRREQ);
+
+ trans = rxrpc_name_to_transport(rx, msg->msg_name, msg->msg_namelen, 0,
+ GFP_KERNEL);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ key = rx->key;
+ if (key && !rx->key->payload.data[0])
+ key = NULL;
+ bundle = rxrpc_get_bundle(rx, trans, key, srx->srx_service, GFP_KERNEL);
+ if (IS_ERR(bundle)) {
+ ret = PTR_ERR(bundle);
+ goto out_trans;
+ }
+
+ call = rxrpc_new_client_call(rx, trans, bundle, user_call_ID,
+ GFP_KERNEL);
+ rxrpc_put_bundle(trans, bundle);
+ rxrpc_put_transport(trans);
+ if (IS_ERR(call)) {
+ ret = PTR_ERR(call);
+ goto out_trans;
+ }
+
+ _leave(" = %p\n", call);
+ return call;
+
+out_trans:
+ rxrpc_put_transport(trans);
+out:
+ _leave(" = %ld", ret);
+ return ERR_PTR(ret);
+}
+
+/*
* send a message forming part of a client call through an RxRPC socket
* - caller holds the socket locked
* - the socket may be either a client socket or a server socket
*/
-int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
- struct msghdr *msg, size_t len)
+int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
{
- struct rxrpc_conn_bundle *bundle;
enum rxrpc_command cmd;
struct rxrpc_call *call;
unsigned long user_call_ID = 0;
- struct key *key;
- u16 service_id;
u32 abort_code = 0;
int ret;
_enter("");
- ASSERT(trans != NULL);
-
- ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
- false);
+ ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code);
if (ret < 0)
return ret;
- bundle = NULL;
- if (trans) {
- service_id = rx->srx.srx_service;
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx,
- msg->msg_name);
- service_id = srx->srx_service;
- }
- key = rx->key;
- if (key && !rx->key->payload.data[0])
- key = NULL;
- bundle = rxrpc_get_bundle(rx, trans, key, service_id,
- GFP_KERNEL);
- if (IS_ERR(bundle))
- return PTR_ERR(bundle);
+ if (cmd == RXRPC_CMD_ACCEPT) {
+ if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
+ return -EINVAL;
+ call = rxrpc_accept_call(rx, user_call_ID);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ rxrpc_put_call(call);
+ return 0;
}
- call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
- abort_code == 0, GFP_KERNEL);
- if (trans)
- rxrpc_put_bundle(trans, bundle);
- if (IS_ERR(call)) {
- _leave(" = %ld", PTR_ERR(call));
- return PTR_ERR(call);
+ call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
+ if (!call) {
+ if (cmd != RXRPC_CMD_SEND_DATA)
+ return -EBADSLT;
+ call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
}
_debug("CALL %d USR %lx ST %d on CONN %p",
@@ -180,14 +224,21 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
if (call->state >= RXRPC_CALL_COMPLETE) {
/* it's too late for this call */
- ret = -ESHUTDOWN;
+ ret = -ECONNRESET;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
rxrpc_send_abort(call, abort_code);
+ ret = 0;
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ } else if (!call->in_clientflag &&
+ call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
/* request phase complete for this client call */
ret = -EPROTO;
+ } else if (call->in_clientflag &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Reply phase not begun or not complete for service call. */
+ ret = -EPROTO;
} else {
ret = rxrpc_send_data(rx, call, msg, len);
}
@@ -266,67 +317,6 @@ void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
EXPORT_SYMBOL(rxrpc_kernel_abort_call);
/*
- * send a message through a server socket
- * - caller holds the socket locked
- */
-int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
-{
- enum rxrpc_command cmd;
- struct rxrpc_call *call;
- unsigned long user_call_ID = 0;
- u32 abort_code = 0;
- int ret;
-
- _enter("");
-
- ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
- true);
- if (ret < 0)
- return ret;
-
- if (cmd == RXRPC_CMD_ACCEPT) {
- call = rxrpc_accept_call(rx, user_call_ID);
- if (IS_ERR(call))
- return PTR_ERR(call);
- rxrpc_put_call(call);
- return 0;
- }
-
- call = rxrpc_find_server_call(rx, user_call_ID);
- if (!call)
- return -EBADSLT;
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -ESHUTDOWN;
- goto out;
- }
-
- switch (cmd) {
- case RXRPC_CMD_SEND_DATA:
- if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Tx phase not yet begun for this call */
- ret = -EPROTO;
- break;
- }
-
- ret = rxrpc_send_data(rx, call, msg, len);
- break;
-
- case RXRPC_CMD_SEND_ABORT:
- rxrpc_send_abort(call, abort_code);
- break;
- default:
- BUG();
- }
-
- out:
- rxrpc_put_call(call);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* send a packet through the transport endpoint
*/
int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index dc089b1..0b54cda 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 160f092..59706b9 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/export.h>
@@ -307,7 +309,7 @@ receive_non_data_message:
&abort_code);
break;
default:
- pr_err("RxRPC: Unknown packet mark %u\n", skb->mark);
+ pr_err("Unknown packet mark %u\n", skb->mark);
BUG();
break;
}
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index 62a2674..eee0cfd9 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 66a1a56..a1b6518 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index bab56ed..36a6340 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/net.h>
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 336774a..b6db56e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -283,10 +283,11 @@ err2:
p->tcfc_index = index ? index : tcf_hash_new_index(tn);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
+ p->tcfc_tm.firstuse = 0;
if (est) {
err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
&p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ &p->tcfc_lock, NULL, est);
if (err) {
free_percpu(p->cpu_qstats);
goto err2;
@@ -503,8 +504,8 @@ nla_put_failure:
}
EXPORT_SYMBOL(tcf_action_dump_1);
-int
-tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref)
+int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+ int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
@@ -670,7 +671,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
@@ -687,9 +688,9 @@ errout:
return -1;
}
-static int
-tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq,
- u16 flags, int event, int bind, int ref)
+static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+ u32 portid, u32 seq, u16 flags, int event, int bind,
+ int ref)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
@@ -730,7 +731,8 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+ if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
+ 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -838,7 +840,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (a.ops == NULL) /*some idjot trying to flush unknown action */
goto err_out;
- nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
+ nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
+ sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
@@ -1001,7 +1004,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
- if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETACTION) &&
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c7123e0..f7b6cf4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -154,10 +154,7 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
if (ret)
goto nla_put_failure;
- tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
- tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
- tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
-
+ tcf_tm_dump(&tm, &prog->tcf_tm);
if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm,
TCA_ACT_BPF_PAD))
goto nla_put_failure;
@@ -172,7 +169,8 @@ nla_put_failure:
static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
[TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) },
[TCA_ACT_BPF_FD] = { .type = NLA_U32 },
- [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
+ [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING,
+ .len = ACT_BPF_NAME_LEN },
[TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_ACT_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2ba700c..35a5270 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -44,7 +44,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
int proto;
spin_lock(&ca->tcf_lock);
- ca->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
if (skb->protocol == htons(ETH_P_IP)) {
@@ -160,9 +160,7 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
+ tcf_tm_dump(&t, &ci->tcf_tm);
if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
TCA_CONNMARK_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 28e934e..dcd9aba 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -501,7 +501,7 @@ static int tcf_csum(struct sk_buff *skb,
u32 update_flags;
spin_lock(&p->tcf_lock);
- p->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&p->tcf_tm);
bstats_update(&p->tcf_bstats, skb);
action = p->tcf_action;
update_flags = p->update_flags;
@@ -546,9 +546,8 @@ static int tcf_csum_dump(struct sk_buff *skb,
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ec5cc84..19058a7 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -162,7 +162,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
tm->lastuse = lastuse;
}
-static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_gact *gact = a->priv;
@@ -188,9 +189,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
goto nla_put_failure;
}
#endif
- t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
+ tcf_tm_dump(&t, &gact->tcf_tm);
if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 658046d..02f5a8b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -553,9 +553,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(ife->tcf_tm.expires);
+ tcf_tm_dump(&t, &ife->tcf_tm);
if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
goto nla_put_failure;
@@ -623,7 +621,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
- ife->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ife->tcf_tm);
spin_unlock(&ife->tcf_lock);
ifehdrln = ntohs(ifehdrln);
@@ -711,7 +709,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
- ife->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ife->tcf_tm);
if (!metalen) { /* no metadata to send */
/* abuse overlimits to count when we allow packet
@@ -802,7 +800,7 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
pr_info_ratelimited("unknown failure(policy neither de/encode\n");
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
- ife->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ife->tcf_tm);
ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9f002ad..e7c0f4d 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -34,7 +34,8 @@ static int ipt_net_id;
static int xt_net_id;
-static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
+static int ipt_init_target(struct xt_entry_target *t, char *table,
+ unsigned int hook)
{
struct xt_tgchk_param par;
struct xt_target *target;
@@ -212,7 +213,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&ipt->tcf_lock);
- ipt->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ipt->tcf_tm);
bstats_update(&ipt->tcf_bstats, skb);
/* yes, we have to worry about both in and out dev
@@ -250,7 +251,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
}
-static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+ int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_ipt *ipt = a->priv;
@@ -277,11 +279,11 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
goto nla_put_failure;
- tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
- tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
- tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
+
+ tcf_tm_dump(&tm, &ipt->tcf_tm);
if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
goto nla_put_failure;
+
kfree(t);
return skb->len;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 128942b..787751a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -157,7 +157,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
u32 at;
tcf_lastuse_update(&m->tcf_tm);
-
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
rcu_read_lock();
@@ -219,9 +218,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &m->tcf_tm);
if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c0a879f..06ccb03 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -103,7 +103,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&p->tcf_lock);
- p->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&p->tcf_tm);
old_addr = p->old_addr;
new_addr = p->new_addr;
mask = p->mask;
@@ -264,9 +264,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c6e18f2..82d3c14 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -121,7 +121,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&p->tcf_lock);
- p->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
struct tc_pedit_key *tkey = p->tcfp_keys;
@@ -200,11 +200,11 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
goto nla_put_failure;
+
kfree(opt);
return skb->len;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c557789..ff34dd3 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -182,7 +182,8 @@ override:
if (est) {
err = gen_replace_estimator(&police->tcf_bstats, NULL,
&police->tcf_rate_est,
- &police->tcf_lock, est);
+ &police->tcf_lock,
+ NULL, est);
if (err)
goto failure_unlock;
} else if (tb[TCA_POLICE_AVRATE] &&
@@ -336,6 +337,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse);
+ t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse);
t.expires = jiffies_to_clock_t(police->tcf_tm.expires);
if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e42f8da..be5fbb5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -35,7 +35,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
struct tcf_defact *d = a->priv;
spin_lock(&d->tcf_lock);
- d->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&d->tcf_tm);
bstats_update(&d->tcf_bstats, skb);
/* print policy string followed by _ then packet count
@@ -158,9 +158,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt) ||
nla_put_string(skb, TCA_DEF_DATA, d->tcfd_defdata))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index e928802..7e2bc3c 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -37,7 +37,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_skbedit *d = a->priv;
spin_lock(&d->tcf_lock);
- d->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&d->tcf_tm);
bstats_update(&d->tcf_bstats, skb);
if (d->flags & SKBEDIT_F_PRIORITY)
@@ -168,9 +168,8 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
&d->mark))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ac4adc8..b075d50 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -31,7 +31,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
int err;
spin_lock(&v->tcf_lock);
- v->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&v->tcf_tm);
bstats_update(&v->tcf_bstats, skb);
action = v->tcf_action;
@@ -179,12 +179,11 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
(nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
- nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto)))
+ nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
+ v->tcfv_push_proto)))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(v->tcf_tm.expires);
+ tcf_tm_dump(&t, &v->tcf_tm);
if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a75864d..aafa6bce 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -351,8 +351,9 @@ errout:
return err;
}
-static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp,
- unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
+static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+ struct tcf_proto *tp, unsigned long fh, u32 portid,
+ u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -474,9 +475,11 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
TC_H_MIN(tcm->tcm_info) != tp->protocol)
continue;
if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+ memset(&cb->args[1], 0,
+ sizeof(cb->args)-sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid,
+ if (tcf_fill_node(net, skb, tp, 0,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
break;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b3b7978..1ea6f76 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -66,6 +66,7 @@ struct cls_fl_filter {
struct fl_flow_key key;
struct list_head list;
u32 handle;
+ u32 flags;
struct rcu_head rcu;
};
@@ -123,6 +124,9 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
+ if (!atomic_read(&head->ht.nelems))
+ return -1;
+
fl_clear_masked_range(&skb_key, &head->mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
@@ -136,7 +140,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
f = rhashtable_lookup_fast(&head->ht,
fl_key_get_start(&skb_mkey, &head->mask),
head->ht_params);
- if (f) {
+ if (f && !(f->flags & TCA_CLS_FLAGS_SKIP_SW)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
}
@@ -524,7 +528,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
- u32 flags = 0;
int err;
if (!tca[TCA_OPTIONS])
@@ -552,8 +555,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
fnew->handle = handle;
- if (tb[TCA_FLOWER_FLAGS])
- flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+ if (tb[TCA_FLOWER_FLAGS]) {
+ fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+
+ if (!tc_flags_valid(fnew->flags)) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
@@ -563,10 +572,12 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout;
- err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
- head->ht_params);
- if (err)
- goto errout;
+ if (!(fnew->flags & TCA_CLS_FLAGS_SKIP_SW)) {
+ err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+ head->ht_params);
+ if (err)
+ goto errout;
+ }
fl_hw_replace_filter(tp,
&head->dissector,
@@ -574,7 +585,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
&fnew->key,
&fnew->exts,
(unsigned long)fnew,
- flags);
+ fnew->flags);
if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
@@ -734,6 +745,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
+
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ddf047d..d4a8bbf 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -982,7 +982,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
- spinlock_t *root_lock;
+ seqcount_t *running;
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT)
@@ -991,14 +991,15 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
if ((sch->parent != TC_H_ROOT) &&
!(sch->flags & TCQ_F_INGRESS) &&
(!p || !(p->flags & TCQ_F_MQROOT)))
- root_lock = qdisc_root_sleeping_lock(sch);
+ running = qdisc_root_sleeping_running(sch);
else
- root_lock = qdisc_lock(sch);
+ running = &sch->running;
err = gen_new_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
- root_lock,
+ NULL,
+ running,
tca[TCA_RATE]);
if (err)
goto err_out4;
@@ -1061,7 +1062,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
gen_replace_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
}
out:
@@ -1369,8 +1371,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d,
- TCA_PAD) < 0)
+ NULL, &d, TCA_PAD) < 0)
goto nla_put_failure;
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1381,7 +1382,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
cpu_qstats = q->cpu_qstats;
}
- if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+ &d, cpu_bstats, &q->bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
@@ -1684,8 +1686,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d,
- TCA_PAD) < 0)
+ NULL, &d, TCA_PAD) < 0)
goto nla_put_failure;
if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1911af3..7e6c12d 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -519,20 +519,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
return p->link.q->ops->peek(p->link.q);
}
-static unsigned int atm_tc_drop(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
- unsigned int len;
-
- pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
- return len;
- }
- return 0;
-}
-
static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -637,7 +623,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
- if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &flow->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
return -1;
@@ -671,7 +658,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
.enqueue = atm_tc_enqueue,
.dequeue = atm_tc_dequeue,
.peek = atm_tc_peek,
- .drop = atm_tc_drop,
.init = atm_tc_init,
.reset = atm_tc_reset,
.destroy = atm_tc_destroy,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index baafddf..f2af31b 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -80,10 +80,6 @@ struct cbq_class {
unsigned char priority; /* class priority */
unsigned char priority2; /* priority to be used after overlimit */
unsigned char ewma_log; /* time constant for idle time calculation */
- unsigned char ovl_strategy;
-#ifdef CONFIG_NET_CLS_ACT
- unsigned char police;
-#endif
u32 defmap;
@@ -94,10 +90,6 @@ struct cbq_class {
u32 avpkt;
struct qdisc_rate_table *R_tab;
- /* Overlimit strategy parameters */
- void (*overlimit)(struct cbq_class *cl);
- psched_tdiff_t penalty;
-
/* General scheduler (WRR) parameters */
long allot;
long quantum; /* Allotment per WRR round */
@@ -382,9 +374,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-#ifdef CONFIG_NET_CLS_ACT
- cl->q->__parent = sch;
-#endif
ret = qdisc_enqueue(skb, cl->q);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
@@ -402,11 +391,8 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-/* Overlimit actions */
-
-/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
-
-static void cbq_ovl_classic(struct cbq_class *cl)
+/* Overlimit action: penalize leaf class by adding offtime */
+static void cbq_overlimit(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
psched_tdiff_t delay = cl->undertime - q->now;
@@ -456,99 +442,6 @@ static void cbq_ovl_classic(struct cbq_class *cl)
}
}
-/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
- * they go overlimit
- */
-
-static void cbq_ovl_rclassic(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *this = cl;
-
- do {
- if (cl->level > q->toplevel) {
- cl = NULL;
- break;
- }
- } while ((cl = cl->borrow) != NULL);
-
- if (cl == NULL)
- cl = this;
- cbq_ovl_classic(cl);
-}
-
-/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
-
-static void cbq_ovl_delay(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = cl->undertime - q->now;
-
- if (test_bit(__QDISC_STATE_DEACTIVATED,
- &qdisc_root_sleeping(cl->qdisc)->state))
- return;
-
- if (!cl->delayed) {
- psched_time_t sched = q->now;
- ktime_t expires;
-
- delay += cl->offtime;
- if (cl->avgidle < 0)
- delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
- if (cl->avgidle < cl->minidle)
- cl->avgidle = cl->minidle;
- cl->undertime = q->now + delay;
-
- if (delay > 0) {
- sched += delay + cl->penalty;
- cl->penalized = sched;
- cl->cpriority = TC_CBQ_MAXPRIO;
- q->pmask |= (1<<TC_CBQ_MAXPRIO);
-
- expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
- if (hrtimer_try_to_cancel(&q->delay_timer) &&
- ktime_to_ns(ktime_sub(
- hrtimer_get_expires(&q->delay_timer),
- expires)) > 0)
- hrtimer_set_expires(&q->delay_timer, expires);
- hrtimer_restart(&q->delay_timer);
- cl->delayed = 1;
- cl->xstats.overactions++;
- return;
- }
- delay = 1;
- }
- if (q->wd_expires == 0 || q->wd_expires > delay)
- q->wd_expires = delay;
-}
-
-/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
-
-static void cbq_ovl_lowprio(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-
- cl->penalized = q->now + cl->penalty;
-
- if (cl->cpriority != cl->priority2) {
- cl->cpriority = cl->priority2;
- q->pmask |= (1<<cl->cpriority);
- cl->xstats.overactions++;
- }
- cbq_ovl_classic(cl);
-}
-
-/* TC_CBQ_OVL_DROP: penalize class by dropping */
-
-static void cbq_ovl_drop(struct cbq_class *cl)
-{
- if (cl->q->ops->drop)
- if (cl->q->ops->drop(cl->q))
- cl->qdisc->q.qlen--;
- cl->xstats.overactions++;
- cbq_ovl_classic(cl);
-}
-
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
psched_time_t now)
{
@@ -625,40 +518,6 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-#ifdef CONFIG_NET_CLS_ACT
-static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
-{
- struct Qdisc *sch = child->__parent;
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = q->rx_class;
-
- q->rx_class = NULL;
-
- if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
- int ret;
-
- cbq_mark_toplevel(q, cl);
-
- q->rx_class = cl;
- cl->q->__parent = sch;
-
- ret = qdisc_enqueue(skb, cl->q);
- if (ret == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return 0;
- }
- if (net_xmit_drop_count(ret))
- qdisc_qstats_drop(sch);
- return 0;
- }
-
- qdisc_qstats_drop(sch);
- return -1;
-}
-#endif
-
/*
* It is mission critical procedure.
*
@@ -807,7 +666,7 @@ cbq_under_limit(struct cbq_class *cl)
cl = cl->borrow;
if (!cl) {
this_cl->qstats.overlimits++;
- this_cl->overlimit(this_cl);
+ cbq_overlimit(this_cl);
return NULL;
}
if (cl->level > q->toplevel)
@@ -1166,31 +1025,6 @@ static void cbq_link_class(struct cbq_class *this)
}
}
-static unsigned int cbq_drop(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl, *cl_head;
- int prio;
- unsigned int len;
-
- for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
- cl_head = q->active[prio];
- if (!cl_head)
- continue;
-
- cl = cl_head;
- do {
- if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
- sch->q.qlen--;
- if (!cl->q->q.qlen)
- cbq_deactivate_class(cl);
- return len;
- }
- } while ((cl = cl->next_alive) != cl_head);
- }
- return 0;
-}
-
static void
cbq_reset(struct Qdisc *sch)
{
@@ -1280,50 +1114,6 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
return 0;
}
-static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
-{
- switch (ovl->strategy) {
- case TC_CBQ_OVL_CLASSIC:
- cl->overlimit = cbq_ovl_classic;
- break;
- case TC_CBQ_OVL_DELAY:
- cl->overlimit = cbq_ovl_delay;
- break;
- case TC_CBQ_OVL_LOWPRIO:
- if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
- ovl->priority2 - 1 <= cl->priority)
- return -EINVAL;
- cl->priority2 = ovl->priority2 - 1;
- cl->overlimit = cbq_ovl_lowprio;
- break;
- case TC_CBQ_OVL_DROP:
- cl->overlimit = cbq_ovl_drop;
- break;
- case TC_CBQ_OVL_RCLASSIC:
- cl->overlimit = cbq_ovl_rclassic;
- break;
- default:
- return -EINVAL;
- }
- cl->penalty = ovl->penalty;
- return 0;
-}
-
-#ifdef CONFIG_NET_CLS_ACT
-static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
-{
- cl->police = p->police;
-
- if (cl->q->handle) {
- if (p->police == TC_POLICE_RECLASSIFY)
- cl->q->reshape_fail = cbq_reshape_fail;
- else
- cl->q->reshape_fail = NULL;
- }
- return 0;
-}
-#endif
-
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
@@ -1375,8 +1165,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
q->link.priority = TC_CBQ_MAXPRIO - 1;
q->link.priority2 = TC_CBQ_MAXPRIO - 1;
q->link.cpriority = TC_CBQ_MAXPRIO - 1;
- q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
- q->link.overlimit = cbq_ovl_classic;
q->link.allot = psched_mtu(qdisc_dev(sch));
q->link.quantum = q->link.allot;
q->link.weight = q->link.R_tab->rate.rate;
@@ -1463,24 +1251,6 @@ nla_put_failure:
return -1;
}
-static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_ovl opt;
-
- opt.strategy = cl->ovl_strategy;
- opt.priority2 = cl->priority2 + 1;
- opt.pad = 0;
- opt.penalty = cl->penalty;
- if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
@@ -1500,36 +1270,11 @@ nla_put_failure:
return -1;
}
-#ifdef CONFIG_NET_CLS_ACT
-static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_police opt;
-
- if (cl->police) {
- opt.police = cl->police;
- opt.__res1 = 0;
- opt.__res2 = 0;
- if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
- goto nla_put_failure;
- }
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-#endif
-
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
if (cbq_dump_lss(skb, cl) < 0 ||
cbq_dump_rate(skb, cl) < 0 ||
cbq_dump_wrr(skb, cl) < 0 ||
- cbq_dump_ovl(skb, cl) < 0 ||
-#ifdef CONFIG_NET_CLS_ACT
- cbq_dump_police(skb, cl) < 0 ||
-#endif
cbq_dump_fopt(skb, cl) < 0)
return -1;
return 0;
@@ -1600,7 +1345,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
return -1;
@@ -1618,11 +1364,6 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
&pfifo_qdisc_ops, cl->common.classid);
if (new == NULL)
return -ENOBUFS;
- } else {
-#ifdef CONFIG_NET_CLS_ACT
- if (cl->police == TC_POLICE_RECLASSIFY)
- new->reshape_fail = cbq_reshape_fail;
-#endif
}
*old = qdisc_replace(sch, new, &cl->q);
@@ -1735,6 +1476,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (err < 0)
return err;
+ if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE])
+ return -EOPNOTSUPP;
+
if (cl) {
/* Check parent */
if (parentid) {
@@ -1755,7 +1499,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
qdisc_put_rtab(rtab);
@@ -1782,14 +1527,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
}
- if (tb[TCA_CBQ_OVL_STRATEGY])
- cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
-
-#ifdef CONFIG_NET_CLS_ACT
- if (tb[TCA_CBQ_POLICE])
- cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
-#endif
-
if (tb[TCA_CBQ_FOPT])
cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
@@ -1848,7 +1585,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
kfree(cl);
@@ -1884,13 +1622,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->maxidle = q->link.maxidle;
if (cl->avpkt == 0)
cl->avpkt = q->link.avpkt;
- cl->overlimit = cbq_ovl_classic;
- if (tb[TCA_CBQ_OVL_STRATEGY])
- cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
-#ifdef CONFIG_NET_CLS_ACT
- if (tb[TCA_CBQ_POLICE])
- cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
-#endif
if (tb[TCA_CBQ_FOPT])
cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
sch_tree_unlock(sch);
@@ -2035,7 +1766,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
.enqueue = cbq_enqueue,
.dequeue = cbq_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = cbq_drop,
.init = cbq_init,
.reset = cbq_reset,
.destroy = cbq_destroy,
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 0a08c86..04e0b05 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -365,22 +365,6 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch)
return skb;
}
-static unsigned int choke_drop(struct Qdisc *sch)
-{
- struct choke_sched_data *q = qdisc_priv(sch);
- unsigned int len;
-
- len = qdisc_queue_drop(sch);
- if (len > 0)
- q->stats.other++;
- else {
- if (!red_is_idling(&q->vars))
- red_start_of_idle_period(&q->vars);
- }
-
- return len;
-}
-
static void choke_reset(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
@@ -569,7 +553,6 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
.enqueue = choke_enqueue,
.dequeue = choke_dequeue,
.peek = choke_peek_head,
- .drop = choke_drop,
.init = choke_init,
.destroy = choke_destroy,
.reset = choke_reset,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index bf8af2c..22609e4 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -91,7 +91,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -119,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
qdisc_destroy(cl->qdisc);
@@ -279,7 +281,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (qlen)
xstats.deficit = cl->deficit;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
return -1;
@@ -420,27 +423,6 @@ out:
return NULL;
}
-static unsigned int drr_drop(struct Qdisc *sch)
-{
- struct drr_sched *q = qdisc_priv(sch);
- struct drr_class *cl;
- unsigned int len;
-
- list_for_each_entry(cl, &q->active, alist) {
- if (cl->qdisc->ops->drop) {
- len = cl->qdisc->ops->drop(cl->qdisc);
- if (len > 0) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
- return len;
- }
- }
- }
- return 0;
-}
-
static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -510,7 +492,6 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
.enqueue = drr_enqueue,
.dequeue = drr_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = drr_drop,
.init = drr_init_qdisc,
.reset = drr_reset_qdisc,
.destroy = drr_destroy_qdisc,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 34b4dda..b9ba5f6 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -320,23 +320,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
return p->q->ops->peek(p->q);
}
-static unsigned int dsmark_drop(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- unsigned int len;
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- if (p->q->ops->drop == NULL)
- return 0;
-
- len = p->q->ops->drop(p->q);
- if (len)
- sch->q.qlen--;
-
- return len;
-}
-
static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -489,7 +472,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
.enqueue = dsmark_enqueue,
.dequeue = dsmark_dequeue,
.peek = dsmark_peek,
- .drop = dsmark_drop,
.init = dsmark_init,
.reset = dsmark_reset,
.destroy = dsmark_destroy,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2177eac..dea70e3 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -24,7 +24,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
return qdisc_enqueue_tail(skb, sch);
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch);
}
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
@@ -32,7 +32,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch);
}
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
@@ -99,7 +99,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
.enqueue = pfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
- .drop = qdisc_queue_drop,
.init = fifo_init,
.reset = qdisc_reset_queue,
.change = fifo_init,
@@ -114,7 +113,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
.enqueue = bfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
- .drop = qdisc_queue_drop,
.init = fifo_init,
.reset = qdisc_reset_queue,
.change = fifo_init,
@@ -129,7 +127,6 @@ struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
.enqueue = pfifo_tail_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
- .drop = qdisc_queue_drop_head,
.init = fifo_init,
.reset = qdisc_reset_queue,
.change = fifo_init,
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index da250b2..a302e8e 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -184,15 +184,6 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
return idx;
}
-static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
-{
- unsigned int prev_backlog;
-
- prev_backlog = sch->qstats.backlog;
- fq_codel_drop(sch, 1U);
- return prev_backlog - sch->qstats.backlog;
-}
-
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -578,11 +569,13 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.memory_usage = q->memory_usage;
st.qdisc_stats.drop_overmemory = q->drop_overmemory;
+ sch_tree_lock(sch);
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
list_for_each(pos, &q->old_flows)
st.qdisc_stats.old_flows_len++;
+ sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
}
@@ -636,7 +629,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx < q->flows_cnt) {
const struct fq_codel_flow *flow = &q->flows[idx];
- const struct sk_buff *skb = flow->head;
+ const struct sk_buff *skb;
memset(&xstats, 0, sizeof(xstats));
xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
@@ -654,9 +647,14 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
codel_time_to_us(delta) :
-codel_time_to_us(-delta);
}
- while (skb) {
- qs.qlen++;
- skb = skb->next;
+ if (flow->head) {
+ sch_tree_lock(sch);
+ skb = flow->head;
+ while (skb) {
+ qs.qlen++;
+ skb = skb->next;
+ }
+ sch_tree_unlock(sch);
}
qs.backlog = q->backlogs[idx];
qs.drops = flow->dropped;
@@ -709,7 +707,6 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.enqueue = fq_codel_enqueue,
.dequeue = fq_codel_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = fq_codel_qdisc_drop,
.init = fq_codel_init,
.reset = fq_codel_reset,
.destroy = fq_codel_destroy,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f9e0e9c..0c9cb51 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -112,7 +112,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
/*
* Transmit possibly several skbs, and handle the return status as
- * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
+ * required. Owning running seqcount bit guarantees that
* only one CPU can execute this function.
*
* Returns to the caller:
@@ -165,7 +165,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
/*
* NOTE: Called under qdisc_lock(q) with locally disabled BH.
*
- * __QDISC___STATE_RUNNING guarantees only one CPU can process
+ * running seqcount guarantees only one CPU can process
* this qdisc at a time. qdisc_lock(q) serializes queue accesses for
* this queue.
*
@@ -381,6 +381,7 @@ struct Qdisc noop_qdisc = {
.list = LIST_HEAD_INIT(noop_qdisc.list),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
+ .running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);
@@ -539,6 +540,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
EXPORT_SYMBOL(pfifo_fast_ops);
static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops)
@@ -572,6 +574,10 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+ seqcount_init(&sch->running);
+ lockdep_set_class(&sch->running,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8010510..b5fb63c7 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -276,40 +276,6 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch)
return NULL;
}
-static unsigned int gred_drop(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- struct gred_sched *t = qdisc_priv(sch);
-
- skb = qdisc_dequeue_tail(sch);
- if (skb) {
- unsigned int len = qdisc_pkt_len(skb);
- struct gred_sched_data *q;
- u16 dp = tc_index_to_dp(skb);
-
- if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
- net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x while dropping, screwing up backlog\n",
- tc_index_to_dp(skb));
- } else {
- q->backlog -= len;
- q->stats.other++;
-
- if (gred_wred_mode(t)) {
- if (!sch->qstats.backlog)
- red_start_of_idle_period(&t->wred_set);
- } else {
- if (!q->backlog)
- red_start_of_idle_period(&q->vars);
- }
- }
-
- qdisc_drop(skb, sch);
- return len;
- }
-
- return 0;
-}
-
static void gred_reset(struct Qdisc *sch)
{
int i;
@@ -623,7 +589,6 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
.enqueue = gred_enqueue,
.dequeue = gred_dequeue,
.peek = qdisc_peek_head,
- .drop = gred_drop,
.init = gred_init,
.reset = gred_reset,
.destroy = gred_destroy,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 1ac9f9f..eb3d3f5 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1015,11 +1015,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cur_time = psched_get_time();
if (tca[TCA_RATE]) {
- spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- lock,
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -1068,7 +1067,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
kfree(cl);
@@ -1373,7 +1373,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.work = cl->cl_total;
xstats.rtwork = cl->cl_cumul;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
return -1;
@@ -1672,32 +1672,6 @@ hfsc_dequeue(struct Qdisc *sch)
return skb;
}
-static unsigned int
-hfsc_drop(struct Qdisc *sch)
-{
- struct hfsc_sched *q = qdisc_priv(sch);
- struct hfsc_class *cl;
- unsigned int len;
-
- list_for_each_entry(cl, &q->droplist, dlist) {
- if (cl->qdisc->ops->drop != NULL &&
- (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
- if (cl->qdisc->q.qlen == 0) {
- update_vf(cl, 0, 0);
- set_passive(cl);
- } else {
- list_move_tail(&cl->dlist, &q->droplist);
- }
- cl->qstats.drops++;
- qdisc_qstats_drop(sch);
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- return len;
- }
- }
- return 0;
-}
-
static const struct Qdisc_class_ops hfsc_class_ops = {
.change = hfsc_change_class,
.delete = hfsc_delete_class,
@@ -1724,7 +1698,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
.enqueue = hfsc_enqueue,
.dequeue = hfsc_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = hfsc_drop,
.cl_ops = &hfsc_class_ops,
.priv_size = sizeof(struct hfsc_sched),
.owner = THIS_MODULE
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 13d6f83..c517918 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -368,15 +368,6 @@ static unsigned int hhf_drop(struct Qdisc *sch)
return bucket - q->buckets;
}
-static unsigned int hhf_qdisc_drop(struct Qdisc *sch)
-{
- unsigned int prev_backlog;
-
- prev_backlog = sch->qstats.backlog;
- hhf_drop(sch);
- return prev_backlog - sch->qstats.backlog;
-}
-
static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct hhf_sched_data *q = qdisc_priv(sch);
@@ -709,7 +700,6 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
.enqueue = hhf_enqueue,
.dequeue = hhf_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = hhf_qdisc_drop,
.init = hhf_init,
.reset = hhf_reset,
.destroy = hhf_destroy,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d4b4218..b74d066 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -936,31 +936,6 @@ fin:
return skb;
}
-/* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc *sch)
-{
- struct htb_sched *q = qdisc_priv(sch);
- int prio;
-
- for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
- struct list_head *p;
- list_for_each(p, q->drops + prio) {
- struct htb_class *cl = list_entry(p, struct htb_class,
- un.leaf.drop_list);
- unsigned int len;
- if (cl->un.leaf.q->ops->drop &&
- (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- if (!cl->un.leaf.q->q.qlen)
- htb_deactivate(q, cl);
- return len;
- }
- }
- }
- return 0;
-}
-
/* reset all classes */
/* always caled under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
@@ -1141,7 +1116,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
@@ -1395,7 +1371,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (htb_rate_est || tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
kfree(cl);
@@ -1457,11 +1434,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
parent->children++;
} else {
if (tca[TCA_RATE]) {
- spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- lock,
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -1599,7 +1575,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.enqueue = htb_enqueue,
.dequeue = htb_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = htb_drop,
.init = htb_init,
.reset = htb_reset,
.destroy = htb_destroy,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 56a77b8..b943982 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -199,7 +199,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b8002ce..549c663 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -342,7 +342,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
* hold here is the look on dev_queue->qdisc_sleeping
* also acquired below.
*/
- spin_unlock_bh(d->lock);
+ if (d->lock)
+ spin_unlock_bh(d->lock);
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
@@ -359,15 +360,17 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
spin_unlock_bh(qdisc_lock(qdisc));
}
/* Reclaim root sleeping lock before completing stats */
- spin_lock_bh(d->lock);
- if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 ||
+ if (d->lock)
+ spin_lock_bh(d->lock);
+ if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
return -1;
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL,
&sch->qstats, sch->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index bcdd54b..5ea9330 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -151,27 +151,6 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
}
-static unsigned int multiq_drop(struct Qdisc *sch)
-{
- struct multiq_sched_data *q = qdisc_priv(sch);
- int band;
- unsigned int len;
- struct Qdisc *qdisc;
-
- for (band = q->bands - 1; band >= 0; band--) {
- qdisc = q->queues[band];
- if (qdisc->ops->drop) {
- len = qdisc->ops->drop(qdisc);
- if (len != 0) {
- sch->q.qlen--;
- return len;
- }
- }
- }
- return 0;
-}
-
-
static void
multiq_reset(struct Qdisc *sch)
{
@@ -356,7 +335,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
@@ -415,7 +395,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
.enqueue = multiq_enqueue,
.dequeue = multiq_dequeue,
.peek = multiq_peek,
- .drop = multiq_drop,
.init = multiq_init,
.reset = multiq_reset,
.destroy = multiq_destroy,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 205bed0..9ca7947 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -407,7 +407,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
- qdisc_reshape_fail(skb, sch);
+ qdisc_drop(skb, sch);
return NULL;
}
consume_skb(skb);
@@ -499,7 +499,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch);
qdisc_qstats_backlog_inc(sch, skb);
@@ -576,35 +576,6 @@ finish_segs:
return NET_XMIT_SUCCESS;
}
-static unsigned int netem_drop(struct Qdisc *sch)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- unsigned int len;
-
- len = qdisc_queue_drop(sch);
-
- if (!len) {
- struct rb_node *p = rb_first(&q->t_root);
-
- if (p) {
- struct sk_buff *skb = netem_rb_to_skb(p);
-
- rb_erase(p, &q->t_root);
- sch->q.qlen--;
- skb->next = NULL;
- skb->prev = NULL;
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- }
- }
- if (!len && q->qdisc && q->qdisc->ops->drop)
- len = q->qdisc->ops->drop(q->qdisc);
- if (len)
- qdisc_qstats_drop(sch);
-
- return len;
-}
-
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -1143,7 +1114,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
.enqueue = netem_enqueue,
.dequeue = netem_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = netem_drop,
.init = netem_init,
.reset = netem_reset,
.destroy = netem_destroy,
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 5abfe44..ff0d968 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -96,7 +96,7 @@ static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return qdisc_enqueue_tail(skb, sch);
}
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch);
}
static struct sk_buff *plug_dequeue(struct Qdisc *sch)
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4b0a821..de49268 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -127,25 +127,6 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch)
}
-static unsigned int prio_drop(struct Qdisc *sch)
-{
- struct prio_sched_data *q = qdisc_priv(sch);
- int prio;
- unsigned int len;
- struct Qdisc *qdisc;
-
- for (prio = q->bands-1; prio >= 0; prio--) {
- qdisc = q->queues[prio];
- if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- return len;
- }
- }
- return 0;
-}
-
-
static void
prio_reset(struct Qdisc *sch)
{
@@ -323,7 +304,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
@@ -382,7 +364,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.enqueue = prio_enqueue,
.dequeue = prio_dequeue,
.peek = prio_peek,
- .drop = prio_drop,
.init = prio_init,
.reset = prio_reset,
.destroy = prio_destroy,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f18857f..0427fa8 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -460,7 +460,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -486,7 +487,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
goto destroy_class;
@@ -663,7 +665,8 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL,
&cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
@@ -1422,52 +1425,6 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
qfq_deactivate_class(q, cl);
}
-static unsigned int qfq_drop_from_slot(struct qfq_sched *q,
- struct hlist_head *slot)
-{
- struct qfq_aggregate *agg;
- struct qfq_class *cl;
- unsigned int len;
-
- hlist_for_each_entry(agg, slot, next) {
- list_for_each_entry(cl, &agg->active, alist) {
-
- if (!cl->qdisc->ops->drop)
- continue;
-
- len = cl->qdisc->ops->drop(cl->qdisc);
- if (len > 0) {
- if (cl->qdisc->q.qlen == 0)
- qfq_deactivate_class(q, cl);
-
- return len;
- }
- }
- }
- return 0;
-}
-
-static unsigned int qfq_drop(struct Qdisc *sch)
-{
- struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_group *grp;
- unsigned int i, j, len;
-
- for (i = 0; i <= QFQ_MAX_INDEX; i++) {
- grp = &q->groups[i];
- for (j = 0; j < QFQ_MAX_SLOTS; j++) {
- len = qfq_drop_from_slot(q, &grp->slots[j]);
- if (len > 0) {
- sch->q.qlen--;
- return len;
- }
- }
-
- }
-
- return 0;
-}
-
static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -1562,7 +1519,6 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
.enqueue = qfq_enqueue,
.dequeue = qfq_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = qfq_drop,
.init = qfq_init_qdisc,
.reset = qfq_reset_qdisc,
.destroy = qfq_destroy_qdisc,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 91578bd..a0d5753 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -136,26 +136,6 @@ static struct sk_buff *red_peek(struct Qdisc *sch)
return child->ops->peek(child);
}
-static unsigned int red_drop(struct Qdisc *sch)
-{
- struct red_sched_data *q = qdisc_priv(sch);
- struct Qdisc *child = q->qdisc;
- unsigned int len;
-
- if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
- q->stats.other++;
- qdisc_qstats_drop(sch);
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- return len;
- }
-
- if (!red_is_idling(&q->vars))
- red_start_of_idle_period(&q->vars);
-
- return 0;
-}
-
static void red_reset(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -365,7 +345,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
.enqueue = red_enqueue,
.dequeue = red_dequeue,
.peek = red_peek,
- .drop = red_drop,
.init = red_init,
.reset = red_reset,
.destroy = red_destroy,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 498f0a2..a2e0b85 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -896,7 +896,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.enqueue = sfq_enqueue,
.dequeue = sfq_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = sfq_drop,
.init = sfq_init,
.reset = sfq_reset,
.destroy = sfq_destroy,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 3161e49..7fa3d6e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -166,7 +166,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs))
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch);
nb = 0;
while (segs) {
@@ -198,7 +198,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (qdisc_pkt_len(skb) > q->max_size) {
if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
return tbf_segment(skb, sch);
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch);
}
ret = qdisc_enqueue(skb, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
@@ -212,19 +212,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
-static unsigned int tbf_drop(struct Qdisc *sch)
-{
- struct tbf_sched_data *q = qdisc_priv(sch);
- unsigned int len = 0;
-
- if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- qdisc_qstats_drop(sch);
- }
- return len;
-}
-
static bool tbf_peak_present(const struct tbf_sched_data *q)
{
return q->peak.rate_bytes_ps;
@@ -559,7 +546,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
.enqueue = tbf_enqueue,
.dequeue = tbf_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = tbf_drop,
.init = tbf_init,
.reset = tbf_reset,
.destroy = tbf_destroy,
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 0fca582..6c4f749 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
- output.o input.o debug.o ssnmap.o auth.o
+ output.o input.o debug.o ssnmap.o auth.o \
+ offload.o
sctp_probe-y := probe.o
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a701527..6f8e676 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -112,7 +112,6 @@ int sctp_rcv(struct sk_buff *skb)
struct sctp_ep_common *rcvr;
struct sctp_transport *transport = NULL;
struct sctp_chunk *chunk;
- struct sctphdr *sh;
union sctp_addr src;
union sctp_addr dest;
int family;
@@ -124,28 +123,29 @@ int sctp_rcv(struct sk_buff *skb)
__SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
- if (skb_linearize(skb))
+ /* If packet is too small to contain a single chunk, let's not
+ * waste time on it anymore.
+ */
+ if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) +
+ skb_transport_offset(skb))
goto discard_it;
- sh = sctp_hdr(skb);
+ if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
+ goto discard_it;
- /* Pull up the IP and SCTP headers. */
+ /* Pull up the IP header. */
__skb_pull(skb, skb_transport_offset(skb));
- if (skb->len < sizeof(struct sctphdr))
- goto discard_it;
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
- else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
+ else if (!sctp_checksum_disable &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+ sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
- skb_pull(skb, sizeof(struct sctphdr));
-
- /* Make sure we at least have chunk headers worth of data left. */
- if (skb->len < sizeof(struct sctp_chunkhdr))
- goto discard_it;
+ __skb_pull(skb, sizeof(struct sctphdr));
family = ipver2af(ip_hdr(skb)->version);
af = sctp_get_af_specific(family);
@@ -230,7 +230,7 @@ int sctp_rcv(struct sk_buff *skb)
chunk->rcvr = rcvr;
/* Remember the SCTP header. */
- chunk->sctp_hdr = sh;
+ chunk->sctp_hdr = sctp_hdr(skb);
/* Set the source and destination addresses of the incoming chunk. */
sctp_init_addrs(chunk, &src, &dest);
@@ -660,19 +660,23 @@ out_unlock:
*/
static int sctp_rcv_ootb(struct sk_buff *skb)
{
- sctp_chunkhdr_t *ch;
- __u8 *ch_end;
-
- ch = (sctp_chunkhdr_t *) skb->data;
+ sctp_chunkhdr_t *ch, _ch;
+ int ch_end, offset = 0;
/* Scan through all the chunks in the packet. */
do {
+ /* Make sure we have at least the header there */
+ if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
+ break;
+
+ ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
+
/* Break out if chunk length is less then minimal. */
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
- ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- if (ch_end > skb_tail_pointer(skb))
+ ch_end = offset + WORD_ROUND(ntohs(ch->length));
+ if (ch_end > skb->len)
break;
/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -697,8 +701,8 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data)
goto discard;
- ch = (sctp_chunkhdr_t *) ch_end;
- } while (ch_end < skb_tail_pointer(skb));
+ offset = ch_end;
+ } while (ch_end < skb->len);
return 0;
@@ -1173,6 +1177,17 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{
sctp_chunkhdr_t *ch;
+ /* We do not allow GSO frames here as we need to linearize and
+ * then cannot guarantee frame boundaries. This shouldn't be an
+ * issue as packets hitting this are mostly INIT or INIT-ACK and
+ * those cannot be on GSO-style anyway.
+ */
+ if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+ return NULL;
+
+ if (skb_linearize(skb))
+ return NULL;
+
ch = (sctp_chunkhdr_t *) skb->data;
/* The code below will attempt to walk the chunk and extract
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 9d87bba..edabbbd 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -130,13 +130,25 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
* at this time.
*/
- if ((chunk = queue->in_progress)) {
+ chunk = queue->in_progress;
+ if (chunk) {
/* There is a packet that we have been working on.
* Any post processing work to do before we move on?
*/
if (chunk->singleton ||
chunk->end_of_packet ||
chunk->pdiscard) {
+ if (chunk->head_skb == chunk->skb) {
+ chunk->skb = skb_shinfo(chunk->skb)->frag_list;
+ goto new_skb;
+ }
+ if (chunk->skb->next) {
+ chunk->skb = chunk->skb->next;
+ goto new_skb;
+ }
+
+ if (chunk->head_skb)
+ chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
@@ -152,34 +164,64 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (!chunk) {
struct list_head *entry;
+next_chunk:
/* Is the queue empty? */
- if (list_empty(&queue->in_chunk_list))
+ entry = sctp_list_dequeue(&queue->in_chunk_list);
+ if (!entry)
return NULL;
- entry = queue->in_chunk_list.next;
- chunk = queue->in_progress =
- list_entry(entry, struct sctp_chunk, list);
- list_del_init(entry);
+ chunk = list_entry(entry, struct sctp_chunk, list);
- /* This is the first chunk in the packet. */
- chunk->singleton = 1;
- ch = (sctp_chunkhdr_t *) chunk->skb->data;
- chunk->data_accepted = 0;
+ /* Linearize if it's not GSO */
+ if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
+ skb_is_nonlinear(chunk->skb)) {
+ if (skb_linearize(chunk->skb)) {
+ __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
+ sctp_chunk_free(chunk);
+ goto next_chunk;
+ }
+
+ /* Update sctp_hdr as it probably changed */
+ chunk->sctp_hdr = sctp_hdr(chunk->skb);
+ }
+
+ if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+ /* GSO-marked skbs but without frags, handle
+ * them normally
+ */
+ if (skb_shinfo(chunk->skb)->frag_list)
+ chunk->head_skb = chunk->skb;
+
+ /* skbs with "cover letter" */
+ if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
+ chunk->skb = skb_shinfo(chunk->skb)->frag_list;
+
+ if (WARN_ON(!chunk->skb)) {
+ __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
+ sctp_chunk_free(chunk);
+ goto next_chunk;
+ }
+ }
if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
+
+ queue->in_progress = chunk;
+
+new_skb:
+ /* This is the first chunk in the packet. */
+ ch = (sctp_chunkhdr_t *) chunk->skb->data;
+ chunk->singleton = 1;
+ chunk->data_accepted = 0;
+ chunk->pdiscard = 0;
+ chunk->auth = 0;
+ chunk->has_asconf = 0;
+ chunk->end_of_packet = 0;
+ chunk->ecn_ce_done = 0;
}
chunk->chunk_hdr = ch;
chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- /* In the unlikely case of an IP reassembly, the skb could be
- * non-linear. If so, update chunk_end so that it doesn't go past
- * the skb->tail.
- */
- if (unlikely(skb_is_nonlinear(chunk->skb))) {
- if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
- chunk->chunk_end = skb_tail_pointer(chunk->skb);
- }
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
new file mode 100644
index 0000000..a37887b
--- /dev/null
+++ b/net/sctp/offload.c
@@ -0,0 +1,98 @@
+/*
+ * sctp_offload - GRO/GSO Offloading for SCTP
+ *
+ * Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <linux/skbuff.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/checksum.h>
+#include <net/protocol.h>
+
+static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
+{
+ skb->ip_summed = CHECKSUM_NONE;
+ return sctp_compute_cksum(skb, skb_transport_offset(skb));
+}
+
+static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sctphdr *sh;
+
+ sh = sctp_hdr(skb);
+ if (!pskb_may_pull(skb, sizeof(*sh)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*sh));
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ struct skb_shared_info *pinfo = skb_shinfo(skb);
+ struct sk_buff *frag_iter;
+
+ pinfo->gso_segs = 0;
+ if (skb->len != skb->data_len) {
+ /* Means we have chunks in here too */
+ pinfo->gso_segs++;
+ }
+
+ skb_walk_frags(skb, frag_iter)
+ pinfo->gso_segs++;
+
+ segs = NULL;
+ goto out;
+ }
+
+ segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
+ if (IS_ERR(segs))
+ goto out;
+
+ /* All that is left is update SCTP CRC if necessary */
+ if (!(features & NETIF_F_SCTP_CRC)) {
+ for (skb = segs; skb; skb = skb->next) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ sh = sctp_hdr(skb);
+ sh->checksum = sctp_gso_make_checksum(skb);
+ }
+ }
+ }
+
+out:
+ return segs;
+}
+
+static const struct net_offload sctp_offload = {
+ .callbacks = {
+ .gso_segment = sctp_gso_segment,
+ },
+};
+
+int __init sctp_offload_init(void)
+{
+ return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
+}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9844fe5..1541a91 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet)
struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
__u32 vtag, int ecn_capable)
{
- struct sctp_chunk *chunk = NULL;
+ struct sctp_transport *tp = packet->transport;
+ struct sctp_association *asoc = tp->asoc;
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
+ if (asoc && tp->dst) {
+ struct sock *sk = asoc->base.sk;
+
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+
+ if (sk_can_gso(sk)) {
+ struct net_device *dev = tp->dst->dev;
+
+ packet->max_size = dev->gso_max_size;
+ } else {
+ packet->max_size = asoc->pathmtu;
+ }
+ rcu_read_unlock();
+
+ } else {
+ packet->max_size = tp->pathmtu;
+ }
+
if (ecn_capable && sctp_packet_empty(packet)) {
- chunk = sctp_get_ecne_prepend(packet->transport->asoc);
+ struct sctp_chunk *chunk;
/* If there a is a prepend chunk stick it on the list before
* any other chunks get appended.
*/
+ chunk = sctp_get_ecne_prepend(asoc);
if (chunk)
sctp_packet_append_chunk(packet, chunk);
}
@@ -158,7 +182,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
sctp_xmit_t retval;
int error = 0;
- pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
+ pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__,
+ packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
case SCTP_XMIT_PMTU_FULL:
@@ -381,12 +406,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
struct sctphdr *sh;
- struct sk_buff *nskb;
+ struct sk_buff *nskb = NULL, *head = NULL;
struct sctp_chunk *chunk, *tmp;
struct sock *sk;
int err = 0;
int padding; /* How much padding do we need? */
+ int pkt_size;
__u8 has_data = 0;
+ int gso = 0;
+ int pktcount = 0;
struct dst_entry *dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */
@@ -400,18 +428,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
sk = chunk->skb->sk;
- /* Allocate the new skb. */
- nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
- if (!nskb)
+ /* Allocate the head skb, or main one if not in GSO */
+ if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (sk_can_gso(sk)) {
+ gso = 1;
+ pkt_size = packet->overhead;
+ } else {
+ /* If this happens, we trash this packet and try
+ * to build a new one, hopefully correct this
+ * time. Application may notice this error.
+ */
+ pr_err_once("Trying to GSO but underlying device doesn't support it.");
+ goto nomem;
+ }
+ } else {
+ pkt_size = packet->size;
+ }
+ head = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!head)
goto nomem;
+ if (gso) {
+ NAPI_GRO_CB(head)->last = head;
+ skb_shinfo(head)->gso_type = sk->sk_gso_type;
+ }
/* Make sure the outbound skb has enough header room reserved. */
- skb_reserve(nskb, packet->overhead + MAX_HEADER);
+ skb_reserve(head, packet->overhead + MAX_HEADER);
/* Set the owning socket so that we know where to get the
* destination IP address.
*/
- sctp_packet_set_owner_w(nskb, sk);
+ sctp_packet_set_owner_w(head, sk);
if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk));
@@ -422,11 +469,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
dst = dst_clone(tp->dst);
if (!dst)
goto no_route;
- skb_dst_set(nskb, dst);
+ skb_dst_set(head, dst);
/* Build the SCTP header. */
- sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
- skb_reset_transport_header(nskb);
+ sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+ skb_reset_transport_header(head);
sh->source = htons(packet->source_port);
sh->dest = htons(packet->destination_port);
@@ -441,90 +488,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sh->vtag = htonl(packet->vtag);
sh->checksum = 0;
- /**
- * 6.10 Bundling
- *
- * An endpoint bundles chunks by simply including multiple
- * chunks in one outbound SCTP packet. ...
- */
-
- /**
- * 3.2 Chunk Field Descriptions
- *
- * The total length of a chunk (including Type, Length and
- * Value fields) MUST be a multiple of 4 bytes. If the length
- * of the chunk is not a multiple of 4 bytes, the sender MUST
- * pad the chunk with all zero bytes and this padding is not
- * included in the chunk length field. The sender should
- * never pad with more than 3 bytes.
- *
- * [This whole comment explains WORD_ROUND() below.]
- */
-
pr_debug("***sctp_transmit_packet***\n");
- list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
- list_del_init(&chunk->list);
- if (sctp_chunk_is_data(chunk)) {
- /* 6.3.1 C4) When data is in flight and when allowed
- * by rule C5, a new RTT measurement MUST be made each
- * round trip. Furthermore, new RTT measurements
- * SHOULD be made no more than once per round-trip
- * for a given destination transport address.
- */
+ do {
+ /* Set up convenience variables... */
+ chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+ pktcount++;
- if (!chunk->resent && !tp->rto_pending) {
- chunk->rtt_in_progress = 1;
- tp->rto_pending = 1;
+ /* Calculate packet size, so it fits in PMTU. Leave
+ * other chunks for the next packets.
+ */
+ if (gso) {
+ pkt_size = packet->overhead;
+ list_for_each_entry(chunk, &packet->chunk_list, list) {
+ int padded = WORD_ROUND(chunk->skb->len);
+
+ if (pkt_size + padded > tp->pathmtu)
+ break;
+ pkt_size += padded;
}
- has_data = 1;
- }
+ /* Allocate a new skb. */
+ nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!nskb)
+ goto nomem;
- padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
- if (padding)
- memset(skb_put(chunk->skb, padding), 0, padding);
+ /* Make sure the outbound skb has enough header
+ * room reserved.
+ */
+ skb_reserve(nskb, packet->overhead + MAX_HEADER);
+ } else {
+ nskb = head;
+ }
- /* if this is the auth chunk that we are adding,
- * store pointer where it will be added and put
- * the auth into the packet.
+ /**
+ * 3.2 Chunk Field Descriptions
+ *
+ * The total length of a chunk (including Type, Length and
+ * Value fields) MUST be a multiple of 4 bytes. If the length
+ * of the chunk is not a multiple of 4 bytes, the sender MUST
+ * pad the chunk with all zero bytes and this padding is not
+ * included in the chunk length field. The sender should
+ * never pad with more than 3 bytes.
+ *
+ * [This whole comment explains WORD_ROUND() below.]
*/
- if (chunk == packet->auth)
- auth = skb_tail_pointer(nskb);
- memcpy(skb_put(nskb, chunk->skb->len),
+ pkt_size -= packet->overhead;
+ list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
+ list_del_init(&chunk->list);
+ if (sctp_chunk_is_data(chunk)) {
+ /* 6.3.1 C4) When data is in flight and when allowed
+ * by rule C5, a new RTT measurement MUST be made each
+ * round trip. Furthermore, new RTT measurements
+ * SHOULD be made no more than once per round-trip
+ * for a given destination transport address.
+ */
+
+ if (!chunk->resent && !tp->rto_pending) {
+ chunk->rtt_in_progress = 1;
+ tp->rto_pending = 1;
+ }
+
+ has_data = 1;
+ }
+
+ padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+ if (padding)
+ memset(skb_put(chunk->skb, padding), 0, padding);
+
+ /* if this is the auth chunk that we are adding,
+ * store pointer where it will be added and put
+ * the auth into the packet.
+ */
+ if (chunk == packet->auth)
+ auth = skb_tail_pointer(nskb);
+
+ memcpy(skb_put(nskb, chunk->skb->len),
chunk->skb->data, chunk->skb->len);
- pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, "
- "rtt_in_progress:%d\n", chunk,
- sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
- chunk->has_tsn ? "TSN" : "No TSN",
- chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
- ntohs(chunk->chunk_hdr->length), chunk->skb->len,
- chunk->rtt_in_progress);
-
- /*
- * If this is a control chunk, this is our last
- * reference. Free data chunks after they've been
- * acknowledged or have failed.
- */
- if (!sctp_chunk_is_data(chunk))
- sctp_chunk_free(chunk);
- }
+ pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
+ chunk,
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
+ chunk->has_tsn ? "TSN" : "No TSN",
+ chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
+ ntohs(chunk->chunk_hdr->length), chunk->skb->len,
+ chunk->rtt_in_progress);
+
+ /* If this is a control chunk, this is our last
+ * reference. Free data chunks after they've been
+ * acknowledged or have failed.
+ * Re-queue auth chunks if needed.
+ */
+ pkt_size -= WORD_ROUND(chunk->skb->len);
- /* SCTP-AUTH, Section 6.2
- * The sender MUST calculate the MAC as described in RFC2104 [2]
- * using the hash function H as described by the MAC Identifier and
- * the shared association key K based on the endpoint pair shared key
- * described by the shared key identifier. The 'data' used for the
- * computation of the AUTH-chunk is given by the AUTH chunk with its
- * HMAC field set to zero (as shown in Figure 6) followed by all
- * chunks that are placed after the AUTH chunk in the SCTP packet.
- */
- if (auth)
- sctp_auth_calculate_hmac(asoc, nskb,
- (struct sctp_auth_chunk *)auth,
- gfp);
+ if (chunk == packet->auth && !list_empty(&packet->chunk_list))
+ list_add(&chunk->list, &packet->chunk_list);
+ else if (!sctp_chunk_is_data(chunk))
+ sctp_chunk_free(chunk);
+
+ if (!pkt_size)
+ break;
+ }
+
+ /* SCTP-AUTH, Section 6.2
+ * The sender MUST calculate the MAC as described in RFC2104 [2]
+ * using the hash function H as described by the MAC Identifier and
+ * the shared association key K based on the endpoint pair shared key
+ * described by the shared key identifier. The 'data' used for the
+ * computation of the AUTH-chunk is given by the AUTH chunk with its
+ * HMAC field set to zero (as shown in Figure 6) followed by all
+ * chunks that are placed after the AUTH chunk in the SCTP packet.
+ */
+ if (auth)
+ sctp_auth_calculate_hmac(asoc, nskb,
+ (struct sctp_auth_chunk *)auth,
+ gfp);
+
+ if (!gso)
+ break;
+
+ if (skb_gro_receive(&head, nskb))
+ goto nomem;
+ nskb = NULL;
+ if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
+ sk->sk_gso_max_segs))
+ goto nomem;
+ } while (!list_empty(&packet->chunk_list));
/* 2) Calculate the Adler-32 checksum of the whole packet,
* including the SCTP common header and all the
@@ -532,16 +622,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*
* Note: Adler-32 is no longer applicable, as has been replaced
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
+ *
+ * If it's a GSO packet, it's postponed to sctp_skb_segment.
*/
- if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
- (dst_xfrm(dst) != NULL) || packet->ipfragok) {
- sh->checksum = sctp_compute_cksum(nskb, 0);
+ if (!sctp_checksum_disable || gso) {
+ if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
+ dst_xfrm(dst) || packet->ipfragok)) {
+ sh->checksum = sctp_compute_cksum(head, 0);
} else {
/* no need to seed pseudo checksum for SCTP */
- nskb->ip_summed = CHECKSUM_PARTIAL;
- nskb->csum_start = skb_transport_header(nskb) - nskb->head;
- nskb->csum_offset = offsetof(struct sctphdr, checksum);
+ head->ip_summed = CHECKSUM_PARTIAL;
+ head->csum_start = skb_transport_header(head) - head->head;
+ head->csum_offset = offsetof(struct sctphdr, checksum);
}
}
@@ -557,7 +649,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* Note: The works for IPv6 layer checks this bit too later
* in transmission. See IP6_ECN_flow_xmit().
*/
- tp->af_specific->ecn_capable(nskb->sk);
+ tp->af_specific->ecn_capable(sk);
/* Set up the IP options. */
/* BUG: not implemented
@@ -566,7 +658,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
/* Dump that on IP! */
if (asoc) {
- asoc->stats.opackets++;
+ asoc->stats.opackets += pktcount;
if (asoc->peer.last_sent_to != tp)
/* Considering the multiple CPU scenario, this is a
* "correcter" place for last_sent_to. --xguo
@@ -589,16 +681,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
}
}
- pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
+ pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
+
+ if (gso) {
+ /* Cleanup our debris for IP stacks */
+ memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
+ sizeof(struct inet6_skb_parm)));
- nskb->ignore_df = packet->ipfragok;
- tp->af_specific->sctp_xmit(nskb, tp);
+ skb_shinfo(head)->gso_segs = pktcount;
+ skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+
+ /* We have to refresh this in case we are xmiting to
+ * more than one transport at a time
+ */
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+ rcu_read_unlock();
+ }
+ head->ignore_df = packet->ipfragok;
+ tp->af_specific->sctp_xmit(head, tp);
out:
sctp_packet_reset(packet);
return err;
no_route:
- kfree_skb(nskb);
+ kfree_skb(head);
+ if (nskb != head)
+ kfree_skb(nskb);
if (asoc)
IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
@@ -751,39 +863,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
struct sctp_chunk *chunk,
u16 chunk_len)
{
- size_t psize;
- size_t pmtu;
- int too_big;
+ size_t psize, pmtu;
sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
- pmtu = ((packet->transport->asoc) ?
- (packet->transport->asoc->pathmtu) :
- (packet->transport->pathmtu));
-
- too_big = (psize + chunk_len > pmtu);
+ if (packet->transport->asoc)
+ pmtu = packet->transport->asoc->pathmtu;
+ else
+ pmtu = packet->transport->pathmtu;
/* Decide if we need to fragment or resubmit later. */
- if (too_big) {
- /* It's OK to fragmet at IP level if any one of the following
+ if (psize + chunk_len > pmtu) {
+ /* It's OK to fragment at IP level if any one of the following
* is true:
- * 1. The packet is empty (meaning this chunk is greater
- * the MTU)
- * 2. The chunk we are adding is a control chunk
- * 3. The packet doesn't have any data in it yet and data
- * requires authentication.
+ * 1. The packet is empty (meaning this chunk is greater
+ * the MTU)
+ * 2. The packet doesn't have any data in it yet and data
+ * requires authentication.
*/
- if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) ||
+ if (sctp_packet_empty(packet) ||
(!packet->has_data && chunk->auth)) {
/* We no longer do re-fragmentation.
* Just fragment at the IP layer, if we
* actually hit this condition
*/
packet->ipfragok = 1;
- } else {
- retval = SCTP_XMIT_PMTU_FULL;
+ goto out;
}
+
+ /* It is also okay to fragment if the chunk we are
+ * adding is a control chunk, but only if current packet
+ * is not a GSO one otherwise it causes fragmentation of
+ * a large frame. So in this case we allow the
+ * fragmentation by forcing it to be in a new packet.
+ */
+ if (!sctp_chunk_is_data(chunk) && packet->has_data)
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (psize + chunk_len > packet->max_size)
+ /* Hit GSO/PMTU limit, gotta flush */
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (!packet->transport->burst_limited &&
+ psize + chunk_len > (packet->transport->cwnd >> 1))
+ /* Do not allow a single GSO packet to use more
+ * than half of cwnd.
+ */
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (packet->transport->burst_limited &&
+ psize + chunk_len > (packet->transport->burst_limited >> 1))
+ /* Do not allow a single GSO packet to use more
+ * than half of original cwnd.
+ */
+ retval = SCTP_XMIT_PMTU_FULL;
+ /* Otherwise it will fit in the GSO packet */
}
+out:
return retval;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d3d50da..40022ee 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1516,6 +1516,9 @@ static __init int sctp_init(void)
if (status)
goto err_v6_add_protocol;
+ if (sctp_offload_init() < 0)
+ pr_crit("%s: Cannot add SCTP protocol offload\n", __func__);
+
out:
return status;
err_v6_add_protocol:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 67154b8..712fb23 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk)
return -ESOCKTNOSUPPORT;
}
+ sk->sk_gso_type = SKB_GSO_SCTP;
+
/* Initialize default send parameters. These parameters can be
* modified with the SCTP_DEFAULT_SEND_PARAM socket option.
*/
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 7059c94..a904ccd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -87,7 +87,6 @@ struct tipc_stats {
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
* @tolerance: minimum link continuity loss needed to reset link [in ms]
- * @keepalive_intv: link keepalive timer interval
* @abort_limit: # of unacknowledged continuity probes needed to reset link
* @state: current state of link FSM
* @peer_caps: bitmap describing capabilities of peer node
@@ -131,7 +130,6 @@ struct tipc_link {
u32 peer_bearer_id;
u32 bearer_id;
u32 tolerance;
- unsigned long keepalive_intv;
u32 abort_limit;
u32 state;
u16 peer_caps;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e01e2c71..d6a490f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -378,14 +378,13 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
{
unsigned long tol = tipc_link_tolerance(l);
unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
- unsigned long keepalive_intv = msecs_to_jiffies(intv);
/* Link with lowest tolerance determines timer interval */
- if (keepalive_intv < n->keepalive_intv)
- n->keepalive_intv = keepalive_intv;
+ if (intv < n->keepalive_intv)
+ n->keepalive_intv = intv;
- /* Ensure link's abort limit corresponds to current interval */
- tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv));
+ /* Ensure link's abort limit corresponds to current tolerance */
+ tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
}
static void tipc_node_delete(struct tipc_node *node)
@@ -526,7 +525,7 @@ static void tipc_node_timeout(unsigned long data)
if (rc & TIPC_LINK_DOWN_EVT)
tipc_node_link_down(n, bearer_id, false);
}
- mod_timer(&n->timer, jiffies + n->keepalive_intv);
+ mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv));
}
/**
@@ -735,6 +734,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
bool accept_addr = false;
bool reset = true;
char *if_name;
+ unsigned long intv;
*dupl_addr = false;
*respond = false;
@@ -840,9 +840,11 @@ void tipc_node_check_dest(struct net *net, u32 onode,
le->link = l;
n->link_cnt++;
tipc_node_calculate_timer(n, l);
- if (n->link_cnt == 1)
- if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+ if (n->link_cnt == 1) {
+ intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+ if (!mod_timer(&n->timer, intv))
tipc_node_get(n);
+ }
}
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
@@ -950,7 +952,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
state = SELF_UP_PEER_UP;
break;
case SELF_LOST_CONTACT_EVT:
- state = SELF_DOWN_PEER_LEAVING;
+ state = SELF_DOWN_PEER_DOWN;
break;
case SELF_ESTABL_CONTACT_EVT:
case PEER_LOST_CONTACT_EVT:
@@ -969,7 +971,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
state = SELF_UP_PEER_UP;
break;
case PEER_LOST_CONTACT_EVT:
- state = SELF_LEAVING_PEER_DOWN;
+ state = SELF_DOWN_PEER_DOWN;
break;
case SELF_LOST_CONTACT_EVT:
case PEER_ESTABL_CONTACT_EVT: