author    | Scott Wood <scottwood@freescale.com> | 2013-04-05 22:43:55 (GMT)
committer | Scott Wood <scottwood@freescale.com> | 2013-04-05 22:43:55 (GMT)
commit    | 392aeab578c624bb00787b21d7a0b27f31785f23 (patch)
tree      | 56d38ed817345321aa2d24add000d2f147298e92 /net
parent    | ca1eff3e439fa5597da8cdf3f5b83ce962a0d912 (diff)
parent    | 4d72f19861e95cf911e0336882f0958f39f5cad0 (diff)
download  | linux-fsl-qoriq-392aeab578c624bb00787b21d7a0b27f31785f23.tar.xz
Merge tag 'v3.8.4-rt2'
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c             | 101
-rw-r--r-- | net/core/skbuff.c          |   6
-rw-r--r-- | net/core/sock.c            |  11
-rw-r--r-- | net/ipv4/icmp.c            |  30
-rw-r--r-- | net/ipv4/ip_output.c       |   5
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c |   7
-rw-r--r-- | net/mac80211/rx.c          |   2
-rw-r--r-- | net/netfilter/core.c       |   6
-rw-r--r-- | net/packet/af_packet.c     |   5
-rw-r--r-- | net/rds/ib_rdma.c          |   3
10 files changed, 134 insertions, 42 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 1339f77..454b151 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -203,7 +203,7 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-seqcount_t devnet_rename_seq;
+DEFINE_MUTEX(devnet_rename_mutex);
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 static inline void rps_lock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-        spin_lock(&sd->input_pkt_queue.lock);
+        raw_spin_lock(&sd->input_pkt_queue.raw_lock);
 #endif
 }
 
 static inline void rps_unlock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-        spin_unlock(&sd->input_pkt_queue.lock);
+        raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
 #endif
 }
 
@@ -1093,10 +1093,11 @@ int dev_change_name(struct net_device *dev, const char *newname)
         if (dev->flags & IFF_UP)
                 return -EBUSY;
 
-        write_seqcount_begin(&devnet_rename_seq);
+
+        mutex_lock(&devnet_rename_mutex);
 
         if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
-                write_seqcount_end(&devnet_rename_seq);
+                mutex_unlock(&devnet_rename_mutex);
                 return 0;
         }
 
@@ -1104,7 +1105,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
         err = dev_get_valid_name(net, dev, newname);
         if (err < 0) {
-                write_seqcount_end(&devnet_rename_seq);
+                mutex_unlock(&devnet_rename_mutex);
                 return err;
         }
 
@@ -1112,11 +1113,11 @@ rollback:
         ret = device_rename(&dev->dev, dev->name);
         if (ret) {
                 memcpy(dev->name, oldname, IFNAMSIZ);
-                write_seqcount_end(&devnet_rename_seq);
+                mutex_unlock(&devnet_rename_mutex);
                 return ret;
         }
 
-        write_seqcount_end(&devnet_rename_seq);
+        mutex_unlock(&devnet_rename_mutex);
 
         write_lock_bh(&dev_base_lock);
         hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1136,7 @@ rollback:
                 /* err >= 0 after dev_alloc_name() or stores the first errno */
                 if (err >= 0) {
                         err = ret;
-                        write_seqcount_begin(&devnet_rename_seq);
+                        mutex_lock(&devnet_rename_mutex);
                         memcpy(dev->name, oldname, IFNAMSIZ);
                         goto rollback;
                 } else {
@@ -1946,6 +1947,7 @@ static inline void __netif_reschedule(struct Qdisc *q)
         sd->output_queue_tailp = &q->next_sched;
         raise_softirq_irqoff(NET_TX_SOFTIRQ);
         local_irq_restore(flags);
+        preempt_check_resched_rt();
 }
 
 void __netif_schedule(struct Qdisc *q)
@@ -1967,6 +1969,7 @@ void dev_kfree_skb_irq(struct sk_buff *skb)
                 sd->completion_queue = skb;
                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
                 local_irq_restore(flags);
+                preempt_check_resched_rt();
         }
 }
 EXPORT_SYMBOL(dev_kfree_skb_irq);
@@ -3052,6 +3055,7 @@ enqueue:
 
         rps_unlock(sd);
         local_irq_restore(flags);
+        preempt_check_resched_rt();
 
         atomic_long_inc(&skb->dev->rx_dropped);
         kfree_skb(skb);
@@ -3089,7 +3093,7 @@ int netif_rx(struct sk_buff *skb)
                 struct rps_dev_flow voidflow, *rflow = &voidflow;
                 int cpu;
 
-                preempt_disable();
+                migrate_disable();
                 rcu_read_lock();
 
                 cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -3099,13 +3103,13 @@ int netif_rx(struct sk_buff *skb)
                 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 
                 rcu_read_unlock();
-                preempt_enable();
+                migrate_enable();
         } else
 #endif
         {
                 unsigned int qtail;
-                ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
-                put_cpu();
+                ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+                put_cpu_light();
         }
         return ret;
 }
@@ -3115,16 +3119,44 @@ int netif_rx_ni(struct sk_buff *skb)
 {
         int err;
 
-        preempt_disable();
+        local_bh_disable();
         err = netif_rx(skb);
-        if (local_softirq_pending())
-                do_softirq();
-        preempt_enable();
+        local_bh_enable();
 
         return err;
 }
 EXPORT_SYMBOL(netif_rx_ni);
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT runs ksoftirqd as a real time thread and the root_lock is a
+ * "sleeping spinlock". If the trylock fails then we can go into an
+ * infinite loop when ksoftirqd preempted the task which actually
+ * holds the lock, because we requeue q and raise NET_TX softirq
+ * causing ksoftirqd to loop forever.
+ *
+ * It's safe to use spin_lock on RT here as softirqs run in thread
+ * context and cannot deadlock against the thread which is holding
+ * root_lock.
+ *
+ * On !RT the trylock might fail, but there we bail out from the
+ * softirq loop after 10 attempts which we can't do on RT. And the
+ * task holding root_lock cannot be preempted, so the only downside of
+ * that trylock is that we need 10 loops to decide that we should have
+ * given up in the first one :)
+ */
+static inline int take_root_lock(spinlock_t *lock)
+{
+        spin_lock(lock);
+        return 1;
+}
+#else
+static inline int take_root_lock(spinlock_t *lock)
+{
+        return spin_trylock(lock);
+}
+#endif
+
 static void net_tx_action(struct softirq_action *h)
 {
         struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -3163,7 +3195,7 @@ static void net_tx_action(struct softirq_action *h)
                         head = head->next_sched;
 
                         root_lock = qdisc_lock(q);
-                        if (spin_trylock(root_lock)) {
+                        if (take_root_lock(root_lock)) {
                                 smp_mb__before_clear_bit();
                                 clear_bit(__QDISC_STATE_SCHED,
                                           &q->state);
@@ -3528,7 +3560,7 @@ static void flush_backlog(void *arg)
         skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
                 if (skb->dev == dev) {
                         __skb_unlink(skb, &sd->input_pkt_queue);
-                        kfree_skb(skb);
+                        __skb_queue_tail(&sd->tofree_queue, skb);
                         input_queue_head_incr(sd);
                 }
         }
@@ -3537,10 +3569,13 @@
         skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
                 if (skb->dev == dev) {
                         __skb_unlink(skb, &sd->process_queue);
-                        kfree_skb(skb);
+                        __skb_queue_tail(&sd->tofree_queue, skb);
                         input_queue_head_incr(sd);
                 }
         }
+
+        if (!skb_queue_empty(&sd->tofree_queue))
+                raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3899,6 +3934,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
         } else
 #endif
                 local_irq_enable();
+        preempt_check_resched_rt();
 }
 
 static int process_backlog(struct napi_struct *napi, int quota)
@@ -3971,6 +4007,7 @@ void __napi_schedule(struct napi_struct *n)
         local_irq_save(flags);
         ____napi_schedule(&__get_cpu_var(softnet_data), n);
         local_irq_restore(flags);
+        preempt_check_resched_rt();
 }
 EXPORT_SYMBOL(__napi_schedule);
 
@@ -4045,10 +4082,17 @@ static void net_rx_action(struct softirq_action *h)
         struct softnet_data *sd = &__get_cpu_var(softnet_data);
         unsigned long time_limit = jiffies + 2;
         int budget = netdev_budget;
+        struct sk_buff *skb;
         void *have;
 
         local_irq_disable();
 
+        while ((skb = __skb_dequeue(&sd->tofree_queue))) {
+                local_irq_enable();
+                kfree_skb(skb);
+                local_irq_disable();
+        }
+
         while (!list_empty(&sd->poll_list)) {
                 struct napi_struct *n;
                 int work, weight;
@@ -4171,7 +4215,6 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 {
         struct net_device *dev;
         struct ifreq ifr;
-        unsigned seq;
 
         /*
          *      Fetch the caller's info block.
@@ -4180,19 +4223,18 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                 return -EFAULT;
 
-retry:
-        seq = read_seqcount_begin(&devnet_rename_seq);
+        mutex_lock(&devnet_rename_mutex);
         rcu_read_lock();
         dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
         if (!dev) {
                 rcu_read_unlock();
+                mutex_unlock(&devnet_rename_mutex);
                 return -ENODEV;
         }
 
         strcpy(ifr.ifr_name, dev->name);
         rcu_read_unlock();
-        if (read_seqcount_retry(&devnet_rename_seq, seq))
-                goto retry;
+        mutex_unlock(&devnet_rename_mutex);
 
         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                 return -EFAULT;
@@ -6520,6 +6562,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
         raise_softirq_irqoff(NET_TX_SOFTIRQ);
         local_irq_enable();
+        preempt_check_resched_rt();
 
         /* Process offline CPU's input_pkt_queue */
         while ((skb = __skb_dequeue(&oldsd->process_queue))) {
@@ -6530,6 +6573,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                 netif_rx(skb);
                 input_queue_head_incr(oldsd);
         }
+        while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
+                kfree_skb(skb);
+        }
 
         return NOTIFY_OK;
 }
@@ -6802,8 +6848,9 @@ static int __init net_dev_init(void)
                 struct softnet_data *sd = &per_cpu(softnet_data, i);
 
                 memset(sd, 0, sizeof(*sd));
-                skb_queue_head_init(&sd->input_pkt_queue);
-                skb_queue_head_init(&sd->process_queue);
+                skb_queue_head_init_raw(&sd->input_pkt_queue);
+                skb_queue_head_init_raw(&sd->process_queue);
+                skb_queue_head_init_raw(&sd->tofree_queue);
                 sd->completion_queue = NULL;
                 INIT_LIST_HEAD(&sd->poll_list);
                 sd->output_queue = NULL;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f4a73cb..59e6d12 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -60,6 +60,7 @@
 #include <linux/scatterlist.h>
 #include <linux/errqueue.h>
 #include <linux/prefetch.h>
+#include <linux/locallock.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -347,6 +348,7 @@ struct netdev_alloc_cache {
         unsigned int            pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
 
 #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
 #define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
@@ -359,7 +361,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
         int order;
         unsigned long flags;
 
-        local_irq_save(flags);
+        local_lock_irqsave(netdev_alloc_lock, flags);
         nc = &__get_cpu_var(netdev_alloc_cache);
         if (unlikely(!nc->frag.page)) {
 refill:
@@ -393,7 +395,7 @@ recycle:
         nc->frag.offset += fragsz;
         nc->pagecnt_bias--;
 end:
-        local_irq_restore(flags);
+        local_unlock_irqrestore(netdev_alloc_lock, flags);
         return data;
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index bc131d4..2754c99 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -571,7 +571,6 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
         struct net *net = sock_net(sk);
         struct net_device *dev;
         char devname[IFNAMSIZ];
-        unsigned seq;
 
         if (sk->sk_bound_dev_if == 0) {
                 len = 0;
@@ -582,20 +581,19 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
         if (len < IFNAMSIZ)
                 goto out;
 
-retry:
-        seq = read_seqcount_begin(&devnet_rename_seq);
+        mutex_lock(&devnet_rename_mutex);
         rcu_read_lock();
         dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
         ret = -ENODEV;
         if (!dev) {
                 rcu_read_unlock();
+                mutex_unlock(&devnet_rename_mutex);
                 goto out;
         }
 
         strcpy(devname, dev->name);
         rcu_read_unlock();
-        if (read_seqcount_retry(&devnet_rename_seq, seq))
-                goto retry;
+        mutex_unlock(&devnet_rename_mutex);
 
         len = strlen(devname) + 1;
 
@@ -2287,12 +2285,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
         if (sk->sk_lock.owned)
                 __lock_sock(sk);
         sk->sk_lock.owned = 1;
-        spin_unlock(&sk->sk_lock.slock);
+        spin_unlock_bh(&sk->sk_lock.slock);
         /*
          * The sk_lock has mutex_lock() semantics here:
          */
         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
-        local_bh_enable();
 }
 EXPORT_SYMBOL(lock_sock_nested);
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3ac5dff..d8bbe94 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -69,6 +69,7 @@
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
+#include <linux/sysrq.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
@@ -768,6 +769,30 @@ static void icmp_redirect(struct sk_buff *skb)
 }
 
 /*
+ * 32bit and 64bit have different timestamp length, so we check for
+ * the cookie at offset 20 and verify it is repeated at offset 50
+ */
+#define CO_POS0         20
+#define CO_POS1         50
+#define CO_SIZE         sizeof(int)
+#define ICMP_SYSRQ_SIZE 57
+
+/*
+ * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
+ * pattern and if it matches send the next byte as a trigger to sysrq.
+ */
+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
+{
+        int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
+        char *p = skb->data;
+
+        if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
+            !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
+            p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
+                handle_sysrq(p[CO_POS0 + CO_SIZE]);
+}
+
+/*
  * Handle ICMP_ECHO ("ping") requests.
  *
  * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -794,6 +819,11 @@ static void icmp_echo(struct sk_buff *skb)
                 icmp_param.data_len        = skb->len;
                 icmp_param.head_len        = sizeof(struct icmphdr);
                 icmp_reply(&icmp_param, skb);
+
+                if (skb->len == ICMP_SYSRQ_SIZE &&
+                    net->ipv4.sysctl_icmp_echo_sysrq) {
+                        icmp_check_sysrq(net, skb);
+                }
         }
 }
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e98ed2..253692b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1508,7 +1508,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
         if (IS_ERR(rt))
                 return;
 
-        inet = &get_cpu_var(unicast_sock);
+        get_cpu_light();
+        inet = &__get_cpu_var(unicast_sock);
 
         inet->tos = arg->tos;
         sk = &inet->sk;
@@ -1532,7 +1533,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
                 ip_push_pending_frames(sk, &fl4);
         }
 
-        put_cpu_var(unicast_sock);
+        put_cpu_light();
 
         ip_rt_put(rt);
 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d84400b..44bf3b0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -815,6 +815,13 @@ static struct ctl_table ipv4_net_table[] = {
                 .proc_handler   = proc_dointvec
         },
         {
+                .procname       = "icmp_echo_sysrq",
+                .data           = &init_net.ipv4.sysctl_icmp_echo_sysrq,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec
+        },
+        {
                 .procname       = "icmp_ignore_bogus_error_responses",
                 .data           = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
                 .maxlen         = sizeof(int),
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 580704e..c58f3cd 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3144,7 +3144,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
         struct ieee80211_supported_band *sband;
         struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 
-        WARN_ON_ONCE(softirq_count() == 0);
+        WARN_ON_ONCE_NONRT(softirq_count() == 0);
 
         if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
                 goto drop;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a9c488b..c646ec8 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -20,11 +20,17 @@
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/locallock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
 #include "nf_internals.h"
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
+EXPORT_PER_CPU_SYMBOL(xt_write_lock);
+#endif
+
 static DEFINE_MUTEX(afinfo_mutex);
 
 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c111bd0..92a2359 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,6 +88,7 @@
 #include <linux/virtio_net.h>
 #include <linux/errqueue.h>
 #include <linux/net_tstamp.h>
+#include <linux/delay.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -553,7 +554,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
         if (BLOCK_NUM_PKTS(pbd)) {
                 while (atomic_read(&pkc->blk_fill_in_prog)) {
                         /* Waiting for skb_copy_bits to finish... */
-                        cpu_relax();
+                        cpu_chill();
                 }
         }
 
@@ -807,7 +808,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
         if (!(status & TP_STATUS_BLK_TMO)) {
                 while (atomic_read(&pkc->blk_fill_in_prog)) {
                         /* Waiting for skb_copy_bits to finish... */
-                        cpu_relax();
+                        cpu_chill();
                 }
         }
         prb_close_block(pkc, pbd, po, status);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e8fdb17..5a44c6e 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/rculist.h>
 #include <linux/llist.h>
+#include <linux/delay.h>
 
 #include "rds.h"
 #include "ib.h"
@@ -286,7 +287,7 @@ static inline void wait_clean_list_grace(void)
         for_each_online_cpu(cpu) {
                 flag = &per_cpu(clean_list_grace, cpu);
                 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
-                        cpu_relax();
+                        cpu_chill();
         }
 }
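The af_packet.c and rds/ib_rdma.c hunks replace cpu_relax() busy-wait loops with cpu_chill(), which is why both files gain #include <linux/delay.h>: on PREEMPT_RT_FULL, a high-priority task spinning on cpu_relax() can starve the preempted task it is waiting for, so the waiter has to sleep instead. The helper itself lives outside this net/ diffstat; a rough sketch of how the 3.8-rt series defines it follows (the exact form is an assumption, not something this diff shows):

```c
/* Assumed shape of the -rt helper in <linux/delay.h>; not part of this
 * diff. Sleeping for a jiffy lets the preempted owner of the contended
 * resource run, where cpu_relax() would just spin at RT priority. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define cpu_chill()    msleep(1)
#else
# define cpu_chill()    cpu_relax()
#endif
```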
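The icmp.c and sysctl_net_ipv4.c hunks together add the -rt "sysrq via ICMP" trigger: once a non-zero cookie is written to /proc/sys/net/ipv4/icmp_echo_sysrq, a 57-byte echo payload carrying that cookie (in network byte order) at offsets 20 and 50, clear of the 8- or 16-byte ping timestamp, invokes handle_sysrq() with the byte that follows each cookie copy. Below is a minimal user-space sketch of the expected payload layout, mirroring the kernel's checks; the 0x01020304 cookie and the 'h' command byte are made-up example values:

```c
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Constants mirrored from the icmp.c hunk above. */
#define CO_POS0         20
#define CO_POS1         50
#define CO_SIZE         sizeof(int)
#define ICMP_SYSRQ_SIZE 57

int main(void)
{
        /* Hypothetical values: the cookie as configured via the
         * icmp_echo_sysrq sysctl, and 'h' as the sysrq command. */
        unsigned char pattern[5] = { 0x01, 0x02, 0x03, 0x04, 'h' };
        unsigned char payload[ICMP_SYSRQ_SIZE];
        int cookie = htonl(0x01020304);
        size_t i;

        /* A repeating 5-byte fill lands the cookie at offsets 20 and
         * 50, since both are multiples of 5, and stays clear of the
         * first 8 or 16 bytes a real ping overwrites with its
         * timestamp. */
        for (i = 0; i < sizeof(payload); i++)
                payload[i] = pattern[i % sizeof(pattern)];

        /* The same three checks icmp_check_sysrq() performs. */
        if (!memcmp(&cookie, payload + CO_POS0, CO_SIZE) &&
            !memcmp(&cookie, payload + CO_POS1, CO_SIZE) &&
            payload[CO_POS0 + CO_SIZE] == payload[CO_POS1 + CO_SIZE])
                printf("would trigger sysrq '%c'\n",
                       payload[CO_POS0 + CO_SIZE]);
        return 0;
}
```

With iputils ping, a pattern fill along the lines of `ping -c1 -s57 -p 0102030468 <host>` should produce this layout, though that invocation relies on an assumption about ping's -p fill behavior rather than anything this diff documents.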