summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
author Scott Wood <scottwood@freescale.com> 2014-04-10 00:49:54 (GMT)
committer Scott Wood <scottwood@freescale.com> 2014-04-10 00:49:54 (GMT)
commit b86c95253af2105c9824146c6569a6b0f39ab124 (patch)
tree 9100acbdc843b1081b154135000b89ee95cd10d3 /mm
parent e5feac72dad5475167445de9af564c2d592872bb (diff)
parent 07c8b57b111585a617b2b456497fc9b33c00743c (diff)
download linux-fsl-qoriq-b86c95253af2105c9824146c6569a6b0f39ab124.tar.xz
Merge branch 'rtmerge' into sdk-v1.6.x
This merges 3.12.15-rt25.

Signed-off-by: Scott Wood <scottwood@freescale.com>

Conflicts:
	drivers/misc/Makefile
	drivers/net/ethernet/freescale/gianfar.c
	drivers/net/ethernet/freescale/gianfar_ethtool.c
	drivers/net/ethernet/freescale/gianfar_sysfs.c
Diffstat (limited to 'mm')
-rw-r--r-- mm/Kconfig 2
-rw-r--r-- mm/bounce.c 4
-rw-r--r-- mm/filemap.c 2
-rw-r--r-- mm/highmem.c 6
-rw-r--r-- mm/memcontrol.c 4
-rw-r--r-- mm/memory.c 58
-rw-r--r-- mm/mmu_context.c 2
-rw-r--r-- mm/page_alloc.c 145
-rw-r--r-- mm/page_cgroup.c 11
-rw-r--r-- mm/slab.h 4
-rw-r--r-- mm/slub.c 126
-rw-r--r-- mm/swap.c 34
-rw-r--r-- mm/vmalloc.c 13
-rw-r--r-- mm/vmstat.c 6
14 files changed, 316 insertions, 101 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 394838f..083685a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -384,7 +384,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
config TRANSPARENT_HUGEPAGE
bool "Transparent Hugepage Support"
- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
select COMPACTION
help
Transparent Hugepages allows the kernel to use huge pages and
diff --git a/mm/bounce.c b/mm/bounce.c
index 5a7d58f..b09bb4e 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -51,11 +51,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
unsigned long flags;
unsigned char *vto;
- local_irq_save(flags);
+ local_irq_save_nort(flags);
vto = kmap_atomic(to->bv_page);
memcpy(vto + to->bv_offset, vfrom, to->bv_len);
kunmap_atomic(vto);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
#else /* CONFIG_HIGHMEM */
diff --git a/mm/filemap.c b/mm/filemap.c
index ae4846f..3d2d39a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1976,7 +1976,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
char *kaddr;
size_t copied;
- BUG_ON(!in_atomic());
+ BUG_ON(!pagefault_disabled());
kaddr = kmap_atomic(page);
if (likely(i->nr_segs == 1)) {
int left;
diff --git a/mm/highmem.c b/mm/highmem.c
index b32b70c..b1c7d43 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,10 +29,11 @@
#include <linux/kgdb.h>
#include <asm/tlbflush.h>
-
+#ifndef CONFIG_PREEMPT_RT_FULL
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);
#endif
+#endif
/*
* Virtual_count is not a pure "count".
@@ -47,8 +48,9 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);
-
+#ifndef CONFIG_PREEMPT_RT_FULL
EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
+#endif
unsigned int nr_free_highpages (void)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 15429b9..bc16ebc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2473,7 +2473,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
/* Notify other cpus that system-wide "drain" is running */
get_online_cpus();
- curcpu = get_cpu();
+ curcpu = get_cpu_light();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *memcg;
@@ -2490,7 +2490,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
schedule_work_on(cpu, &stock->work);
}
}
- put_cpu();
+ put_cpu_light();
if (!sync)
goto out;
diff --git a/mm/memory.c b/mm/memory.c
index 22e67a2..0dcdc84 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3742,6 +3742,32 @@ unlock:
return 0;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+void pagefault_disable(void)
+{
+ migrate_disable();
+ current->pagefault_disabled++;
+ /*
+ * make sure to have issued the store before a pagefault
+ * can hit.
+ */
+ barrier();
+}
+EXPORT_SYMBOL(pagefault_disable);
+
+void pagefault_enable(void)
+{
+ /*
+ * make sure to issue those last loads/stores before enabling
+ * the pagefault handler again.
+ */
+ barrier();
+ current->pagefault_disabled--;
+ migrate_enable();
+}
+EXPORT_SYMBOL(pagefault_enable);
+#endif
+
/*
* By the time we get here, we already hold the mm semaphore
*/
@@ -4318,3 +4344,35 @@ void copy_user_huge_page(struct page *dst, struct page *src,
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
+/*
+ * Heinous hack, relies on the caller doing something like:
+ *
+ * pte = alloc_pages(PGALLOC_GFP, 0);
+ * if (pte)
+ * pgtable_page_ctor(pte);
+ * return pte;
+ *
+ * This ensures we release the page and return NULL when the
+ * lock allocation fails.
+ */
+struct page *pte_lock_init(struct page *page)
+{
+ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+ if (page->ptl) {
+ spin_lock_init(__pte_lockptr(page));
+ } else {
+ __free_page(page);
+ page = NULL;
+ }
+ return page;
+}
+
+void pte_lock_deinit(struct page *page)
+{
+ kfree(page->ptl);
+ page->mapping = NULL;
+}
+
+#endif
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 8a8cd02..adfce87 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm)
struct task_struct *tsk = current;
task_lock(tsk);
+ preempt_disable_rt();
active_mm = tsk->active_mm;
if (active_mm != mm) {
atomic_inc(&mm->mm_count);
@@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm)
}
tsk->mm = mm;
switch_mm(active_mm, mm, tsk);
+ preempt_enable_rt();
task_unlock(tsk);
if (active_mm != mm)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6fca390..36c40eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
#include <linux/page-debug-flags.h>
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
+#include <linux/locallock.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -230,6 +231,18 @@ EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define cpu_lock_irqsave(cpu, flags) \
+ local_lock_irqsave_on(pa_lock, flags, cpu)
+# define cpu_unlock_irqrestore(cpu, flags) \
+ local_unlock_irqrestore_on(pa_lock, flags, cpu)
+#else
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
+#endif
+
int page_group_by_mobility_disabled __read_mostly;
void set_pageblock_migratetype(struct page *page, int migratetype)
@@ -635,7 +648,7 @@ static inline int free_pages_check(struct page *page)
}
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -646,15 +659,49 @@ static inline int free_pages_check(struct page *page)
* pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+ struct list_head *list)
{
- int migratetype = 0;
- int batch_free = 0;
int to_free = count;
+ unsigned long flags;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
zone->pages_scanned = 0;
+ while (!list_empty(list)) {
+ struct page *page = list_first_entry(list, struct page, lru);
+ int mt; /* migratetype of the to-be-freed page */
+
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+
+ mt = get_freepage_migratetype(page);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, mt);
+ trace_mm_page_pcpu_drain(page, 0, mt);
+ if (likely(!is_migrate_isolate_page(page))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
+ if (is_migrate_cma(mt))
+ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
+ }
+
+ to_free--;
+ }
+ WARN_ON(to_free != 0);
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the pcp lists onto the caller-supplied
+ * dst list, so that they can be freed outside of the locked region.
+ *
+ * Assumes all pages on the pcp lists are in the same zone and of the
+ * same order. to_free is the number of pages to move.
+ */
+static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
+ struct list_head *dst)
+{
+ int migratetype = 0, batch_free = 0;
+
while (to_free) {
struct page *page;
struct list_head *list;
@@ -670,7 +717,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
- list = &pcp->lists[migratetype];
+ list = &src->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
@@ -678,35 +725,25 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free = to_free;
do {
- int mt; /* migratetype of the to-be-freed page */
-
- page = list_entry(list->prev, struct page, lru);
- /* must delete as __free_one_page list manipulates */
+ page = list_last_entry(list, struct page, lru);
list_del(&page->lru);
- mt = get_freepage_migratetype(page);
- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
- __free_one_page(page, zone, 0, mt);
- trace_mm_page_pcpu_drain(page, 0, mt);
- if (likely(!is_migrate_isolate_page(page))) {
- __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
- if (is_migrate_cma(mt))
- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
- }
+ list_add(&page->lru, dst);
} while (--to_free && --batch_free && !list_empty(list));
}
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone, struct page *page, int order,
int migratetype)
{
- spin_lock(&zone->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
zone->pages_scanned = 0;
__free_one_page(page, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -744,12 +781,12 @@ static void __free_pages_ok(struct page *page, unsigned int order)
if (!free_pages_prepare(page, order))
return;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
__count_vm_events(PGFREE, 1 << order);
migratetype = get_pageblock_migratetype(page);
set_freepage_migratetype(page, migratetype);
free_one_page(page_zone(page), page, order, migratetype);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
void __init __free_pages_bootmem(struct page *page, unsigned int order)
@@ -1204,20 +1241,22 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
+ LIST_HEAD(dst);
int to_drain;
unsigned long batch;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
batch = ACCESS_ONCE(pcp->batch);
if (pcp->count >= batch)
to_drain = batch;
else
to_drain = pcp->count;
if (to_drain > 0) {
- free_pcppages_bulk(zone, to_drain, pcp);
+ isolate_pcp_pages(to_drain, pcp, &dst);
pcp->count -= to_drain;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, to_drain, &dst);
}
static bool gfp_thisnode_allocation(gfp_t gfp_mask)
{
@@ -1245,16 +1284,21 @@ static void drain_pages(unsigned int cpu)
for_each_populated_zone(zone) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
- local_irq_save(flags);
+ cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count) {
- free_pcppages_bulk(zone, pcp->count, pcp);
+ count = pcp->count;
+ if (count) {
+ isolate_pcp_pages(count, pcp, &dst);
pcp->count = 0;
}
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
+ if (count)
+ free_pcppages_bulk(zone, count, &dst);
}
}
@@ -1307,7 +1351,12 @@ void drain_all_pages(void)
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
+#ifndef CONFIG_PREEMPT_RT_BASE
on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
+#else
+ for_each_cpu(cpu, &cpus_with_pcps)
+ drain_pages(cpu);
+#endif
}
#ifdef CONFIG_HIBERNATION
@@ -1362,7 +1411,7 @@ void free_hot_cold_page(struct page *page, int cold)
migratetype = get_pageblock_migratetype(page);
set_freepage_migratetype(page, migratetype);
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
__count_vm_event(PGFREE);
/*
@@ -1388,12 +1437,17 @@ void free_hot_cold_page(struct page *page, int cold)
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = ACCESS_ONCE(pcp->batch);
- free_pcppages_bulk(zone, batch, pcp);
+ LIST_HEAD(dst);
+
+ isolate_pcp_pages(batch, pcp, &dst);
pcp->count -= batch;
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, batch, &dst);
+ return;
}
out:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1523,7 +1577,7 @@ again:
struct per_cpu_pages *pcp;
struct list_head *list;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
if (list_empty(list)) {
@@ -1555,13 +1609,15 @@ again:
*/
WARN_ON_ONCE(order > 1);
}
- spin_lock_irqsave(&zone->lock, flags);
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
page = __rmqueue(zone, order, migratetype);
- spin_unlock(&zone->lock);
- if (!page)
+ if (!page) {
+ spin_unlock(&zone->lock);
goto failed;
+ }
__mod_zone_freepage_state(zone, -(1 << order),
get_pageblock_migratetype(page));
+ spin_unlock(&zone->lock);
}
/*
@@ -1573,7 +1629,7 @@ again:
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
@@ -1581,7 +1637,7 @@ again:
return page;
failed:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
return NULL;
}
@@ -2260,8 +2316,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct page *page;
/* Page migration frees to the PCP lists but we want merging */
- drain_pages(get_cpu());
- put_cpu();
+ drain_pages(get_cpu_light());
+ put_cpu_light();
page = get_page_from_freelist(gfp_mask, nodemask,
order, zonelist, high_zoneidx,
@@ -5463,6 +5519,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
void __init page_alloc_init(void)
{
hotcpu_notifier(page_alloc_cpu_notify, 0);
+ local_irq_lock_init(pa_lock);
}
/*
@@ -6326,7 +6383,7 @@ void zone_pcp_reset(struct zone *zone)
struct per_cpu_pageset *pset;
/* avoid races with drain_pages() */
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
@@ -6335,7 +6392,7 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6d757e3a..98caeee 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -13,6 +13,14 @@
static unsigned long total_usage;
+static void page_cgroup_lock_init(struct page_cgroup *pc, int nr_pages)
+{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ for (; nr_pages; nr_pages--, pc++)
+ spin_lock_init(&pc->pcg_lock);
+#endif
+}
+
#if !defined(CONFIG_SPARSEMEM)
@@ -60,6 +68,7 @@ static int __init alloc_node_page_cgroup(int nid)
return -ENOMEM;
NODE_DATA(nid)->node_page_cgroup = base;
total_usage += table_size;
+ page_cgroup_lock_init(base, nr_pages);
return 0;
}
@@ -150,6 +159,8 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
return -ENOMEM;
}
+ page_cgroup_lock_init(base, PAGES_PER_SECTION);
+
/*
* The passed "pfn" may not be aligned to SECTION. For the calculation
* we need to apply a mask.
diff --git a/mm/slab.h b/mm/slab.h
index a535033..8ffb287 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -247,7 +247,11 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
* The slab lists for all objects.
*/
struct kmem_cache_node {
+#ifdef CONFIG_SLUB
+ raw_spinlock_t list_lock;
+#else
spinlock_t list_lock;
+#endif
#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
diff --git a/mm/slub.c b/mm/slub.c
index 5c1343a..a164648 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1087,7 +1087,7 @@ static noinline struct kmem_cache_node *free_debug_processing(
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
- spin_lock_irqsave(&n->list_lock, *flags);
+ raw_spin_lock_irqsave(&n->list_lock, *flags);
slab_lock(page);
if (!check_slab(s, page))
@@ -1135,7 +1135,7 @@ out:
fail:
slab_unlock(page);
- spin_unlock_irqrestore(&n->list_lock, *flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, *flags);
slab_fix(s, "Object at 0x%p not freed", object);
return NULL;
}
@@ -1270,6 +1270,12 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
#endif /* CONFIG_SLUB_DEBUG */
+struct slub_free_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
+
/*
* Slab allocation and freeing
*/
@@ -1291,10 +1297,15 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
struct page *page;
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
+ bool enableirqs;
flags &= gfp_allowed_mask;
- if (flags & __GFP_WAIT)
+ enableirqs = (flags & __GFP_WAIT) != 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ enableirqs |= system_state == SYSTEM_RUNNING;
+#endif
+ if (enableirqs)
local_irq_enable();
flags |= s->allocflags;
@@ -1334,7 +1345,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
kmemcheck_mark_unallocated_pages(page, pages);
}
- if (flags & __GFP_WAIT)
+ if (enableirqs)
local_irq_disable();
if (!page)
return NULL;
@@ -1352,8 +1363,10 @@ static void setup_object(struct kmem_cache *s, struct page *page,
void *object)
{
setup_object_debug(s, page, object);
+#ifndef CONFIG_PREEMPT_RT_FULL
if (unlikely(s->ctor))
s->ctor(object);
+#endif
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1431,6 +1444,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_memcg_kmem_pages(page, order);
}
+static void free_delayed(struct list_head *h)
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_slab(page->slab_cache, page);
+ }
+}
+
#define need_reserve_slab_rcu \
(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
@@ -1465,6 +1488,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
}
call_rcu(head, rcu_free_slab);
+ } else if (irqs_disabled()) {
+ struct slub_free_list *f = &__get_cpu_var(slub_free_list);
+
+ raw_spin_lock(&f->lock);
+ list_add(&page->lru, &f->list);
+ raw_spin_unlock(&f->lock);
} else
__free_slab(s, page);
}
@@ -1569,7 +1598,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!n || !n->nr_partial)
return NULL;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;
@@ -1594,7 +1623,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
break;
}
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return object;
}
@@ -1837,7 +1866,7 @@ redo:
* that acquire_slab() will see a slab page that
* is frozen
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
} else {
m = M_FULL;
@@ -1848,7 +1877,7 @@ redo:
* slabs from diagnostic functions will not see
* any frozen slabs.
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
}
@@ -1883,7 +1912,7 @@ redo:
goto redo;
if (lock)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
@@ -1915,10 +1944,10 @@ static void unfreeze_partials(struct kmem_cache *s,
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
n = n2;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
do {
@@ -1947,7 +1976,7 @@ static void unfreeze_partials(struct kmem_cache *s,
}
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
while (discard_page) {
page = discard_page;
@@ -1985,14 +2014,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
pobjects = oldpage->pobjects;
pages = oldpage->pages;
if (drain && pobjects > s->cpu_partial) {
+ struct slub_free_list *f;
unsigned long flags;
+ LIST_HEAD(tofree);
/*
* partial array is full. Move the existing
* set to the per node partial list.
*/
local_irq_save(flags);
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ f = &__get_cpu_var(slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2056,7 +2092,22 @@ static bool has_cpu_slab(int cpu, void *info)
static void flush_all(struct kmem_cache *s)
{
+ LIST_HEAD(tofree);
+ int cpu;
+
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ for_each_online_cpu(cpu) {
+ struct slub_free_list *f;
+
+ if (!has_cpu_slab(cpu, s))
+ continue;
+
+ f = &per_cpu(slub_free_list, cpu);
+ raw_spin_lock_irq(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock_irq(&f->lock);
+ free_delayed(&tofree);
+ }
}
/*
@@ -2084,10 +2135,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
unsigned long x = 0;
struct page *page;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
@@ -2230,9 +2281,11 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
+ struct slub_free_list *f;
void *freelist;
struct page *page;
unsigned long flags;
+ LIST_HEAD(tofree);
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
@@ -2295,7 +2348,13 @@ load_freelist:
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
+out:
+ f = &__get_cpu_var(slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
return freelist;
new_slab:
@@ -2313,9 +2372,7 @@ new_slab:
if (unlikely(!freelist)) {
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
-
- local_irq_restore(flags);
- return NULL;
+ goto out;
}
page = c->page;
@@ -2330,8 +2387,7 @@ new_slab:
deactivate_slab(s, page, get_freepointer(s, freelist));
c->page = NULL;
c->freelist = NULL;
- local_irq_restore(flags);
- return freelist;
+ goto out;
}
/*
@@ -2416,6 +2472,10 @@ redo:
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->object_size);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (unlikely(s->ctor) && object)
+ s->ctor(object);
+#endif
slab_post_alloc_hook(s, gfpflags, object);
@@ -2503,7 +2563,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
do {
if (unlikely(n)) {
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
@@ -2535,7 +2595,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
}
}
@@ -2577,7 +2637,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return;
slab_empty:
@@ -2591,7 +2651,7 @@ slab_empty:
/* Slab must be on the full list */
remove_full(s, page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}
@@ -2793,7 +2853,7 @@ static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- spin_lock_init(&n->list_lock);
+ raw_spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
@@ -3379,7 +3439,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
for (i = 0; i < objects; i++)
INIT_LIST_HEAD(slabs_by_inuse + i);
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
/*
* Build lists indexed by the items in use in each slab.
@@ -3400,7 +3460,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
for (i = objects - 1; i > 0; i--)
list_splice(slabs_by_inuse + i, n->partial.prev);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
@@ -3576,6 +3636,12 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+ }
if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -3880,7 +3946,7 @@ static int validate_slab_node(struct kmem_cache *s,
struct page *page;
unsigned long flags;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru) {
validate_slab_slab(s, page, map);
@@ -3903,7 +3969,7 @@ static int validate_slab_node(struct kmem_cache *s,
atomic_long_read(&n->nr_slabs));
out:
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return count;
}
@@ -4093,12 +4159,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
if (!atomic_long_read(&n->nr_slabs))
continue;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc, map);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
}
for (i = 0; i < t.count; i++) {
diff --git a/mm/swap.c b/mm/swap.c
index aa4da5d..05a6951 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -32,6 +32,7 @@
#include <linux/gfp.h>
#include <linux/uio.h>
#include <linux/hugetlb.h>
+#include <linux/locallock.h>
#include "internal.h"
@@ -45,6 +46,9 @@ static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
+static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
+
/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
@@ -408,11 +412,11 @@ void rotate_reclaimable_page(struct page *page)
unsigned long flags;
page_cache_get(page);
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pvec = &__get_cpu_var(lru_rotate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
}
@@ -463,12 +467,13 @@ static bool need_activate_page_drain(int cpu)
void activate_page(struct page *page)
{
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
+ activate_page_pvecs);
page_cache_get(page);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
- put_cpu_var(activate_page_pvecs);
+ put_locked_var(swapvec_lock, activate_page_pvecs);
}
}
@@ -494,7 +499,7 @@ void activate_page(struct page *page)
static void __lru_cache_activate_page(struct page *page)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
int i;
/*
@@ -516,7 +521,7 @@ static void __lru_cache_activate_page(struct page *page)
}
}
- put_cpu_var(lru_add_pvec);
+ put_locked_var(swapvec_lock, lru_add_pvec);
}
/*
@@ -556,13 +561,13 @@ EXPORT_SYMBOL(mark_page_accessed);
*/
void __lru_cache_add(struct page *page)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
page_cache_get(page);
if (!pagevec_space(pvec))
__pagevec_lru_add(pvec);
pagevec_add(pvec, page);
- put_cpu_var(lru_add_pvec);
+ put_locked_var(swapvec_lock, lru_add_pvec);
}
EXPORT_SYMBOL(__lru_cache_add);
@@ -685,9 +690,9 @@ void lru_add_drain_cpu(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
@@ -715,18 +720,19 @@ void deactivate_page(struct page *page)
return;
if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
+ lru_deactivate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- put_cpu_var(lru_deactivate_pvecs);
+ put_locked_var(swapvec_lock, lru_deactivate_pvecs);
}
}
void lru_add_drain(void)
{
- lru_add_drain_cpu(get_cpu());
- put_cpu();
+ lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
+ local_unlock_cpu(swapvec_lock);
}
static void lru_add_drain_per_cpu(struct work_struct *dummy)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1074543..d64289d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -790,7 +790,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
struct vmap_block *vb;
struct vmap_area *va;
unsigned long vb_idx;
- int node, err;
+ int node, err, cpu;
node = numa_node_id();
@@ -828,11 +828,12 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
BUG_ON(err);
radix_tree_preload_end();
- vbq = &get_cpu_var(vmap_block_queue);
+ cpu = get_cpu_light();
+ vbq = &__get_cpu_var(vmap_block_queue);
spin_lock(&vbq->lock);
list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
return vb;
}
@@ -900,6 +901,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
+ int cpu = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -915,7 +917,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
again:
rcu_read_lock();
- vbq = &get_cpu_var(vmap_block_queue);
+ cpu = get_cpu_light();
+ vbq = &__get_cpu_var(vmap_block_queue);
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
int i;
@@ -939,7 +942,7 @@ next:
spin_unlock(&vb->lock);
}
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
rcu_read_unlock();
if (!addr) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 5a442a7..efea337 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -217,6 +217,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
long x;
long t;
+ preempt_disable_rt();
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
@@ -226,6 +227,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
x = 0;
}
__this_cpu_write(*p, x);
+ preempt_enable_rt();
}
EXPORT_SYMBOL(__mod_zone_page_state);
@@ -258,6 +260,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
@@ -266,6 +269,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v + overstep, zone, item);
__this_cpu_write(*p, -overstep);
}
+ preempt_enable_rt();
}
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -280,6 +284,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) {
@@ -288,6 +293,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v - overstep, zone, item);
__this_cpu_write(*p, overstep);
}
+ preempt_enable_rt();
}
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)