summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-22 03:05:45 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-22 03:05:45 (GMT)
commitdf32e43a54d04eda35d2859beaf90e3864d53288 (patch)
tree7a61cf658b2949bd426285eb9902be7758ced1ba /include
parentfbd918a2026d0464ce9c23f57b7de4bcfccdc2e6 (diff)
parent78d5506e82b21a1a1de68c24182db2c2fe521422 (diff)
downloadlinux-df32e43a54d04eda35d2859beaf90e3864d53288.tar.xz
Merge branch 'akpm' (incoming from Andrew)
Merge first patch-bomb from Andrew Morton: - a couple of misc things - inotify/fsnotify work from Jan - ocfs2 updates (partial) - about half of MM * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits) mm/migrate: remove unused function, fail_migrate_page() mm/migrate: remove putback_lru_pages, fix comment on putback_movable_pages mm/migrate: correct failure handling if !hugepage_migration_support() mm/migrate: add comment about permanent failure path mm, page_alloc: warn for non-blockable __GFP_NOFAIL allocation failure mm: compaction: reset scanner positions immediately when they meet mm: compaction: do not mark unmovable pageblocks as skipped in async compaction mm: compaction: detect when scanners meet in isolate_freepages mm: compaction: reset cached scanner pfn's before reading them mm: compaction: encapsulate defer reset logic mm: compaction: trace compaction begin and end memcg, oom: lock mem_cgroup_print_oom_info sched: add tracepoints related to NUMA task migration mm: numa: do not automatically migrate KSM pages mm: numa: trace tasks that fail migration due to rate limiting mm: numa: limit scope of lock for NUMA migrate rate limiting mm: numa: make NUMA-migrate related functions static lib/show_mem.c: show num_poisoned_pages when oom mm/hwpoison: add '#' to hwpoison_inject mm/memblock: use WARN_ONCE when MAX_NUMNODES passed as input parameter ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bootmem.h153
-rw-r--r--include/linux/compaction.h16
-rw-r--r--include/linux/dma-debug.h6
-rw-r--r--include/linux/fsnotify_backend.h118
-rw-r--r--include/linux/huge_mm.h23
-rw-r--r--include/linux/hugetlb.h7
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/ksm.h15
-rw-r--r--include/linux/memblock.h54
-rw-r--r--include/linux/mempolicy.h32
-rw-r--r--include/linux/migrate.h6
-rw-r--r--include/linux/mm.h70
-rw-r--r--include/linux/mman.h1
-rw-r--r--include/linux/mmzone.h11
-rw-r--r--include/linux/posix_acl.h78
-rw-r--r--include/linux/rmap.h27
-rw-r--r--include/linux/sched.h12
-rw-r--r--include/trace/events/compaction.h42
-rw-r--r--include/trace/events/migrate.h26
-rw-r--r--include/trace/events/sched.h87
20 files changed, 543 insertions, 243 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f1f07d3..2fae55d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -5,6 +5,7 @@
#define _LINUX_BOOTMEM_H
#include <linux/mmzone.h>
+#include <linux/mm_types.h>
#include <asm/dma.h>
/*
@@ -52,7 +53,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
unsigned long size);
extern void free_bootmem(unsigned long physaddr, unsigned long size);
extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
/*
* Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
@@ -142,6 +142,157 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
+
+#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
+
+/* FIXME: use MEMBLOCK_ALLOC_* variants here */
+#define BOOTMEM_ALLOC_ACCESSIBLE 0
+#define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0)
+
+/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
+ phys_addr_t align, phys_addr_t min_addr,
+ phys_addr_t max_addr, int nid);
+void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr, phys_addr_t max_addr, int nid);
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
+static inline void * __init memblock_virt_alloc(
+ phys_addr_t size, phys_addr_t align)
+{
+ return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
+ BOOTMEM_ALLOC_ACCESSIBLE,
+ NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+ phys_addr_t size, phys_addr_t align)
+{
+ return memblock_virt_alloc_try_nid_nopanic(size, align,
+ BOOTMEM_LOW_LIMIT,
+ BOOTMEM_ALLOC_ACCESSIBLE,
+ NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+ phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+ return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
+ BOOTMEM_ALLOC_ACCESSIBLE,
+ NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+ phys_addr_t size, int nid)
+{
+ return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+ BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+ phys_addr_t size, int nid)
+{
+ return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
+ BOOTMEM_ALLOC_ACCESSIBLE,
+ nid);
+}
+
+static inline void __init memblock_free_early(
+ phys_addr_t base, phys_addr_t size)
+{
+ __memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+ phys_addr_t base, phys_addr_t size, int nid)
+{
+ __memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_late(
+ phys_addr_t base, phys_addr_t size)
+{
+ __memblock_free_late(base, size);
+}
+
+#else
+
+#define BOOTMEM_ALLOC_ACCESSIBLE 0
+
+
+/* Fall back to all the existing bootmem APIs */
+static inline void * __init memblock_virt_alloc(
+ phys_addr_t size, phys_addr_t align)
+{
+ if (!align)
+ align = SMP_CACHE_BYTES;
+ return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+ phys_addr_t size, phys_addr_t align)
+{
+ if (!align)
+ align = SMP_CACHE_BYTES;
+ return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+ phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+ return __alloc_bootmem_nopanic(size, align, min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+ phys_addr_t size, int nid)
+{
+ return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
+ BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+ phys_addr_t size, int nid)
+{
+ return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+ SMP_CACHE_BYTES,
+ BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
+ phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+ return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
+ min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid_nopanic(
+ phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+ return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
+ min_addr, max_addr);
+}
+
+static inline void __init memblock_free_early(
+ phys_addr_t base, phys_addr_t size)
+{
+ free_bootmem(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+ phys_addr_t base, phys_addr_t size, int nid)
+{
+ free_bootmem_node(NODE_DATA(nid), base, size);
+}
+
+static inline void __init memblock_free_late(
+ phys_addr_t base, phys_addr_t size)
+{
+ free_bootmem_late(base, size);
+}
+#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
+
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
extern void *alloc_remap(int nid, unsigned long size);
#else
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 091d72e..7e1c76e 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -62,6 +62,22 @@ static inline bool compaction_deferred(struct zone *zone, int order)
return zone->compact_considered < defer_limit;
}
+/*
+ * Update defer tracking counters after successful compaction of given order,
+ * which means an allocation either succeeded (alloc_success == true) or is
+ * expected to succeed.
+ */
+static inline void compaction_defer_reset(struct zone *zone, int order,
+ bool alloc_success)
+{
+ if (alloc_success) {
+ zone->compact_considered = 0;
+ zone->compact_defer_shift = 0;
+ }
+ if (order >= zone->compact_order_failed)
+ zone->compact_order_failed = order + 1;
+}
+
/* Returns true if restarting compaction after many failures */
static inline bool compaction_restarting(struct zone *zone, int order)
{
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index fc0e34c..fe8cb61 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
extern void debug_dma_dump_mappings(struct device *dev);
+extern void debug_dma_assert_idle(struct page *page);
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
{
}
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 4b2ee8d1..7d8d5e6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -15,7 +15,6 @@
#include <linux/path.h> /* struct path */
#include <linux/spinlock.h>
#include <linux/types.h>
-
#include <linux/atomic.h>
/*
@@ -79,6 +78,7 @@ struct fsnotify_group;
struct fsnotify_event;
struct fsnotify_mark;
struct fsnotify_event_private_data;
+struct fsnotify_fname;
/*
* Each group much define these ops. The fsnotify infrastructure will call
@@ -94,17 +94,27 @@ struct fsnotify_event_private_data;
* userspace messages that marks have been removed.
*/
struct fsnotify_ops {
- bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
- struct fsnotify_mark *inode_mark,
- struct fsnotify_mark *vfsmount_mark,
- __u32 mask, void *data, int data_type);
int (*handle_event)(struct fsnotify_group *group,
+ struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- struct fsnotify_event *event);
+ u32 mask, void *data, int data_type,
+ const unsigned char *file_name);
void (*free_group_priv)(struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
- void (*free_event_priv)(struct fsnotify_event_private_data *priv);
+ void (*free_event)(struct fsnotify_event *event);
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group. If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+ struct list_head list;
+ /* inode may ONLY be dereferenced during handle_event(). */
+ struct inode *inode; /* either the inode the event happened to or its parent */
+ u32 mask; /* the type of access, bitwise OR for FS_* event types */
};
/*
@@ -148,7 +158,11 @@ struct fsnotify_group {
* a group */
struct list_head marks_list; /* all inode marks for this group */
- struct fasync_struct *fsn_fa; /* async notification */
+ struct fasync_struct *fsn_fa; /* async notification */
+
+ struct fsnotify_event overflow_event; /* Event we queue when the
+ * notification list is too
+ * full */
/* groups can define private fields here or use the void *private */
union {
@@ -177,76 +191,10 @@ struct fsnotify_group {
};
};
-/*
- * A single event can be queued in multiple group->notification_lists.
- *
- * each group->notification_list will point to an event_holder which in turns points
- * to the actual event that needs to be sent to userspace.
- *
- * Seemed cheaper to create a refcnt'd event and a small holder for every group
- * than create a different event for every group
- *
- */
-struct fsnotify_event_holder {
- struct fsnotify_event *event;
- struct list_head event_list;
-};
-
-/*
- * Inotify needs to tack data onto an event. This struct lets us later find the
- * correct private data of the correct group.
- */
-struct fsnotify_event_private_data {
- struct fsnotify_group *group;
- struct list_head event_list;
-};
-
-/*
- * all of the information about the original object we want to now send to
- * a group. If you want to carry more info from the accessing task to the
- * listener this structure is where you need to be adding fields.
- */
-struct fsnotify_event {
- /*
- * If we create an event we are also likely going to need a holder
- * to link to a group. So embed one holder in the event. Means only
- * one allocation for the common case where we only have one group
- */
- struct fsnotify_event_holder holder;
- spinlock_t lock; /* protection for the associated event_holder and private_list */
- /* to_tell may ONLY be dereferenced during handle_event(). */
- struct inode *to_tell; /* either the inode the event happened to or its parent */
- /*
- * depending on the event type we should have either a path or inode
- * We hold a reference on path, but NOT on inode. Since we have the ref on
- * the path, it may be dereferenced at any point during this object's
- * lifetime. That reference is dropped when this object's refcnt hits
- * 0. If this event contains an inode instead of a path, the inode may
- * ONLY be used during handle_event().
- */
- union {
- struct path path;
- struct inode *inode;
- };
/* when calling fsnotify tell it if the data is a path or inode */
#define FSNOTIFY_EVENT_NONE 0
#define FSNOTIFY_EVENT_PATH 1
#define FSNOTIFY_EVENT_INODE 2
- int data_type; /* which of the above union we have */
- atomic_t refcnt; /* how many groups still are using/need to send this event */
- __u32 mask; /* the type of access, bitwise OR for FS_* event types */
-
- u32 sync_cookie; /* used to corrolate events, namely inotify mv events */
- const unsigned char *file_name;
- size_t name_len;
- struct pid *tgid;
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
- __u32 response; /* userspace answer to question */
-#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
-
- struct list_head private_data_list; /* groups can store private data here */
-};
/*
* Inode specific fields in an fsnotify_mark
@@ -370,17 +318,12 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
extern void fsnotify_destroy_group(struct fsnotify_group *group);
/* fasync handler function */
extern int fsnotify_fasync(int fd, struct file *file, int on);
-/* take a reference to an event */
-extern void fsnotify_get_event(struct fsnotify_event *event);
-extern void fsnotify_put_event(struct fsnotify_event *event);
-/* find private data previously attached to an event and unlink it */
-extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
- struct fsnotify_event *event);
-
+/* Free event from memory */
+extern void fsnotify_destroy_event(struct fsnotify_group *group,
+ struct fsnotify_event *event);
/* attach the event to the group notification queue */
extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
struct fsnotify_event *event,
- struct fsnotify_event_private_data *priv,
struct fsnotify_event *(*merge)(struct list_head *,
struct fsnotify_event *));
/* true if the group notification queue is empty */
@@ -430,15 +373,8 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_unmount_inodes(struct list_head *list);
/* put here because inotify does some weird stuff when destroying watches */
-extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
- void *data, int data_is,
- const unsigned char *name,
- u32 cookie, gfp_t gfp);
-
-/* fanotify likes to change events after they are on lists... */
-extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
-extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
- struct fsnotify_event *new_event);
+extern void fsnotify_init_event(struct fsnotify_event *event,
+ struct inode *to_tell, u32 mask);
#else
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 91672e2..db51201 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -157,6 +157,26 @@ static inline int hpage_nr_pages(struct page *page)
return HPAGE_PMD_NR;
return 1;
}
+/*
+ * compound_trans_head() should be used instead of compound_head(),
+ * whenever the "page" passed as parameter could be the tail of a
+ * transparent hugepage that could be undergoing a
+ * __split_huge_page_refcount(). The page structure layout often
+ * changes across releases and it makes extensive use of unions. So if
+ * the page structure layout will change in a way that
+ * page->first_page gets clobbered by __split_huge_page_refcount, the
+ * implementation making use of smp_rmb() will be required.
+ *
+ * Currently we define compound_trans_head as compound_head, because
+ * page->private is in the same union with page->first_page, and
+ * page->private isn't clobbered. However this also means we're
+ * currently leaving dirt into the page->private field of anonymous
+ * pages resulting from a THP split, instead of setting page->private
+ * to zero like for every other page that has PG_private not set. But
+ * anonymous pages don't use page->private so this is not a problem.
+ */
+#if 0
+/* This will be needed if page->private will be clobbered in split_huge_page */
static inline struct page *compound_trans_head(struct page *page)
{
if (PageTail(page)) {
@@ -174,6 +194,9 @@ static inline struct page *compound_trans_head(struct page *page)
}
return page;
}
+#else
+#define compound_trans_head(page) compound_head(page)
+#endif
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bd7e987..d01cc97 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
void hugepage_put_subpool(struct hugepage_subpool *spool);
int PageHuge(struct page *page);
-int PageHeadHuge(struct page *page_head);
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -104,11 +103,6 @@ static inline int PageHuge(struct page *page)
return 0;
}
-static inline int PageHeadHuge(struct page *page_head)
-{
- return 0;
-}
-
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}
@@ -360,6 +354,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
static inline struct hstate *page_hstate(struct page *page)
{
+ VM_BUG_ON(!PageHuge(page));
return size_to_hstate(PAGE_SIZE << compound_order(page));
}
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f0e5238..1516a8f 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -41,6 +41,7 @@ extern struct fs_struct init_fs;
#define INIT_SIGNALS(sig) { \
.nr_threads = 1, \
+ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
.shared_pending = { \
.list = LIST_HEAD_INIT(sig.shared_pending.list), \
@@ -222,6 +223,7 @@ extern struct task_group root_task_group;
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
}, \
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
+ .thread_node = LIST_HEAD_INIT(init_signals.thread_head), \
INIT_IDS \
INIT_PERF_EVENTS(tsk) \
INIT_TRACE_IRQFLAGS \
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 45c9b6a..3be6bb1 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -73,11 +73,7 @@ static inline void set_page_stable_node(struct page *page,
struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address);
-int page_referenced_ksm(struct page *page,
- struct mem_cgroup *memcg, unsigned long *vm_flags);
-int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
-int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
- struct vm_area_struct *, unsigned long, void *), void *arg);
+int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
void ksm_migrate_page(struct page *newpage, struct page *oldpage);
#else /* !CONFIG_KSM */
@@ -115,13 +111,8 @@ static inline int page_referenced_ksm(struct page *page,
return 0;
}
-static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
-{
- return 0;
-}
-
-static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
- struct vm_area_struct *, unsigned long, void *), void *arg)
+static inline int rmap_walk_ksm(struct page *page,
+ struct rmap_walk_control *rwc)
{
return 0;
}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 77c60e5..cd0274b 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -19,9 +19,13 @@
#define INIT_MEMBLOCK_REGIONS 128
+/* Definition of memblock flags. */
+#define MEMBLOCK_HOTPLUG 0x1 /* hotpluggable region */
+
struct memblock_region {
phys_addr_t base;
phys_addr_t size;
+ unsigned long flags;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int nid;
#endif
@@ -43,12 +47,17 @@ struct memblock {
extern struct memblock memblock;
extern int memblock_debug;
+#ifdef CONFIG_MOVABLE_NODE
+/* If movable_node boot option specified */
+extern bool movable_node_enabled;
+#endif /* CONFIG_MOVABLE_NODE */
#define memblock_dbg(fmt, ...) \
if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
- phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
+ phys_addr_t start, phys_addr_t end,
+ int nid);
phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
phys_addr_t size, phys_addr_t align);
phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
@@ -59,6 +68,28 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
int memblock_free(phys_addr_t base, phys_addr_t size);
int memblock_reserve(phys_addr_t base, phys_addr_t size);
void memblock_trim_memory(phys_addr_t align);
+int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
+int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+#ifdef CONFIG_MOVABLE_NODE
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+ return m->flags & MEMBLOCK_HOTPLUG;
+}
+
+static inline bool movable_node_is_enabled(void)
+{
+ return movable_node_enabled;
+}
+#else
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+ return false;
+}
+static inline bool movable_node_is_enabled(void)
+{
+ return false;
+}
+#endif
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
@@ -87,7 +118,7 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
/**
* for_each_free_mem_range - iterate through free memblock areas
* @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
* @p_nid: ptr to int for nid of the range, can be %NULL
@@ -107,7 +138,7 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
/**
* for_each_free_mem_range_reverse - rev-iterate through free memblock areas
* @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
* @p_nid: ptr to int for nid of the range, can be %NULL
@@ -121,8 +152,21 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
i != (u64)ULLONG_MAX; \
__next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
+static inline void memblock_set_region_flags(struct memblock_region *r,
+ unsigned long flags)
+{
+ r->flags |= flags;
+}
+
+static inline void memblock_clear_region_flags(struct memblock_region *r,
+ unsigned long flags)
+{
+ r->flags &= ~flags;
+}
+
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
+int memblock_set_node(phys_addr_t base, phys_addr_t size,
+ struct memblock_type *type, int nid);
static inline void memblock_set_region_node(struct memblock_region *r, int nid)
{
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 9fe426b..5f1ea75 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -211,20 +211,8 @@ static inline void mpol_get(struct mempolicy *pol)
{
}
-static inline struct mempolicy *mpol_dup(struct mempolicy *old)
-{
- return NULL;
-}
-
struct shared_policy {};
-static inline int mpol_set_shared_policy(struct shared_policy *info,
- struct vm_area_struct *vma,
- struct mempolicy *new)
-{
- return -EINVAL;
-}
-
static inline void mpol_shared_policy_init(struct shared_policy *sp,
struct mempolicy *mpol)
{
@@ -234,12 +222,6 @@ static inline void mpol_free_shared_policy(struct shared_policy *p)
{
}
-static inline struct mempolicy *
-mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
-{
- return NULL;
-}
-
#define vma_policy(vma) NULL
static inline int
@@ -266,10 +248,6 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
}
-static inline void mpol_fix_fork_child_flag(struct task_struct *p)
-{
-}
-
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask)
@@ -284,12 +262,6 @@ static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
return false;
}
-static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
- const nodemask_t *mask)
-{
- return false;
-}
-
static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
const nodemask_t *to, int flags)
{
@@ -307,10 +279,6 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
}
#endif
-static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
-{
-}
-
static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
unsigned long address)
{
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f015c05..84a31ad 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -35,16 +35,12 @@ enum migrate_reason {
#ifdef CONFIG_MIGRATION
-extern void putback_lru_pages(struct list_head *l);
extern void putback_movable_pages(struct list_head *l);
extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
extern int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, enum migrate_mode mode, int reason);
-extern int fail_migrate_page(struct address_space *,
- struct page *, struct page *);
-
extern int migrate_prep(void);
extern int migrate_prep_local(void);
extern int migrate_vmas(struct mm_struct *mm,
@@ -59,7 +55,6 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
int extra_count);
#else
-static inline void putback_lru_pages(struct list_head *l) {}
static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, enum migrate_mode mode, int reason)
@@ -86,7 +81,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
/* Possible settings for the migrate_page() method in address_operations */
#define migrate_page NULL
-#define fail_migrate_page NULL
#endif /* CONFIG_MIGRATION */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3552717..a512dd8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -57,6 +57,15 @@ extern int sysctl_legacy_va_layout;
extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
+
+extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
+ size_t *, loff_t *);
+extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
+ size_t *, loff_t *);
+
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
/* to align the pointer to the (next) page boundary */
@@ -414,15 +423,44 @@ static inline int page_count(struct page *page)
return atomic_read(&compound_head(page)->_count);
}
+#ifdef CONFIG_HUGETLB_PAGE
+extern int PageHeadHuge(struct page *page_head);
+#else /* CONFIG_HUGETLB_PAGE */
+static inline int PageHeadHuge(struct page *page_head)
+{
+ return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline bool __compound_tail_refcounted(struct page *page)
+{
+ return !PageSlab(page) && !PageHeadHuge(page);
+}
+
+/*
+ * This takes a head page as parameter and tells if the
+ * tail page reference counting can be skipped.
+ *
+ * For this to be safe, PageSlab and PageHeadHuge must remain true on
+ * any given page where they return true here, until all tail pins
+ * have been released.
+ */
+static inline bool compound_tail_refcounted(struct page *page)
+{
+ VM_BUG_ON(!PageHead(page));
+ return __compound_tail_refcounted(page);
+}
+
static inline void get_huge_page_tail(struct page *page)
{
/*
- * __split_huge_page_refcount() cannot run
- * from under us.
+ * __split_huge_page_refcount() cannot run from under us.
*/
+ VM_BUG_ON(!PageTail(page));
VM_BUG_ON(page_mapcount(page) < 0);
VM_BUG_ON(atomic_read(&page->_count) != 0);
- atomic_inc(&page->_mapcount);
+ if (compound_tail_refcounted(page->first_page))
+ atomic_inc(&page->_mapcount);
}
extern bool __get_page_tail(struct page *page);
@@ -846,11 +884,14 @@ static __always_inline void *lowmem_page_address(const struct page *page)
#endif
#if defined(WANT_PAGE_VIRTUAL)
-#define page_address(page) ((page)->virtual)
-#define set_page_address(page, address) \
- do { \
- (page)->virtual = (address); \
- } while(0)
+static inline void *page_address(const struct page *page)
+{
+ return page->virtual;
+}
+static inline void set_page_address(struct page *page, void *address)
+{
+ page->virtual = address;
+}
#define page_address_init() do { } while(0)
#endif
@@ -984,7 +1025,6 @@ extern void pagefault_out_of_memory(void);
* various contexts.
*/
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
-#define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */
extern void show_free_areas(unsigned int flags);
extern bool skip_free_areas_node(unsigned int flags, int nid);
@@ -1318,6 +1358,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
+void __init ptlock_cache_init(void);
extern bool ptlock_alloc(struct page *page);
extern void ptlock_free(struct page *page);
@@ -1326,6 +1367,10 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
return page->ptl;
}
#else /* ALLOC_SPLIT_PTLOCKS */
+static inline void ptlock_cache_init(void)
+{
+}
+
static inline bool ptlock_alloc(struct page *page)
{
return true;
@@ -1378,10 +1423,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return &mm->page_table_lock;
}
+static inline void ptlock_cache_init(void) {}
static inline bool ptlock_init(struct page *page) { return true; }
static inline void pte_lock_deinit(struct page *page) {}
#endif /* USE_SPLIT_PTE_PTLOCKS */
+static inline void pgtable_init(void)
+{
+ ptlock_cache_init();
+ pgtable_cache_init();
+}
+
static inline bool pgtable_page_ctor(struct page *page)
{
inc_zone_page_state(page, NR_PAGETABLE);
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 7f7f8da..16373c8 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -9,6 +9,7 @@
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
extern struct percpu_counter vm_committed_as;
#ifdef CONFIG_SMP
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bd791e4..5f2052c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -490,6 +490,12 @@ struct zone {
unsigned long managed_pages;
/*
+ * Number of MIGRATE_RESEVE page block. To maintain for just
+ * optimization. Protected by zone->lock.
+ */
+ int nr_migrate_reserve_block;
+
+ /*
* rarely used fields:
*/
const char *name;
@@ -758,10 +764,7 @@ typedef struct pglist_data {
int kswapd_max_order;
enum zone_type classzone_idx;
#ifdef CONFIG_NUMA_BALANCING
- /*
- * Lock serializing the per destination node AutoNUMA memory
- * migration rate limiting data.
- */
+ /* Lock serializing the migrate rate limiting window */
spinlock_t numabalancing_migrate_lock;
/* Rate limiting time interval */
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 7931efe..fb61694 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -94,78 +94,12 @@ extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
extern struct posix_acl *get_posix_acl(struct inode *, int);
extern int set_posix_acl(struct inode *, int, struct posix_acl *);
-#ifdef CONFIG_FS_POSIX_ACL
-static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
-{
- switch (type) {
- case ACL_TYPE_ACCESS:
- return &inode->i_acl;
- case ACL_TYPE_DEFAULT:
- return &inode->i_default_acl;
- default:
- BUG();
- }
-}
-
-static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
-{
- struct posix_acl **p = acl_by_type(inode, type);
- struct posix_acl *acl = ACCESS_ONCE(*p);
- if (acl) {
- spin_lock(&inode->i_lock);
- acl = *p;
- if (acl != ACL_NOT_CACHED)
- acl = posix_acl_dup(acl);
- spin_unlock(&inode->i_lock);
- }
- return acl;
-}
-
-static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
-{
- return rcu_dereference(*acl_by_type(inode, type));
-}
-
-static inline void set_cached_acl(struct inode *inode,
- int type,
- struct posix_acl *acl)
-{
- struct posix_acl **p = acl_by_type(inode, type);
- struct posix_acl *old;
- spin_lock(&inode->i_lock);
- old = *p;
- rcu_assign_pointer(*p, posix_acl_dup(acl));
- spin_unlock(&inode->i_lock);
- if (old != ACL_NOT_CACHED)
- posix_acl_release(old);
-}
-
-static inline void forget_cached_acl(struct inode *inode, int type)
-{
- struct posix_acl **p = acl_by_type(inode, type);
- struct posix_acl *old;
- spin_lock(&inode->i_lock);
- old = *p;
- *p = ACL_NOT_CACHED;
- spin_unlock(&inode->i_lock);
- if (old != ACL_NOT_CACHED)
- posix_acl_release(old);
-}
-
-static inline void forget_all_cached_acls(struct inode *inode)
-{
- struct posix_acl *old_access, *old_default;
- spin_lock(&inode->i_lock);
- old_access = inode->i_acl;
- old_default = inode->i_default_acl;
- inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
- spin_unlock(&inode->i_lock);
- if (old_access != ACL_NOT_CACHED)
- posix_acl_release(old_access);
- if (old_default != ACL_NOT_CACHED)
- posix_acl_release(old_default);
-}
-#endif
+struct posix_acl **acl_by_type(struct inode *inode, int type);
+struct posix_acl *get_cached_acl(struct inode *inode, int type);
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
+void forget_cached_acl(struct inode *inode, int type);
+void forget_all_cached_acls(struct inode *inode);
static inline void cache_no_acl(struct inode *inode)
{
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 6dacb93..1da693d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -184,13 +184,13 @@ static inline void page_dup_rmap(struct page *page)
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags);
int page_referenced_one(struct page *, struct vm_area_struct *,
- unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+ unsigned long address, void *arg);
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
int try_to_unmap(struct page *, enum ttu_flags flags);
int try_to_unmap_one(struct page *, struct vm_area_struct *,
- unsigned long address, enum ttu_flags flags);
+ unsigned long address, void *arg);
/*
* Called from mm/filemap_xip.c to unmap empty zero page
@@ -236,10 +236,27 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
/*
- * Called by migrate.c to remove migration ptes, but might be used more later.
+ * rmap_walk_control: To control rmap traversing for specific needs
+ *
+ * arg: passed to rmap_one() and invalid_vma()
+ * rmap_one: executed on each vma where page is mapped
+ * done: for checking traversing termination condition
+ * file_nonlinear: for handling file nonlinear mapping
+ * anon_lock: for getting anon_lock by optimized way rather than default
+ * invalid_vma: for skipping uninterested vma
*/
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
- struct vm_area_struct *, unsigned long, void *), void *arg);
+struct rmap_walk_control {
+ void *arg;
+ int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
+ unsigned long addr, void *arg);
+ int (*done)(struct page *page);
+ int (*file_nonlinear)(struct page *, struct address_space *,
+ struct vm_area_struct *vma);
+ struct anon_vma *(*anon_lock)(struct page *page);
+ bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
+};
+
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc);
#else /* !CONFIG_MMU */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffccdad..485234d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -549,6 +549,7 @@ struct signal_struct {
atomic_t sigcnt;
atomic_t live;
int nr_threads;
+ struct list_head thread_head;
wait_queue_head_t wait_chldexit; /* for wait4() */
@@ -1271,6 +1272,7 @@ struct task_struct {
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group;
+ struct list_head thread_node;
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
@@ -2341,6 +2343,16 @@ extern bool current_is_single_threaded(void);
#define while_each_thread(g, t) \
while ((t = next_thread(t)) != g)
+#define __for_each_thread(signal, t) \
+ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t) \
+ __for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t) \
+ for_each_process(p) for_each_thread(p, t)
+
static inline int get_nr_threads(struct task_struct *tsk)
{
return tsk->signal->nr_threads;
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index fde1b3e..06f544e 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -67,6 +67,48 @@ TRACE_EVENT(mm_compaction_migratepages,
__entry->nr_failed)
);
+TRACE_EVENT(mm_compaction_begin,
+ TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
+ unsigned long free_start, unsigned long zone_end),
+
+ TP_ARGS(zone_start, migrate_start, free_start, zone_end),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, zone_start)
+ __field(unsigned long, migrate_start)
+ __field(unsigned long, free_start)
+ __field(unsigned long, zone_end)
+ ),
+
+ TP_fast_assign(
+ __entry->zone_start = zone_start;
+ __entry->migrate_start = migrate_start;
+ __entry->free_start = free_start;
+ __entry->zone_end = zone_end;
+ ),
+
+ TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
+ __entry->zone_start,
+ __entry->migrate_start,
+ __entry->free_start,
+ __entry->zone_end)
+);
+
+TRACE_EVENT(mm_compaction_end,
+ TP_PROTO(int status),
+
+ TP_ARGS(status),
+
+ TP_STRUCT__entry(
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->status = status;
+ ),
+
+ TP_printk("status=%d", __entry->status)
+);
#endif /* _TRACE_COMPACTION_H */
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index ec2a6cc..3075ffb 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -45,6 +45,32 @@ TRACE_EVENT(mm_migrate_pages,
__print_symbolic(__entry->reason, MIGRATE_REASON))
);
+TRACE_EVENT(mm_numa_migrate_ratelimit,
+
+ TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
+
+ TP_ARGS(p, dst_nid, nr_pages),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN)
+ __field( pid_t, pid)
+ __field( int, dst_nid)
+ __field( unsigned long, nr_pages)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+ __entry->dst_nid = dst_nid;
+ __entry->nr_pages = nr_pages;
+ ),
+
+ TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
+ __entry->comm,
+ __entry->pid,
+ __entry->dst_nid,
+ __entry->nr_pages)
+);
#endif /* _TRACE_MIGRATE_H */
/* This part must be outside protection */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 04c3084..67e1bbf 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -443,6 +443,93 @@ TRACE_EVENT(sched_process_hang,
);
#endif /* CONFIG_DETECT_HUNG_TASK */
+DECLARE_EVENT_CLASS(sched_move_task_template,
+
+ TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+ TP_ARGS(tsk, src_cpu, dst_cpu),
+
+ TP_STRUCT__entry(
+ __field( pid_t, pid )
+ __field( pid_t, tgid )
+ __field( pid_t, ngid )
+ __field( int, src_cpu )
+ __field( int, src_nid )
+ __field( int, dst_cpu )
+ __field( int, dst_nid )
+ ),
+
+ TP_fast_assign(
+ __entry->pid = task_pid_nr(tsk);
+ __entry->tgid = task_tgid_nr(tsk);
+ __entry->ngid = task_numa_group_id(tsk);
+ __entry->src_cpu = src_cpu;
+ __entry->src_nid = cpu_to_node(src_cpu);
+ __entry->dst_cpu = dst_cpu;
+ __entry->dst_nid = cpu_to_node(dst_cpu);
+ ),
+
+ TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
+ __entry->pid, __entry->tgid, __entry->ngid,
+ __entry->src_cpu, __entry->src_nid,
+ __entry->dst_cpu, __entry->dst_nid)
+);
+
+/*
+ * Tracks migration of tasks from one runqueue to another. Can be used to
+ * detect if automatic NUMA balancing is bouncing between nodes
+ */
+DEFINE_EVENT(sched_move_task_template, sched_move_numa,
+ TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+ TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
+ TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+ TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+TRACE_EVENT(sched_swap_numa,
+
+ TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+ struct task_struct *dst_tsk, int dst_cpu),
+
+ TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
+
+ TP_STRUCT__entry(
+ __field( pid_t, src_pid )
+ __field( pid_t, src_tgid )
+ __field( pid_t, src_ngid )
+ __field( int, src_cpu )
+ __field( int, src_nid )
+ __field( pid_t, dst_pid )
+ __field( pid_t, dst_tgid )
+ __field( pid_t, dst_ngid )
+ __field( int, dst_cpu )
+ __field( int, dst_nid )
+ ),
+
+ TP_fast_assign(
+ __entry->src_pid = task_pid_nr(src_tsk);
+ __entry->src_tgid = task_tgid_nr(src_tsk);
+ __entry->src_ngid = task_numa_group_id(src_tsk);
+ __entry->src_cpu = src_cpu;
+ __entry->src_nid = cpu_to_node(src_cpu);
+ __entry->dst_pid = task_pid_nr(dst_tsk);
+ __entry->dst_tgid = task_tgid_nr(dst_tsk);
+ __entry->dst_ngid = task_numa_group_id(dst_tsk);
+ __entry->dst_cpu = dst_cpu;
+ __entry->dst_nid = cpu_to_node(dst_cpu);
+ ),
+
+ TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
+ __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
+ __entry->src_cpu, __entry->src_nid,
+ __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
+ __entry->dst_cpu, __entry->dst_nid)
+);
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */