5bbe3547aa
Currently, pages which are marked as unevictable are protected from compaction, but not from other types of migration. The POSIX real-time extension explicitly states that mlock() will prevent a major page fault, but the spirit of this is that mlock() should give a process the ability to control sources of latency, including minor page faults. However, the mlock man page only explicitly says that a locked page will not be written to swap, and this can cause some confusion. Today the compaction code offers no way for a developer who wants to avoid swap, but who also wants large contiguous areas to remain available, to achieve that state. This patch introduces a sysctl for controlling compaction behavior with respect to the unevictable LRU. Users who demand no page faults after a page is present can set compact_unevictable_allowed to 0, and users who need the large contiguous areas can enable compaction on locked memory by leaving the default value of 1. To illustrate this problem, I wrote a quick test program that mmaps a large number of 1MB files filled with random data. These maps are created locked and read-only. Then every other mmap is unmapped and I attempt to allocate huge pages to the static huge page pool. When the compact_unevictable_allowed sysctl is 0, I cannot allocate hugepages after fragmenting memory. When the value is set to 1, allocations succeed.
Signed-off-by: Eric B Munson <emunson@akamai.com> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Christoph Lameter <cl@linux.com> Acked-by: David Rientjes <rientjes@google.com> Acked-by: Rik van Riel <riel@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Christoph Lameter <cl@linux.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mel Gorman <mgorman@suse.de> Cc: David Rientjes <rientjes@google.com> Cc: Michal Hocko <mhocko@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
105 lines
3.4 KiB
C
105 lines
3.4 KiB
C
#ifndef _LINUX_COMPACTION_H
|
|
#define _LINUX_COMPACTION_H
|
|
|
|
/*
 * Return values for compact_zone() and try_to_compact_pages().
 *
 * The values are consecutive integers starting at 0.
 */
/* compaction didn't start as it was deferred due to past failures */
#define COMPACT_DEFERRED		0
/* compaction didn't start as it was not possible or direct reclaim was more suitable */
#define COMPACT_SKIPPED			1
/* compaction should continue to another pageblock */
#define COMPACT_CONTINUE		2
/* direct compaction partially compacted a zone and there are suitable pages */
#define COMPACT_PARTIAL			3
/* The full zone was compacted */
#define COMPACT_COMPLETE		4
/* For more detailed tracepoint output */
#define COMPACT_NO_SUITABLE_PAGE	5
#define COMPACT_NOT_SUITABLE_ZONE	6
/* When adding new state, please change compaction_status_string, too */

/*
 * Used to signal whether compaction detected need_sched() or lock contention.
 *
 * NOTE(review): need_sched() is not declared in this header; presumably the
 * scheduler's need_resched() check is meant -- confirm.
 */
/* No contention detected */
#define COMPACT_CONTENDED_NONE	0
/* Either need_sched() was true or fatal signal pending */
#define COMPACT_CONTENDED_SCHED	1
/* Zone lock or lru_lock was contended in async compaction */
#define COMPACT_CONTENDED_LOCK	2

/*
 * Forward declaration only: this header passes struct alloc_context by
 * pointer, so the full definition is not needed here.
 */
struct alloc_context; /* in mm/internal.h */

#ifdef CONFIG_COMPACTION
|
|
extern int sysctl_compact_memory;
|
|
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *length, loff_t *ppos);
|
|
extern int sysctl_extfrag_threshold;
|
|
extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *length, loff_t *ppos);
|
|
extern int sysctl_compact_unevictable_allowed;
|
|
|
|
extern int fragmentation_index(struct zone *zone, unsigned int order);
|
|
extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
|
|
int alloc_flags, const struct alloc_context *ac,
|
|
enum migrate_mode mode, int *contended);
|
|
extern void compact_pgdat(pg_data_t *pgdat, int order);
|
|
extern void reset_isolation_suitable(pg_data_t *pgdat);
|
|
extern unsigned long compaction_suitable(struct zone *zone, int order,
|
|
int alloc_flags, int classzone_idx);
|
|
|
|
/*
 * Per-zone, per-order compaction deferral helpers, available when
 * CONFIG_COMPACTION is enabled. Only declared here; the implementations
 * live outside this header.
 */
extern void defer_compaction(struct zone *zone, int order);
extern bool compaction_deferred(struct zone *zone, int order);
extern void compaction_defer_reset(struct zone *zone, int order,
				bool alloc_success);
extern bool compaction_restarting(struct zone *zone, int order);

#else
|
|
/*
 * try_to_compact_pages - stub used when CONFIG_COMPACTION is disabled
 *
 * Performs no work and ignores all of its arguments; @contended is not
 * written. Always returns COMPACT_CONTINUE.
 */
static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
			unsigned int order, int alloc_flags,
			const struct alloc_context *ac,
			enum migrate_mode mode, int *contended)
{
	return COMPACT_CONTINUE;
}

/*
 * compact_pgdat - stub used when CONFIG_COMPACTION is disabled
 *
 * Intentionally empty: both arguments are ignored.
 */
static inline void compact_pgdat(pg_data_t *pgdat, int order)
{
}

/*
 * reset_isolation_suitable - stub used when CONFIG_COMPACTION is disabled
 *
 * Intentionally empty: @pgdat is ignored.
 */
static inline void reset_isolation_suitable(pg_data_t *pgdat)
{
}

static inline unsigned long compaction_suitable(struct zone *zone, int order,
|
|
int alloc_flags, int classzone_idx)
|
|
{
|
|
return COMPACT_SKIPPED;
|
|
}
|
|
|
|
/*
 * defer_compaction - stub used when CONFIG_COMPACTION is disabled
 *
 * Intentionally empty: @zone is never dereferenced and @order is ignored.
 */
static inline void defer_compaction(struct zone *zone, int order)
{
}

/*
 * compaction_deferred - stub used when CONFIG_COMPACTION is disabled
 *
 * Ignores both arguments (@zone is never dereferenced) and unconditionally
 * returns true.
 */
static inline bool compaction_deferred(struct zone *zone, int order)
{
	return true;
}

#endif /* CONFIG_COMPACTION */
|
|
|
|
#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
|
|
/*
 * Per-node registration hooks, available only when CONFIG_COMPACTION,
 * CONFIG_SYSFS and CONFIG_NUMA are all enabled. Only declared here; the
 * implementations live outside this header.
 */
extern int compaction_register_node(struct node *node);
extern void compaction_unregister_node(struct node *node);

#else
|
|
|
|
/*
 * compaction_register_node - stub used unless CONFIG_COMPACTION,
 * CONFIG_SYSFS and CONFIG_NUMA are all enabled
 *
 * Does nothing with @node (it is never dereferenced) and always returns 0.
 */
static inline int compaction_register_node(struct node *node)
{
	return 0;
}

/*
 * compaction_unregister_node - stub used unless CONFIG_COMPACTION,
 * CONFIG_SYSFS and CONFIG_NUMA are all enabled
 *
 * Intentionally empty: @node is never dereferenced.
 */
static inline void compaction_unregister_node(struct node *node)
{
}
#endif /* CONFIG_COMPACTION && CONFIG_SYSFS && CONFIG_NUMA */
|
|
|
|
#endif /* _LINUX_COMPACTION_H */
|