linux/arch/x86/mm/pageattr_32.c
Andi Kleen 3c86882341 x86: c_p_a() fix: reorder TLB / cache flushes to follow Intel recommendation
Intel recommends to first flush the TLBs and then the caches
on caching attribute changes. c_p_a() previously did it the
other way round. Reorder that.

The procedure is still not fully compliant to the Intel documentation
because Intel recommends a all CPU synchronization step between
the TLB flushes and the cache flushes.

However on all new Intel CPUs this is now meaningless anyways
because they support Self-Snoop and can skip the cache flush
step anyway.

[ mingo@elte.hu: decoupled from clflush and ported it to x86.git ]

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 13:33:52 +01:00

297 lines
6.8 KiB
C

/*
* Copyright 2002 Andi Kleen, SuSE Labs.
* Thanks to Ben LaHaise for precious feedback.
*/
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);
pte_t *lookup_address(unsigned long address, int *level)
{
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
pmd_t *pmd;
if (pgd_none(*pgd))
return NULL;
pud = pud_offset(pgd, address);
if (pud_none(*pud))
return NULL;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
*level = 2;
if (pmd_large(*pmd))
return (pte_t *)pmd;
*level = 3;
return pte_offset_kernel(pmd, address);
}
static struct page *
split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
{
unsigned long addr;
struct page *base;
pte_t *pbase;
int i;
spin_unlock_irq(&cpa_lock);
base = alloc_pages(GFP_KERNEL, 0);
spin_lock_irq(&cpa_lock);
if (!base)
return NULL;
/*
* page_private is used to track the number of entries in
* the page table page that have non standard attributes.
*/
SetPagePrivate(base);
page_private(base) = 0;
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
addr == address ? prot : ref_prot));
}
return base;
}
static void cache_flush_page(struct page *p)
{
void *addr = page_address(p);
int i;
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
clflush(addr + i);
}
static void flush_kernel_map(void *arg)
{
struct list_head *lh = (struct list_head *)arg;
struct page *p;
/*
* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
__flush_tlb_all();
/* High level code is not ready for clflush yet */
if (0 && cpu_has_clflush) {
list_for_each_entry(p, lh, lru)
cache_flush_page(p);
} else {
if (boot_cpu_data.x86_model >= 4)
wbinvd();
}
}
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
unsigned long flags;
struct page *page;
/* change init_mm */
set_pte_atomic(kpte, pte);
if (SHARED_KERNEL_PMD)
return;
spin_lock_irqsave(&pgd_lock, flags);
for (page = pgd_list; page; page = (struct page *)page->index) {
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
pud = pud_offset(pgd, address);
pmd = pmd_offset(pud, address);
set_pte_atomic((pte_t *)pmd, pte);
}
spin_unlock_irqrestore(&pgd_lock, flags);
}
/*
* No more special protections in this 2/4MB area - revert to a large
* page again.
*/
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
pgprot_t ref_prot;
pte_t *linear;
ref_prot =
((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
linear = (pte_t *)
pmd_offset(pud_offset(pgd_offset_k(address), address), address);
set_pmd_pte(linear, address,
pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
ref_prot));
}
static inline void save_page(struct page *kpte_page)
{
if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
list_add(&kpte_page->lru, &df_list);
}
static int __change_page_attr(struct page *page, pgprot_t prot)
{
struct page *kpte_page;
unsigned long address;
pte_t *kpte;
int level;
BUG_ON(PageHighMem(page));
address = (unsigned long)page_address(page);
kpte = lookup_address(address, &level);
if (!kpte)
return -EINVAL;
kpte_page = virt_to_page(kpte);
BUG_ON(PageLRU(kpte_page));
BUG_ON(PageCompound(kpte_page));
if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, prot));
} else {
struct page *split;
pgprot_t ref_prot;
ref_prot =
((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
? PAGE_KERNEL_EXEC : PAGE_KERNEL;
split = split_large_page(address, prot, ref_prot);
if (!split)
return -ENOMEM;
set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
kpte_page = split;
}
page_private(kpte_page)++;
} else {
if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
BUG_ON(page_private(kpte_page) == 0);
page_private(kpte_page)--;
} else
BUG();
}
/*
* If the pte was reserved, it means it was created at boot
* time (not via split_large_page) and in turn we must not
* replace it with a largepage.
*/
save_page(kpte_page);
if (!PageReserved(kpte_page)) {
if (cpu_has_pse && (page_private(kpte_page) == 0)) {
paravirt_release_pt(page_to_pfn(kpte_page));
revert_page(kpte_page, address);
}
}
return 0;
}
static inline void flush_map(struct list_head *l)
{
on_each_cpu(flush_kernel_map, l, 1, 1);
}
/*
* Change the page attributes of an page in the linear mapping.
*
* This should be used when a page is mapped with a different caching policy
* than write-back somewhere - some CPUs do not like it when mappings with
* different caching policies exist. This changes the page attributes of the
* in kernel linear mapping too.
*
* The caller needs to ensure that there are no conflicting mappings elsewhere.
* This function only deals with the kernel linear map.
*
* Caller must call global_flush_tlb() after this.
*/
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
unsigned long flags;
int err = 0, i;
spin_lock_irqsave(&cpa_lock, flags);
for (i = 0; i < numpages; i++, page++) {
err = __change_page_attr(page, prot);
if (err)
break;
}
spin_unlock_irqrestore(&cpa_lock, flags);
return err;
}
EXPORT_SYMBOL(change_page_attr);
void global_flush_tlb(void)
{
struct page *pg, *next;
struct list_head l;
BUG_ON(irqs_disabled());
spin_lock_irq(&cpa_lock);
list_replace_init(&df_list, &l);
spin_unlock_irq(&cpa_lock);
flush_map(&l);
list_for_each_entry_safe(pg, next, &l, lru) {
list_del(&pg->lru);
clear_bit(PG_arch_1, &pg->flags);
if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
continue;
ClearPagePrivate(pg);
__free_page(pg);
}
}
EXPORT_SYMBOL(global_flush_tlb);
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
if (PageHighMem(page))
return;
if (!enable) {
debug_check_no_locks_freed(page_address(page),
numpages * PAGE_SIZE);
}
/*
* the return value is ignored - the calls cannot fail,
* large pages are disabled at boot time.
*/
change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
/*
* we should perform an IPI and flush all tlbs,
* but that can deadlock->flush only current cpu.
*/
__flush_tlb_all();
}
#endif