mirror of
https://github.com/torvalds/linux.git
synced 2024-11-07 04:32:03 +00:00
37b3a8ff3e
The impetus for this is that we would like to move to 64-bit PMDs and PGDs, but that would result in only supporting a 42-bit address space with the current page table layout. It'd be nice to support at least 43 bits. The reason we'd end up with only 42 bits after making PMDs and PGDs 64-bit is that we only use half-page sized PTE tables in order to make PMDs line up to 4MB, the hardware huge page size we use. So what we do here is we make huge pages 8MB, and fabricate them using 4MB hw TLB entries. Facilitate this by providing a "REAL_HPAGE_SHIFT" which is used in places that really need to operate on hardware 4MB pages. Use full pages (512 entries) for PTE tables, and adjust PMD_SHIFT, PGD_SHIFT, and the build time CPP test as needed. Use a CPP test to make sure REAL_HPAGE_SHIFT and the _PAGE_SZHUGE_* we use match up. This makes the pgtable cache completely unused, so remove the code managing it and the state used in mm_context_t. Now we have fewer spinlocks taken in the page table allocation path. The technique we use to fabricate the 8MB pages is to transfer bit 22 from the missing virtual address into the PTE's physical address field. That takes care of the transparent huge pages case. For hugetlb, we fill things in at the PTE level and that code already puts the sub huge page physical bits into the PTEs, based upon the offset, so there is nothing special we need to do. It all just works out. So, a small amount of complexity in the THP case, but this code is about to get much simpler when we move to 64-bit PMDs as we can move away from the fancy 32-bit huge PMD encoding and just put a real PTE value in there. With bug fixes and help from Bob Picco. Signed-off-by: David S. Miller <davem@davemloft.net>
230 lines
4.5 KiB
C
230 lines
4.5 KiB
C
/* arch/sparc64/mm/tlb.c
|
|
*
|
|
* Copyright (C) 2004 David S. Miller <davem@redhat.com>
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/init.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/preempt.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/tlb.h>
|
|
|
|
/* Heavily inspired by the ppc64 code. */
|
|
|
|
/* Per-CPU batch of user virtual addresses whose TLB flush has been
 * queued.  The code in this file uses its ->mm, ->vaddrs[], ->tlb_nr
 * and ->active members; struct tlb_batch itself is declared elsewhere.
 */
static DEFINE_PER_CPU(struct tlb_batch, tlb_batch);
|
|
|
|
void flush_tlb_pending(void)
|
|
{
|
|
struct tlb_batch *tb = &get_cpu_var(tlb_batch);
|
|
struct mm_struct *mm = tb->mm;
|
|
|
|
if (!tb->tlb_nr)
|
|
goto out;
|
|
|
|
flush_tsb_user(tb);
|
|
|
|
if (CTX_VALID(mm->context)) {
|
|
if (tb->tlb_nr == 1) {
|
|
global_flush_tlb_page(mm, tb->vaddrs[0]);
|
|
} else {
|
|
#ifdef CONFIG_SMP
|
|
smp_flush_tlb_pending(tb->mm, tb->tlb_nr,
|
|
&tb->vaddrs[0]);
|
|
#else
|
|
__flush_tlb_pending(CTX_HWBITS(tb->mm->context),
|
|
tb->tlb_nr, &tb->vaddrs[0]);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
tb->tlb_nr = 0;
|
|
|
|
out:
|
|
put_cpu_var(tlb_batch);
|
|
}
|
|
|
|
void arch_enter_lazy_mmu_mode(void)
|
|
{
|
|
struct tlb_batch *tb = &__get_cpu_var(tlb_batch);
|
|
|
|
tb->active = 1;
|
|
}
|
|
|
|
void arch_leave_lazy_mmu_mode(void)
|
|
{
|
|
struct tlb_batch *tb = &__get_cpu_var(tlb_batch);
|
|
|
|
if (tb->tlb_nr)
|
|
flush_tlb_pending();
|
|
tb->active = 0;
|
|
}
|
|
|
|
static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
|
|
bool exec)
|
|
{
|
|
struct tlb_batch *tb = &get_cpu_var(tlb_batch);
|
|
unsigned long nr;
|
|
|
|
vaddr &= PAGE_MASK;
|
|
if (exec)
|
|
vaddr |= 0x1UL;
|
|
|
|
nr = tb->tlb_nr;
|
|
|
|
if (unlikely(nr != 0 && mm != tb->mm)) {
|
|
flush_tlb_pending();
|
|
nr = 0;
|
|
}
|
|
|
|
if (!tb->active) {
|
|
flush_tsb_user_page(mm, vaddr);
|
|
global_flush_tlb_page(mm, vaddr);
|
|
goto out;
|
|
}
|
|
|
|
if (nr == 0)
|
|
tb->mm = mm;
|
|
|
|
tb->vaddrs[nr] = vaddr;
|
|
tb->tlb_nr = ++nr;
|
|
if (nr >= TLB_BATCH_NR)
|
|
flush_tlb_pending();
|
|
|
|
out:
|
|
put_cpu_var(tlb_batch);
|
|
}
|
|
|
|
/* Handle the cache and TLB side effects of replacing the PTE @orig
 * that mapped @vaddr in @mm.
 *
 * @ptep is not used by this function.
 * @fullmm, when non-zero, suppresses the TLB flush for the page.
 *
 * On non-hypervisor TLB types a dirty @orig may additionally need a
 * D-cache flush.  That is done only for valid, non-reserved pages
 * with a mapping, and only when bit 13 of the kernel address of the
 * page differs from bit 13 of @vaddr.
 * NOTE(review): bit 13 is presumably the D-cache alias bit - confirm.
 */
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
		   pte_t *ptep, pte_t orig, int fullmm)
{
	if (tlb_type != hypervisor && pte_dirty(orig)) {
		unsigned long pfn = pte_pfn(orig);

		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);

			/* Only a real file page is of interest here. */
			if (!PageReserved(page) && page_mapping(page)) {
				unsigned long kaddr;

				kaddr = (unsigned long) page_address(page);
				if ((kaddr ^ vaddr) & (1 << 13))
					flush_dcache_page_all(mm, page);
			}
		}
	}

	if (!fullmm)
		tlb_batch_add_one(mm, vaddr, pte_exec(orig));
}
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
|
|
pmd_t pmd, bool exec)
|
|
{
|
|
unsigned long end;
|
|
pte_t *pte;
|
|
|
|
pte = pte_offset_map(&pmd, vaddr);
|
|
end = vaddr + HPAGE_SIZE;
|
|
while (vaddr < end) {
|
|
if (pte_val(*pte) & _PAGE_VALID)
|
|
tlb_batch_add_one(mm, vaddr, exec);
|
|
pte++;
|
|
vaddr += PAGE_SIZE;
|
|
}
|
|
pte_unmap(pte);
|
|
}
|
|
|
|
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
|
|
pmd_t *pmdp, pmd_t pmd)
|
|
{
|
|
pmd_t orig = *pmdp;
|
|
|
|
*pmdp = pmd;
|
|
|
|
if (mm == &init_mm)
|
|
return;
|
|
|
|
if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
|
|
if (pmd_val(pmd) & PMD_ISHUGE)
|
|
mm->context.huge_pte_count++;
|
|
else
|
|
mm->context.huge_pte_count--;
|
|
|
|
/* Do not try to allocate the TSB hash table if we
|
|
* don't have one already. We have various locks held
|
|
* and thus we'll end up doing a GFP_KERNEL allocation
|
|
* in an atomic context.
|
|
*
|
|
* Instead, we let the first TLB miss on a hugepage
|
|
* take care of this.
|
|
*/
|
|
}
|
|
|
|
if (!pmd_none(orig)) {
|
|
bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
|
|
|
|
addr &= HPAGE_MASK;
|
|
if (pmd_val(orig) & PMD_ISHUGE) {
|
|
tlb_batch_add_one(mm, addr, exec);
|
|
tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
|
|
} else {
|
|
tlb_batch_pmd_scan(mm, addr, orig, exec);
|
|
}
|
|
}
|
|
}
|
|
|
|
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
|
pgtable_t pgtable)
|
|
{
|
|
struct list_head *lh = (struct list_head *) pgtable;
|
|
|
|
assert_spin_locked(&mm->page_table_lock);
|
|
|
|
/* FIFO */
|
|
if (!mm->pmd_huge_pte)
|
|
INIT_LIST_HEAD(lh);
|
|
else
|
|
list_add(lh, (struct list_head *) mm->pmd_huge_pte);
|
|
mm->pmd_huge_pte = pgtable;
|
|
}
|
|
|
|
/* Take back the page table that mm->pmd_huge_pte currently points to
 * and return it.
 *
 * mm->pmd_huge_pte moves on to the next deposited table, or becomes
 * NULL when this was the only one.  The caller must hold
 * mm->page_table_lock.  @pmdp is not used.
 */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t pgtable;
	struct list_head *node;

	assert_spin_locked(&mm->page_table_lock);

	pgtable = mm->pmd_huge_pte;
	node = (struct list_head *) pgtable;

	if (!list_empty(node)) {
		mm->pmd_huge_pte = (pgtable_t) node->next;
		list_del(node);
	} else {
		mm->pmd_huge_pte = NULL;
	}

	/* The list node was overlaid on the start of the table; clear
	 * the first two entries before handing the table back.
	 */
	pte_val(pgtable[0]) = 0;
	pte_val(pgtable[1]) = 0;

	return pgtable;
}
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|