ASIDs have always been stored as unsigned longs, ie. 32 bits on MIPS32
kernels. This is problematic because it is feasible for the ASID
version to overflow & wrap around to zero.

We currently attempt to handle this overflow by simply setting the ASID
version to 1, using asid_first_version(), but we make no attempt to
account for the fact that there may be mm_structs with stale ASIDs that
have versions which we now reuse due to the overflow & wrap around.

Encountering this requires that:

  1) A struct mm_struct X is active on CPU A using ASID (V,n).

  2) That mm is not used on CPU A for the length of time that it takes
     for CPU A's asid_cache to overflow & wrap around to the same
     version V that the mm had in step 1. During this time tasks using
     the mm could either be sleeping or only scheduled on other CPUs.

  3) Some other mm Y becomes active on CPU A and is allocated the same
     ASID (V,n).

  4) mm X now becomes active on CPU A again, and now incorrectly has
     the same ASID as mm Y.

Where struct mm_struct ASIDs are represented above in the format
(version, EntryHi.ASID), and on a typical MIPS32 system version will be
24 bits wide & EntryHi.ASID will be 8 bits wide.

The length of time required in step 2 is highly dependent upon the CPU
& workload, but for a hypothetical 2GHz CPU running a workload which
generates a new ASID every 10000 cycles this period is around 248 days.
Due to this long period of time & the fact that tasks need to be
scheduled in just the right (or wrong, depending upon your inclination)
way, this is obviously a difficult bug to encounter but it's entirely
possible as evidenced by reports.

In order to fix this, simply extend ASIDs to 64 bits even on MIPS32
builds. This will extend the period of time required for the
hypothetical system above to encounter the problem from 248 days to
around 3 trillion years, which feels safely outside of the realms of
possibility.

The cost of this is slightly more generated code in some commonly
executed paths, but this is pretty minimal:

                         | Code Size Gain | Percentage
  -----------------------|----------------|-------------
    decstation_defconfig |           +270 | +0.00%
        32r2el_defconfig |           +652 | +0.01%
        32r6el_defconfig |          +1000 | +0.01%

I have been unable to measure any change in performance of the LMbench
lat_ctx or lat_proc tests resulting from the 64b ASIDs on either
32r2el_defconfig+interAptiv or 32r6el_defconfig+I6500 systems.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Suggested-by: James Hogan <jhogan@kernel.org>
References: https://lore.kernel.org/linux-mips/80B78A8B8FEE6145A87579E8435D78C30205D5F3@fzex.ruijie.com.cn/
References: https://lore.kernel.org/linux-mips/1488684260-18867-1-git-send-email-jiwei.sun@windriver.com/
Cc: Jiwei Sun <jiwei.sun@windriver.com>
Cc: Yu Huabing <yhb@ruijie.com.cn>
Cc: stable@vger.kernel.org # 2.6.12+
Cc: linux-mips@vger.kernel.org
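
To make the wrap-around concrete, here is a minimal user-space sketch
(illustrative only, not kernel code). It assumes the typical 8-bit
EntryHi.ASID field described above; its VMASK32/VMASK64 constants play
the role of asid_version_mask() in the header below, where switch_mm()
performs the equivalent check
(cpu_context(cpu, next) ^ asid_cache(cpu)) & asid_version_mask(cpu):

/*
 * Standalone model of versioned ASIDs: low 8 bits are the hardware
 * EntryHi.ASID, all upper bits are the software version.
 */
#include <stdint.h>
#include <stdio.h>

#define ASID_MASK       0xffu                   /* hardware EntryHi.ASID field */
#define VMASK32         (~(uint32_t)ASID_MASK)  /* 24-bit software version */
#define VMASK64         (~(uint64_t)ASID_MASK)  /* 56-bit software version */

int main(void)
{
        /* mm X was last active on CPU A with version V = 1, ASID n = 5. */
        uint32_t mm_x = (1u << 8) | 5;

        /*
         * After 2^32 increments CPU A's 32-bit asid_cache wraps around
         * and lands back on the very same value:
         */
        uint32_t cache32 = mm_x;

        /* The staleness check now false-negatives: */
        if (!((mm_x ^ cache32) & VMASK32))
                printf("32-bit: stale ASID wrongly treated as live\n");

        /*
         * With 64-bit ASIDs the same number of increments leaves the
         * upper bits different, so the check still detects the stale
         * version:
         */
        uint64_t mm_x64 = (1ull << 8) | 5;
        uint64_t cache64 = mm_x64 + (1ull << 32);

        if ((mm_x64 ^ cache64) & VMASK64)
                printf("64-bit: stale ASID correctly detected\n");

        return 0;
}
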
/*
 * Switch a MMU context.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1996, 1997, 1998, 1999 by Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 */
#ifndef _ASM_MMU_CONTEXT_H
#define _ASM_MMU_CONTEXT_H

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/smp.h>
#include <linux/slab.h>

#include <asm/cacheflush.h>
#include <asm/dsemul.h>
#include <asm/hazards.h>
#include <asm/tlbflush.h>
#include <asm-generic/mm_hooks.h>

#define htw_set_pwbase(pgd)                                     \
        do {                                                    \
                if (cpu_has_htw) {                              \
                        write_c0_pwbase(pgd);                   \
                        back_to_back_c0_hazard();               \
                }                                               \
        } while (0)

extern void tlbmiss_handler_setup_pgd(unsigned long);
extern char tlbmiss_handler_setup_pgd_end[];

/* Note: This is also implemented with uasm in arch/mips/kvm/entry.c */
#define TLBMISS_HANDLER_SETUP_PGD(pgd)                          \
        do {                                                    \
                tlbmiss_handler_setup_pgd((unsigned long)(pgd)); \
                htw_set_pwbase((unsigned long)pgd);             \
        } while (0)

#ifdef CONFIG_MIPS_PGD_C0_CONTEXT

#define TLBMISS_HANDLER_RESTORE()                               \
        write_c0_xcontext((unsigned long) smp_processor_id() << \
                          SMP_CPUID_REGSHIFT)

#define TLBMISS_HANDLER_SETUP()                                 \
        do {                                                    \
                TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);      \
                TLBMISS_HANDLER_RESTORE();                      \
        } while (0)

#else /* !CONFIG_MIPS_PGD_C0_CONTEXT: using pgd_current */

/*
 * For the fast tlb miss handlers, we keep a per cpu array of pointers
 * to the current pgd for each processor. Also, the proc. id is stuffed
 * into the context register.
 */
extern unsigned long pgd_current[];

#define TLBMISS_HANDLER_RESTORE()                               \
        write_c0_context((unsigned long) smp_processor_id() <<  \
                         SMP_CPUID_REGSHIFT)

#define TLBMISS_HANDLER_SETUP()                                 \
        TLBMISS_HANDLER_RESTORE();                              \
        back_to_back_c0_hazard();                               \
        TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir)
#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */

/*
 * All bits unused by the hardware ASID field are treated as a
 * software ASID extension: the version.
 */
static inline u64 asid_version_mask(unsigned int cpu)
{
        unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);

        /*
         * asid_mask | (asid_mask - 1) extends the hardware mask down
         * to bit 0, so the version mask covers every bit above the
         * ASID field even when the field does not start at bit 0.
         */
        return ~(u64)(asid_mask | (asid_mask - 1));
}

static inline u64 asid_first_version(unsigned int cpu)
{
        return ~asid_version_mask(cpu) + 1;
}
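
/*
 * Worked example (illustrative): with the typical MIPS32 8-bit
 * EntryHi.ASID field, cpu_asid_mask() == 0xff, so
 *
 *      asid_version_mask()  == ~(u64)0xff == 0xffffffffffffff00
 *      asid_first_version() == 0xff + 1   == 0x100 (version 1, ASID 0)
 *
 * i.e. the software version occupies the upper 56 bits of the 64-bit
 * ASID, and versions start counting from 1 rather than 0.
 */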

#define cpu_context(cpu, mm)    ((mm)->context.asid[cpu])
#define asid_cache(cpu)         (cpu_data[cpu].asid_cache)
#define cpu_asid(cpu, mm) \
        (cpu_context((cpu), (mm)) & cpu_asid_mask(&cpu_data[cpu]))

static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
}

/* Normal, classic MIPS get_new_mmu_context */
static inline void
get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
{
        u64 asid = asid_cache(cpu);

        /* If the hardware ASID field wrapped to zero, start a new version */
        if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
                if (cpu_has_vtag_icache)
                        flush_icache_all();
                local_flush_tlb_all();  /* start new asid cycle */
        }

        cpu_context(cpu, mm) = asid_cache(cpu) = asid;
}
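
/*
 * E.g. with an 8-bit ASID field (illustrative values): incrementing
 * asid_cache from 0x2ff gives 0x300, whose hardware ASID bits are
 * zero - the ASID space for version 2 is exhausted, so the TLB is
 * flushed and (version 3, ASID 0) begins the new cycle.
 */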

/*
 * Initialize the context related info for a new mm_struct
 * instance.
 */
static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        int i;

        for_each_possible_cpu(i)
                cpu_context(i, mm) = 0;

        mm->context.bd_emupage_allocmap = NULL;
        spin_lock_init(&mm->context.bd_emupage_lock);
        init_waitqueue_head(&mm->context.bd_emupage_queue);

        return 0;
}

static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                             struct task_struct *tsk)
{
        unsigned int cpu = smp_processor_id();
        unsigned long flags;
        local_irq_save(flags);

        htw_stop();
        /* Check if our ASID is of an older version and thus invalid */
        if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & asid_version_mask(cpu))
                get_new_mmu_context(next, cpu);
        write_c0_entryhi(cpu_asid(cpu, next));
        TLBMISS_HANDLER_SETUP_PGD(next->pgd);

        /*
         * Mark current->active_mm as not "active" anymore.
         * We don't want to mislead possible IPI tlb flush routines.
         */
        cpumask_clear_cpu(cpu, mm_cpumask(prev));
        cpumask_set_cpu(cpu, mm_cpumask(next));
        htw_start();

        local_irq_restore(flags);
}
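
/*
 * Worked example of the version check above (illustrative values): if
 * asid_cache(cpu) == 0x305 (version 3) while next last ran here with
 * cpu_context() == 0x204 (version 2), then
 * (0x204 ^ 0x305) & asid_version_mask() == 0x100 != 0, so a fresh
 * ASID is allocated; had the versions matched, the existing ASID
 * would still be live and would be reused as-is.
 */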

/*
 * Destroy context related info for an mm_struct that is about
 * to be put to rest.
 */
static inline void destroy_context(struct mm_struct *mm)
{
        dsemul_mm_cleanup(mm);
}

#define deactivate_mm(tsk, mm)  do { } while (0)

/*
 * After we have set current->mm to a new value, this activates
 * the context for the new mm so we see the new mappings.
 */
static inline void
activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
        unsigned long flags;
        unsigned int cpu = smp_processor_id();

        local_irq_save(flags);

        htw_stop();
        /* Unconditionally get a new ASID. */
        get_new_mmu_context(next, cpu);

        write_c0_entryhi(cpu_asid(cpu, next));
        TLBMISS_HANDLER_SETUP_PGD(next->pgd);

        /* mark mmu ownership change */
        cpumask_clear_cpu(cpu, mm_cpumask(prev));
        cpumask_set_cpu(cpu, mm_cpumask(next));
        htw_start();

        local_irq_restore(flags);
}

/*
 * If mm is currently active_mm, we can't really drop it. Instead,
 * we will get a new one for it.
 */
static inline void
drop_mmu_context(struct mm_struct *mm, unsigned cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        htw_stop();

        if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
                get_new_mmu_context(mm, cpu);
                write_c0_entryhi(cpu_asid(cpu, mm));
        } else {
                /* will get a new context next time */
                cpu_context(cpu, mm) = 0;
        }
        htw_start();
        local_irq_restore(flags);
}

#endif /* _ASM_MMU_CONTEXT_H */