Merge tag 'x86_cleanups_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cleanups from Borislav Petkov:

 - Serious sanitization and cleanup of the whole APERF/MPERF and
   frequency invariance code, along with removing the need for
   unnecessary IPIs

 - Finally remove a.out support

 - The usual trivial cleanups and fixes all over x86

* tag 'x86_cleanups_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86: Remove empty files
  x86/speculation: Add missing srbds=off to the mitigations= help text
  x86/prctl: Remove pointless task argument
  x86/aperfperf: Make it correct on 32bit and UP kernels
  x86/aperfmperf: Integrate the fallback code from show_cpuinfo()
  x86/aperfmperf: Replace arch_freq_get_on_cpu()
  x86/aperfmperf: Replace aperfmperf_get_khz()
  x86/aperfmperf: Store aperf/mperf data for cpu frequency reads
  x86/aperfmperf: Make parts of the frequency invariance code unconditional
  x86/aperfmperf: Restructure arch_scale_freq_tick()
  x86/aperfmperf: Put frequency invariance aperf/mperf data into a struct
  x86/aperfmperf: Untangle Intel and AMD frequency invariance init
  x86/aperfmperf: Separate AP/BP frequency invariance init
  x86/smp: Move APERF/MPERF code where it belongs
  x86/aperfmperf: Dont wake idle CPUs in arch_freq_get_on_cpu()
  x86/process: Fix kernel-doc warning due to a changed function name
  x86: Remove a.out support
  x86/mm: Replace nodes_weight() with nodes_empty() where appropriate
  x86: Replace cpumask_weight() with cpumask_empty() where appropriate
  x86/pkeys: Remove __arch_set_user_pkey_access() declaration
  ...
commit a13dc4d409
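Background for the diffs below: everything in this series leans on one identity — over a sampling window, the ratio of the APERF and MPERF MSR deltas gives the average frequency while the CPU was not idle. A minimal sketch of that computation (function name and sample values are illustrative, not taken from the kernel):

	static unsigned long long busy_khz(unsigned long long aperf_delta,
					   unsigned long long mperf_delta,
					   unsigned long long base_khz)
	{
		/* MPERF may not have ticked between two reads; avoid a zero divide. */
		if (!mperf_delta)
			return 0;

		/* BusyMHz = delta_APERF / delta_MPERF * freq_base, here in kHz */
		return base_khz * aperf_delta / mperf_delta;
	}

With base_khz = 2000000 and APERF advancing 3 counts for every 2 of MPERF, this returns 3000000: the core averaged 3 GHz while busy.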
@@ -3147,6 +3147,7 @@
 			       mds=off [X86]
 			       tsx_async_abort=off [X86]
 			       kvm.nx_huge_pages=off [X86]
+			       srbds=off [X86,INTEL]
 			       no_entry_flush [PPC]
 			       no_uaccess_flush [PPC]
@@ -7385,7 +7385,6 @@ L:	linux-mm@kvack.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
 F:	arch/alpha/kernel/binfmt_loader.c
-F:	arch/x86/ia32/ia32_aout.c
 F:	fs/*binfmt_*.c
 F:	fs/exec.c
 F:	include/linux/binfmts.h
@@ -2842,13 +2842,6 @@ config IA32_EMULATION
 	  64-bit kernel. You should likely turn this on, unless you're
 	  100% sure that you don't have any 32-bit programs left.
 
-config IA32_AOUT
-	tristate "IA32 a.out support"
-	depends on IA32_EMULATION
-	depends on BROKEN
-	help
-	  Support old a.out binaries in the 32bit emulation.
-
 config X86_X32_ABI
 	bool "x32 ABI for 64-bit mode"
 	depends on X86_64
@@ -5,7 +5,5 @@
 
 obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o
 
-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
-
 audit-class-$(CONFIG_AUDIT) := audit.o
 obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * a.out loader for x86-64
- *
- * Copyright (C) 1991, 1992, 1996 Linus Torvalds
- * Hacked together by Andi Kleen
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/jiffies.h>
-#include <linux/perf_event.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/user32.h>
-#include <asm/ia32.h>
-
-#undef WARN_OLD
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file *);
-
-static struct linux_binfmt aout_format = {
-	.module		= THIS_MODULE,
-	.load_binary	= load_aout_binary,
-	.load_shlib	= load_aout_library,
-};
-
-static int set_brk(unsigned long start, unsigned long end)
-{
-	start = PAGE_ALIGN(start);
-	end = PAGE_ALIGN(end);
-	if (end <= start)
-		return 0;
-	return vm_brk(start, end - start);
-}
-
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
-{
-	u32 __user *argv, *envp, *sp;
-	int argc = bprm->argc, envc = bprm->envc;
-
-	sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
-	sp -= envc+1;
-	envp = sp;
-	sp -= argc+1;
-	argv = sp;
-	put_user((unsigned long) envp, --sp);
-	put_user((unsigned long) argv, --sp);
-	put_user(argc, --sp);
-	current->mm->arg_start = (unsigned long) p;
-	while (argc-- > 0) {
-		char c;
-
-		put_user((u32)(unsigned long)p, argv++);
-		do {
-			get_user(c, p++);
-		} while (c);
-	}
-	put_user(0, argv);
-	current->mm->arg_end = current->mm->env_start = (unsigned long) p;
-	while (envc-- > 0) {
-		char c;
-
-		put_user((u32)(unsigned long)p, envp++);
-		do {
-			get_user(c, p++);
-		} while (c);
-	}
-	put_user(0, envp);
-	current->mm->env_end = (unsigned long) p;
-	return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries. There is no binary dependent code anywhere else.
- */
-static int load_aout_binary(struct linux_binprm *bprm)
-{
-	unsigned long error, fd_offset, rlim;
-	struct pt_regs *regs = current_pt_regs();
-	struct exec ex;
-	int retval;
-
-	ex = *((struct exec *) bprm->buf);		/* exec-header */
-	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
-	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
-	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
-	    i_size_read(file_inode(bprm->file)) <
-	    ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
-		return -ENOEXEC;
-	}
-
-	fd_offset = N_TXTOFF(ex);
-
-	/* Check initial limits. This avoids letting people circumvent
-	 * size limits imposed on them by creating programs with large
-	 * arrays in the data or bss.
-	 */
-	rlim = rlimit(RLIMIT_DATA);
-	if (rlim >= RLIM_INFINITY)
-		rlim = ~0;
-	if (ex.a_data + ex.a_bss > rlim)
-		return -ENOMEM;
-
-	/* Flush all traces of the currently running executable */
-	retval = begin_new_exec(bprm);
-	if (retval)
-		return retval;
-
-	/* OK, This is the point of no return */
-	set_personality(PER_LINUX);
-	set_personality_ia32(false);
-
-	setup_new_exec(bprm);
-
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
-	current->mm->end_code = ex.a_text +
-		(current->mm->start_code = N_TXTADDR(ex));
-	current->mm->end_data = ex.a_data +
-		(current->mm->start_data = N_DATADDR(ex));
-	current->mm->brk = ex.a_bss +
-		(current->mm->start_brk = N_BSSADDR(ex));
-
-	retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
-	if (retval < 0)
-		return retval;
-
-	if (N_MAGIC(ex) == OMAGIC) {
-		unsigned long text_addr, map_size;
-
-		text_addr = N_TXTADDR(ex);
-		map_size = ex.a_text+ex.a_data;
-
-		error = vm_brk(text_addr & PAGE_MASK, map_size);
-
-		if (error)
-			return error;
-
-		error = read_code(bprm->file, text_addr, 32,
-				  ex.a_text + ex.a_data);
-		if ((signed long)error < 0)
-			return error;
-	} else {
-#ifdef WARN_OLD
-		static unsigned long error_time, error_time2;
-		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
-		    (N_MAGIC(ex) != NMAGIC) &&
-		    time_after(jiffies, error_time2 + 5*HZ)) {
-			printk(KERN_NOTICE "executable not page aligned\n");
-			error_time2 = jiffies;
-		}
-
-		if ((fd_offset & ~PAGE_MASK) != 0 &&
-		    time_after(jiffies, error_time + 5*HZ)) {
-			printk(KERN_WARNING
-			       "fd_offset is not page aligned. Please convert "
-			       "program: %pD\n",
-			       bprm->file);
-			error_time = jiffies;
-		}
-#endif
-
-		if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
-			error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
-			if (error)
-				return error;
-
-			read_code(bprm->file, N_TXTADDR(ex), fd_offset,
-				  ex.a_text+ex.a_data);
-			goto beyond_if;
-		}
-
-		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
-				PROT_READ | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-				fd_offset);
-
-		if (error != N_TXTADDR(ex))
-			return error;
-
-		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
-				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-				fd_offset + ex.a_text);
-		if (error != N_DATADDR(ex))
-			return error;
-	}
-
-beyond_if:
-	error = set_brk(current->mm->start_brk, current->mm->brk);
-	if (error)
-		return error;
-
-	set_binfmt(&aout_format);
-
-	current->mm->start_stack =
-		(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
-	/* start thread */
-	loadsegment(fs, 0);
-	loadsegment(ds, __USER32_DS);
-	loadsegment(es, __USER32_DS);
-	load_gs_index(0);
-	(regs)->ip = ex.a_entry;
-	(regs)->sp = current->mm->start_stack;
-	(regs)->flags = 0x200;
-	(regs)->cs = __USER32_CS;
-	(regs)->ss = __USER32_DS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
-	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-	return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
-	unsigned long bss, start_addr, len, error;
-	int retval;
-	struct exec ex;
-	loff_t pos = 0;
-
-	retval = -ENOEXEC;
-	error = kernel_read(file, &ex, sizeof(ex), &pos);
-	if (error != sizeof(ex))
-		goto out;
-
-	/* We come in here for the regular a.out style of shared libraries */
-	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
-	    N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
-	    i_size_read(file_inode(file)) <
-	    ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
-		goto out;
-	}
-
-	if (N_FLAGS(ex))
-		goto out;
-
-	/* For QMAGIC, the starting address is 0x20 into the page. We mask
-	   this off to get the starting address for the page */
-
-	start_addr = ex.a_entry & 0xfffff000;
-
-	if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
-#ifdef WARN_OLD
-		static unsigned long error_time;
-		if (time_after(jiffies, error_time + 5*HZ)) {
-			printk(KERN_WARNING
-			       "N_TXTOFF is not page aligned. Please convert "
-			       "library: %pD\n",
-			       file);
-			error_time = jiffies;
-		}
-#endif
-		retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
-		if (retval)
-			goto out;
-
-		read_code(file, start_addr, N_TXTOFF(ex),
-			  ex.a_text + ex.a_data);
-		retval = 0;
-		goto out;
-	}
-	/* Now use mmap to map the library into memory. */
-	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
-			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-			N_TXTOFF(ex));
-	retval = error;
-	if (error != start_addr)
-		goto out;
-
-	len = PAGE_ALIGN(ex.a_text + ex.a_data);
-	bss = ex.a_text + ex.a_data + ex.a_bss;
-	if (bss > len) {
-		retval = vm_brk(start_addr + len, bss - len);
-		if (retval)
-			goto out;
-	}
-	retval = 0;
-out:
-	return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
-	register_binfmt(&aout_format);
-	return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
-	unregister_binfmt(&aout_format);
-}
-
-module_init(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
@@ -36,6 +36,8 @@ extern int _debug_hotplug_cpu(int cpu, int action);
 #endif
 #endif
 
+extern void ap_init_aperfmperf(void);
+
 int mwait_usable(const struct cpuinfo_x86 *);
 
 unsigned int x86_family(unsigned int sig);
@@ -162,7 +162,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
 }
 
 /* prctl */
-struct task_struct;
-extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
+extern long fpu_xstate_prctl(int option, unsigned long arg2);
 
 #endif /* _ASM_X86_FPU_API_H */
@@ -41,9 +41,6 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
 	return __arch_override_mprotect_pkey(vma, prot, pkey);
 }
 
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-		unsigned long init_val);
-
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
 
 #define mm_pkey_allocation_map(mm)	(mm->context.pkey_allocation_map)
@@ -118,11 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey)
 	return 0;
 }
 
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-		unsigned long init_val);
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-		unsigned long init_val);
-
 static inline int vma_pkey(struct vm_area_struct *vma)
 {
 	unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
@@ -42,7 +42,6 @@ void x86_configure_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl_common(struct task_struct *task, int option,
-			  unsigned long arg2);
+long do_arch_prctl_common(int option, unsigned long arg2);
 
 #endif /* _ASM_X86_PROTO_H */
@@ -212,30 +212,19 @@ static inline long arch_scale_freq_capacity(int cpu)
 }
 #define arch_scale_freq_capacity arch_scale_freq_capacity
 
+extern void arch_set_max_freq_ratio(bool turbo_disabled);
+extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
+#else
+static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
+#endif
+
 extern void arch_scale_freq_tick(void);
 #define arch_scale_freq_tick arch_scale_freq_tick
 
-extern void arch_set_max_freq_ratio(bool turbo_disabled);
-void init_freq_invariance(bool secondary, bool cppc_ready);
-#else
-static inline void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-}
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
-#endif
-
 #ifdef CONFIG_ACPI_CPPC_LIB
 void init_freq_invariance_cppc(void);
 #define arch_init_invariance_cppc init_freq_invariance_cppc
-
-bool amd_set_max_freq_ratio(u64 *ratio);
-#else
-static inline bool amd_set_max_freq_ratio(u64 *ratio)
-{
-	return false;
-}
 #endif
 
 #endif /* _ASM_X86_TOPOLOGY_H */
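For orientation, the value published through arch_scale_freq_capacity() above is what the scheduler multiplies into its utilization sums. A hedged re-derivation of the per-tick scale that scale_freq_tick() computes further down in this series (standalone C, overflow checks elided, numbers made up):

	#include <stdint.h>

	#define SCHED_CAPACITY_SHIFT	10
	#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

	/*
	 * freq_scale = (acnt << 2*SHIFT) / (mcnt * max_freq_ratio), clamped
	 * to 1024.  max_freq_ratio is freq_max/freq_base, itself scaled by 1024.
	 */
	static unsigned long freq_scale(uint64_t acnt, uint64_t mcnt,
					uint64_t max_freq_ratio)
	{
		uint64_t scale;

		if (!mcnt || !max_freq_ratio)
			return SCHED_CAPACITY_SCALE;

		scale = (acnt << (2 * SCHED_CAPACITY_SHIFT)) / (mcnt * max_freq_ratio);
		return scale > SCHED_CAPACITY_SCALE ? SCHED_CAPACITY_SCALE : scale;
	}

Running at exactly base frequency (acnt == mcnt) with max_freq_ratio = 1536 (turbo 1.5x base) gives 1048576 / 1536 = 682, i.e. about two thirds of SCHED_CAPACITY_SCALE.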
@@ -50,20 +50,17 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
 	return err;
 }
 
-bool amd_set_max_freq_ratio(u64 *ratio)
+static void amd_set_max_freq_ratio(void)
 {
 	struct cppc_perf_caps perf_caps;
 	u64 highest_perf, nominal_perf;
 	u64 perf_ratio;
 	int rc;
 
-	if (!ratio)
-		return false;
-
 	rc = cppc_get_perf_caps(0, &perf_caps);
 	if (rc) {
 		pr_debug("Could not retrieve perf counters (%d)\n", rc);
-		return false;
+		return;
 	}
 
 	highest_perf = amd_get_highest_perf();
@@ -71,7 +68,7 @@ bool amd_set_max_freq_ratio(u64 *ratio)
 
 	if (!highest_perf || !nominal_perf) {
 		pr_debug("Could not retrieve highest or nominal performance\n");
-		return false;
+		return;
 	}
 
 	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
@@ -79,25 +76,27 @@ bool amd_set_max_freq_ratio(u64 *ratio)
 	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
 	if (!perf_ratio) {
 		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
-		return false;
+		return;
 	}
 
-	*ratio = perf_ratio;
-	arch_set_max_freq_ratio(false);
-
-	return true;
+	freq_invariance_set_perf_ratio(perf_ratio, false);
 }
 
 static DEFINE_MUTEX(freq_invariance_lock);
 
 void init_freq_invariance_cppc(void)
 {
-	static bool secondary;
+	static bool init_done;
+
+	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+		return;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return;
 
 	mutex_lock(&freq_invariance_lock);
-
-	init_freq_invariance(secondary, true);
-	secondary = true;
-
+	if (!init_done)
+		amd_set_max_freq_ratio();
+
+	init_done = true;
 	mutex_unlock(&freq_invariance_lock);
 }
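The AMD path above now derives the ratio from CPPC highest vs. nominal performance and hands it straight to freq_invariance_set_perf_ratio(). A worked example of the arithmetic, with hypothetical CPPC readings (real values come from cppc_get_perf_caps() and amd_get_highest_perf()):

	#include <stdint.h>

	/* e.g. highest_perf = 166, nominal_perf = 120 on a hypothetical part */
	static uint64_t amd_perf_ratio(uint64_t highest_perf, uint64_t nominal_perf)
	{
		uint64_t ratio = highest_perf * 1024 / nominal_perf;	/* = 1416 */

		/* midpoint between nominal (1024) and highest, as in the code above */
		return (ratio + 1024) >> 1;				/* = 1220 */
	}

The result, 1220/1024 ≈ 1.19, becomes arch_turbo_freq_ratio; taking the midpoint rather than the raw highest/nominal ratio keeps typical turbo operation near — instead of far below — full scale.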
@@ -6,146 +6,446 @@
  * Copyright (C) 2017 Intel Corp.
  * Author: Len Brown <len.brown@intel.com>
  */
 
+#include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/ktime.h>
 #include <linux/math64.h>
 #include <linux/percpu.h>
-#include <linux/cpufreq.h>
-#include <linux/smp.h>
-#include <linux/sched/isolation.h>
 #include <linux/rcupdate.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/topology.h>
+#include <linux/smp.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 
 #include "cpu.h"
 
-struct aperfmperf_sample {
-	unsigned int	khz;
-	atomic_t	scfpending;
-	ktime_t		time;
-	u64		aperf;
-	u64		mperf;
+struct aperfmperf {
+	seqcount_t	seq;
+	unsigned long	last_update;
+	u64		acnt;
+	u64		mcnt;
+	u64		aperf;
+	u64		mperf;
 };
 
-static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
+	.seq = SEQCNT_ZERO(cpu_samples.seq)
+};
 
-#define APERFMPERF_CACHE_THRESHOLD_MS	10
-#define APERFMPERF_REFRESH_DELAY_MS	10
-#define APERFMPERF_STALE_THRESHOLD_MS	1000
-
-/*
- * aperfmperf_snapshot_khz()
- * On the current CPU, snapshot APERF, MPERF, and jiffies
- * unless we already did it within 10ms
- * calculate kHz, save snapshot
- */
-static void aperfmperf_snapshot_khz(void *dummy)
+static void init_counter_refs(void)
 {
-	u64 aperf, aperf_delta;
-	u64 mperf, mperf_delta;
-	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
-	unsigned long flags;
+	u64 aperf, mperf;
 
-	local_irq_save(flags);
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
-	local_irq_restore(flags);
 
-	aperf_delta = aperf - s->aperf;
-	mperf_delta = mperf - s->mperf;
-
-	/*
-	 * There is no architectural guarantee that MPERF
-	 * increments faster than we can read it.
-	 */
-	if (mperf_delta == 0)
-		return;
-
-	s->time = ktime_get();
-	s->aperf = aperf;
-	s->mperf = mperf;
-	s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
-	atomic_set_release(&s->scfpending, 0);
+	this_cpu_write(cpu_samples.aperf, aperf);
+	this_cpu_write(cpu_samples.mperf, mperf);
 }
 
-static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
-{
-	s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
-	struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
-
-	/* Don't bother re-computing within the cache threshold time. */
-	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
-		return true;
-
-	if (!atomic_xchg(&s->scfpending, 1) || wait)
-		smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
-
-	/* Return false if the previous iteration was too long ago. */
-	return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
-}
-
-unsigned int aperfmperf_get_khz(int cpu)
-{
-	if (!cpu_khz)
-		return 0;
-
-	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-		return 0;
-
-	if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-		return 0;
-
-	if (rcu_is_idle_cpu(cpu))
-		return 0; /* Idle CPUs are completely uninteresting. */
-
-	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
-	return per_cpu(samples.khz, cpu);
-}
-
-void arch_freq_prepare_all(void)
-{
-	ktime_t now = ktime_get();
-	bool wait = false;
-	int cpu;
-
-	if (!cpu_khz)
-		return;
-
-	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-		return;
-
-	for_each_online_cpu(cpu) {
-		if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-			continue;
-		if (rcu_is_idle_cpu(cpu))
-			continue; /* Idle CPUs are completely uninteresting. */
-		if (!aperfmperf_snapshot_cpu(cpu, now, false))
-			wait = true;
-	}
-
-	if (wait)
-		msleep(APERFMPERF_REFRESH_DELAY_MS);
-}
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
+/*
+ * APERF/MPERF frequency ratio computation.
+ *
+ * The scheduler wants to do frequency invariant accounting and needs a <1
+ * ratio to account for the 'current' frequency, corresponding to
+ * freq_curr / freq_max.
+ *
+ * Since the frequency freq_curr on x86 is controlled by micro-controller and
+ * our P-state setting is little more than a request/hint, we need to observe
+ * the effective frequency 'BusyMHz', i.e. the average frequency over a time
+ * interval after discarding idle time. This is given by:
+ *
+ *            BusyMHz = delta_APERF / delta_MPERF * freq_base
+ *
+ * where freq_base is the max non-turbo P-state.
+ *
+ * The freq_max term has to be set to a somewhat arbitrary value, because we
+ * can't know which turbo states will be available at a given point in time:
+ * it all depends on the thermal headroom of the entire package. We set it to
+ * the turbo level with 4 cores active.
+ *
+ * Benchmarks show that's a good compromise between the 1C turbo ratio
+ * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
+ * which would ignore the entire turbo range (a conspicuous part, making
+ * freq_curr/freq_max always maxed out).
+ *
+ * An exception to the heuristic above is the Atom uarch, where we choose the
+ * highest turbo level for freq_max since Atom's are generally oriented towards
+ * power efficiency.
+ *
+ * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
+ * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
+ */
+
+DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
+static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
+
+void arch_set_max_freq_ratio(bool turbo_disabled)
+{
+	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
+					       arch_turbo_freq_ratio;
+}
+EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
+
+static bool __init turbo_disabled(void)
+{
+	u64 misc_en;
+	int err;
+
+	err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
+	if (err)
+		return false;
+
+	return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
+}
+
+static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+	int err;
+
+	err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
+	if (err)
+		return false;
+
+	err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
+	if (err)
+		return false;
+
+	*base_freq = (*base_freq >> 16) & 0x3F;	/* max P state */
+	*turbo_freq = *turbo_freq & 0x3F;	/* 1C turbo    */
+
+	return true;
+}
+
+#define X86_MATCH(model)					\
+	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
+		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
+
+static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
+	X86_MATCH(XEON_PHI_KNL),
+	X86_MATCH(XEON_PHI_KNM),
+	{}
+};
+
+static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
+	X86_MATCH(SKYLAKE_X),
+	{}
+};
+
+static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
+	X86_MATCH(ATOM_GOLDMONT),
+	X86_MATCH(ATOM_GOLDMONT_D),
+	X86_MATCH(ATOM_GOLDMONT_PLUS),
+	{}
+};
+
+static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
+					  int num_delta_fratio)
+{
+	int fratio, delta_fratio, found;
+	int err, i;
+	u64 msr;
+
+	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+	if (err)
+		return false;
+
+	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
+
+	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+	if (err)
+		return false;
+
+	fratio = (msr >> 8) & 0xFF;
+	i = 16;
+	found = 0;
+	do {
+		if (found >= num_delta_fratio) {
+			*turbo_freq = fratio;
+			return true;
+		}
+
+		delta_fratio = (msr >> (i + 5)) & 0x7;
+
+		if (delta_fratio) {
+			found += 1;
+			fratio -= delta_fratio;
+		}
+
+		i += 8;
+	} while (i < 64);
+
+	return true;
+}
+
+static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
+{
+	u64 ratios, counts;
+	u32 group_size;
+	int err, i;
+
+	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+	if (err)
+		return false;
+
+	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
+
+	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
+	if (err)
+		return false;
+
+	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
+	if (err)
+		return false;
+
+	for (i = 0; i < 64; i += 8) {
+		group_size = (counts >> i) & 0xFF;
+		if (group_size >= size) {
+			*turbo_freq = (ratios >> i) & 0xFF;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+	u64 msr;
+	int err;
+
+	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+	if (err)
+		return false;
+
+	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+	if (err)
+		return false;
+
+	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
+	*turbo_freq = (msr >> 24) & 0xFF;	/* 4C turbo    */
+
+	/* The CPU may have less than 4 cores */
+	if (!*turbo_freq)
+		*turbo_freq = msr & 0xFF;	/* 1C turbo    */
+
+	return true;
+}
+
+static bool __init intel_set_max_freq_ratio(void)
+{
+	u64 base_freq, turbo_freq;
+	u64 turbo_ratio;
+
+	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
+		goto out;
+
+	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
+	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+		goto out;
+
+	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
+	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+		goto out;
+
+	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
+	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
+		goto out;
+
+	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
+		goto out;
+
+	return false;
+
+out:
+	/*
+	 * Some hypervisors advertise X86_FEATURE_APERFMPERF
+	 * but then fill all MSR's with zeroes.
+	 * Some CPUs have turbo boost but don't declare any turbo ratio
+	 * in MSR_TURBO_RATIO_LIMIT.
+	 */
+	if (!base_freq || !turbo_freq) {
+		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
+		return false;
+	}
+
+	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
+	if (!turbo_ratio) {
+		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
+		return false;
+	}
+
+	arch_turbo_freq_ratio = turbo_ratio;
+	arch_set_max_freq_ratio(turbo_disabled());
+
+	return true;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static struct syscore_ops freq_invariance_syscore_ops = {
+	.resume = init_counter_refs,
+};
+
+static void register_freq_invariance_syscore_ops(void)
+{
+	register_syscore_ops(&freq_invariance_syscore_ops);
+}
+#else
+static inline void register_freq_invariance_syscore_ops(void) {}
+#endif
+
+static void freq_invariance_enable(void)
+{
+	if (static_branch_unlikely(&arch_scale_freq_key)) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+	static_branch_enable(&arch_scale_freq_key);
+	register_freq_invariance_syscore_ops();
+	pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
+}
+
+void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
+{
+	arch_turbo_freq_ratio = ratio;
+	arch_set_max_freq_ratio(turbo_disabled);
+	freq_invariance_enable();
+}
+
+static void __init bp_init_freq_invariance(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return;
+
+	if (intel_set_max_freq_ratio())
+		freq_invariance_enable();
+}
+
+static void disable_freq_invariance_workfn(struct work_struct *work)
+{
+	static_branch_disable(&arch_scale_freq_key);
+}
+
+static DECLARE_WORK(disable_freq_invariance_work,
+		    disable_freq_invariance_workfn);
+
+DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void scale_freq_tick(u64 acnt, u64 mcnt)
+{
+	u64 freq_scale;
+
+	if (!arch_scale_freq_invariant())
+		return;
+
+	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
+		goto error;
+
+	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+		goto error;
+
+	freq_scale = div64_u64(acnt, mcnt);
+	if (!freq_scale)
+		goto error;
+
+	if (freq_scale > SCHED_CAPACITY_SCALE)
+		freq_scale = SCHED_CAPACITY_SCALE;
+
+	this_cpu_write(arch_freq_scale, freq_scale);
+	return;
+
+error:
+	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
+	schedule_work(&disable_freq_invariance_work);
+}
+#else
+static inline void bp_init_freq_invariance(void) { }
+static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
+#endif /* CONFIG_X86_64 && CONFIG_SMP */
+
+void arch_scale_freq_tick(void)
+{
+	struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
+	u64 acnt, mcnt, aperf, mperf;
+
+	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+		return;
+
+	rdmsrl(MSR_IA32_APERF, aperf);
+	rdmsrl(MSR_IA32_MPERF, mperf);
+	acnt = aperf - s->aperf;
+	mcnt = mperf - s->mperf;
+
+	s->aperf = aperf;
+	s->mperf = mperf;
+
+	raw_write_seqcount_begin(&s->seq);
+	s->last_update = jiffies;
+	s->acnt = acnt;
+	s->mcnt = mcnt;
+	raw_write_seqcount_end(&s->seq);
+
+	scale_freq_tick(acnt, mcnt);
+}
+
+/*
+ * Discard samples older than the define maximum sample age of 20ms. There
+ * is no point in sending IPIs in such a case. If the scheduler tick was
+ * not running then the CPU is either idle or isolated.
+ */
+#define MAX_SAMPLE_AGE	((unsigned long)HZ / 50)
 
 unsigned int arch_freq_get_on_cpu(int cpu)
 {
-	struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+	struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
+	unsigned int seq, freq;
+	unsigned long last;
+	u64 acnt, mcnt;
 
-	if (!cpu_khz)
-		return 0;
+	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+		goto fallback;
 
-	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-		return 0;
+	do {
+		seq = raw_read_seqcount_begin(&s->seq);
+		last = s->last_update;
+		acnt = s->acnt;
+		mcnt = s->mcnt;
+	} while (read_seqcount_retry(&s->seq, seq));
 
-	if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-		return 0;
+	/*
+	 * Bail on invalid count and when the last update was too long ago,
+	 * which covers idle and NOHZ full CPUs.
+	 */
+	if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
+		goto fallback;
 
-	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
-		return per_cpu(samples.khz, cpu);
+	return div64_u64((cpu_khz * acnt), mcnt);
 
-	msleep(APERFMPERF_REFRESH_DELAY_MS);
-	atomic_set(&s->scfpending, 1);
-	smp_mb(); /* ->scfpending before smp_call_function_single(). */
-	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
-
-	return per_cpu(samples.khz, cpu);
+fallback:
+	freq = cpufreq_quick_get(cpu);
+	return freq ? freq : cpu_khz;
 }
+
+static int __init bp_init_aperfmperf(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+		return 0;
+
+	init_counter_refs();
+	bp_init_freq_invariance();
+	return 0;
+}
+early_initcall(bp_init_aperfmperf);
+
+void ap_init_aperfmperf(void)
+{
+	if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+		init_counter_refs();
+}
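The same effective-frequency math can be reproduced from user space through the msr driver, which makes a handy cross-check for the reworked arch_freq_get_on_cpu() path above. A rough sketch (assumes root, x86, and a loaded msr module; error handling elided):

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	#define MSR_IA32_MPERF 0xe7
	#define MSR_IA32_APERF 0xe8

	/* /dev/cpu/N/msr is seekable: the file offset selects the MSR. */
	static uint64_t rdmsr(int fd, off_t reg)
	{
		uint64_t val = 0;

		pread(fd, &val, sizeof(val), reg);
		return val;
	}

	int main(void)
	{
		int fd = open("/dev/cpu/0/msr", O_RDONLY);
		uint64_t a0 = rdmsr(fd, MSR_IA32_APERF);
		uint64_t m0 = rdmsr(fd, MSR_IA32_MPERF);

		sleep(1);

		uint64_t da = rdmsr(fd, MSR_IA32_APERF) - a0;
		uint64_t dm = rdmsr(fd, MSR_IA32_MPERF) - m0;

		/* multiply by the base (non-turbo) frequency to get BusyMHz */
		printf("busy ratio: %.3f x base\n", (double)da / (double)dm);
		close(fd);
		return 0;
	}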
@@ -84,14 +84,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
 	if (cpu_has(c, X86_FEATURE_TSC)) {
-		unsigned int freq = aperfmperf_get_khz(cpu);
+		unsigned int freq = arch_freq_get_on_cpu(cpu);
 
-		if (!freq)
-			freq = cpufreq_quick_get(cpu);
-		if (!freq)
-			freq = cpu_khz;
-		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-			   freq / 1000, (freq % 1000));
+		seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000));
 	}
 
 	/* Cache size */
@@ -341,14 +341,14 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
 	/* Check whether cpus belong to parent ctrl group */
 	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
-	if (cpumask_weight(tmpmask)) {
+	if (!cpumask_empty(tmpmask)) {
 		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
 		return -EINVAL;
 	}
 
 	/* Check whether cpus are dropped from this group */
 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-	if (cpumask_weight(tmpmask)) {
+	if (!cpumask_empty(tmpmask)) {
 		/* Give any dropped cpus to parent rdtgroup */
 		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
 		update_closid_rmid(tmpmask, prgrp);
@@ -359,7 +359,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 	 * and update per-cpu rmid
 	 */
 	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-	if (cpumask_weight(tmpmask)) {
+	if (!cpumask_empty(tmpmask)) {
 		head = &prgrp->mon.crdtgrp_list;
 		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 			if (crgrp == rdtgrp)
@@ -394,7 +394,7 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
 	/* Check whether cpus are dropped from this group */
 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-	if (cpumask_weight(tmpmask)) {
+	if (!cpumask_empty(tmpmask)) {
 		/* Can't drop from default group */
 		if (rdtgrp == &rdtgroup_default) {
 			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
@@ -413,12 +413,12 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 	 * and update per-cpu closid/rmid.
 	 */
 	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-	if (cpumask_weight(tmpmask)) {
+	if (!cpumask_empty(tmpmask)) {
 		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
 			if (r == rdtgrp)
 				continue;
 			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
-			if (cpumask_weight(tmpmask1))
+			if (!cpumask_empty(tmpmask1))
 				cpumask_rdtgrp_clear(r, tmpmask1);
 		}
 		update_closid_rmid(tmpmask, rdtgrp);
@@ -488,7 +488,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 
 	/* check that user didn't specify any offline cpus */
 	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
-	if (cpumask_weight(tmpmask)) {
+	if (!cpumask_empty(tmpmask)) {
 		ret = -EINVAL;
 		rdt_last_cmd_puts("Can only assign online CPUs\n");
 		goto unlock;
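The cpumask_weight() -> cpumask_empty() conversions in this file (and the nodes_weight() ones further down) are more than style: weight computes a full popcount across the mask only to compare it with zero, while empty can stop at the first set bit. A simplified illustration of the two helpers' shapes in plain C (not the kernel implementations):

	#include <stdbool.h>
	#include <stddef.h>

	static unsigned int bitmap_weight(const unsigned long *map, size_t words)
	{
		unsigned int w = 0;

		for (size_t i = 0; i < words; i++)	/* always scans every word */
			w += (unsigned int)__builtin_popcountl(map[i]);
		return w;
	}

	static bool bitmap_empty(const unsigned long *map, size_t words)
	{
		for (size_t i = 0; i < words; i++)
			if (map[i])
				return false;		/* early exit on first set bit */
		return true;
	}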
@@ -1687,16 +1687,13 @@ EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
  * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
  * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
  */
-long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
+long fpu_xstate_prctl(int option, unsigned long arg2)
 {
 	u64 __user *uptr = (u64 __user *)arg2;
 	u64 permitted, supported;
 	unsigned long idx = arg2;
 	bool guest = false;
 
-	if (tsk != current)
-		return -EPERM;
-
 	switch (option) {
 	case ARCH_GET_XCOMP_SUPP:
 		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
@@ -335,7 +335,6 @@ static int get_cpuid_mode(void)
 	return !test_thread_flag(TIF_NOCPUID);
 }
 
-static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+static int set_cpuid_mode(unsigned long cpuid_enabled)
 {
 	if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
 		return -ENODEV;
@@ -406,7 +406,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
 }
 
 /**
- * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ * native_tss_update_io_bitmap - Update I/O bitmap before exiting to user mode
 */
 void native_tss_update_io_bitmap(void)
 {
@@ -989,20 +989,19 @@ unsigned long __get_wchan(struct task_struct *p)
 	return addr;
 }
 
-long do_arch_prctl_common(struct task_struct *task, int option,
-			  unsigned long arg2)
+long do_arch_prctl_common(int option, unsigned long arg2)
 {
 	switch (option) {
 	case ARCH_GET_CPUID:
 		return get_cpuid_mode();
 	case ARCH_SET_CPUID:
-		return set_cpuid_mode(task, arg2);
+		return set_cpuid_mode(arg2);
 	case ARCH_GET_XCOMP_SUPP:
 	case ARCH_GET_XCOMP_PERM:
 	case ARCH_REQ_XCOMP_PERM:
 	case ARCH_GET_XCOMP_GUEST_PERM:
 	case ARCH_REQ_XCOMP_GUEST_PERM:
-		return fpu_xstate_prctl(task, option, arg2);
+		return fpu_xstate_prctl(option, arg2);
 	}
 
 	return -EINVAL;
@@ -222,5 +222,5 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-	return do_arch_prctl_common(current, option, arg2);
+	return do_arch_prctl_common(option, arg2);
 }
@@ -844,7 +844,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 
 	ret = do_arch_prctl_64(current, option, arg2);
 	if (ret == -EINVAL)
-		ret = do_arch_prctl_common(current, option, arg2);
+		ret = do_arch_prctl_common(option, arg2);
 
 	return ret;
 }
@@ -852,7 +852,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 #ifdef CONFIG_IA32_EMULATION
 COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-	return do_arch_prctl_common(current, option, arg2);
+	return do_arch_prctl_common(option, arg2);
 }
 #endif
 
@@ -56,7 +56,6 @@
 #include <linux/numa.h>
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
-#include <linux/syscore_ops.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -188,7 +187,7 @@ static void smp_callin(void)
 	 */
 	set_cpu_sibling_map(raw_smp_processor_id());
 
-	init_freq_invariance(true, false);
+	ap_init_aperfmperf();
 
 	/*
 	 * Get our bogomips.
@@ -1406,7 +1405,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
 	smp_prepare_cpus_common();
 
-	init_freq_invariance(false, false);
 	smp_sanity_check();
 
 	switch (apic_intr_mode) {
@@ -1858,357 +1856,3 @@ void native_play_dead(void)
 }
 
 #endif
-
-#ifdef CONFIG_X86_64
-/*
- * APERF/MPERF frequency ratio computation.
- *
- * The scheduler wants to do frequency invariant accounting and needs a <1
- * ratio to account for the 'current' frequency, corresponding to
- * freq_curr / freq_max.
- *
- * Since the frequency freq_curr on x86 is controlled by micro-controller and
- * our P-state setting is little more than a request/hint, we need to observe
- * the effective frequency 'BusyMHz', i.e. the average frequency over a time
- * interval after discarding idle time. This is given by:
- *
- *            BusyMHz = delta_APERF / delta_MPERF * freq_base
- *
- * where freq_base is the max non-turbo P-state.
- *
- * The freq_max term has to be set to a somewhat arbitrary value, because we
- * can't know which turbo states will be available at a given point in time:
- * it all depends on the thermal headroom of the entire package. We set it to
- * the turbo level with 4 cores active.
- *
- * Benchmarks show that's a good compromise between the 1C turbo ratio
- * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
- * which would ignore the entire turbo range (a conspicuous part, making
- * freq_curr/freq_max always maxed out).
- *
- * An exception to the heuristic above is the Atom uarch, where we choose the
- * highest turbo level for freq_max since Atom's are generally oriented towards
- * power efficiency.
- *
- * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
- * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
- */
-
-DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
-
-static DEFINE_PER_CPU(u64, arch_prev_aperf);
-static DEFINE_PER_CPU(u64, arch_prev_mperf);
-static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
-static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
-
-void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
-					       arch_turbo_freq_ratio;
-}
-EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
-
-static bool turbo_disabled(void)
-{
-	u64 misc_en;
-	int err;
-
-	err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
-	if (err)
-		return false;
-
-	return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
-}
-
-static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
-	int err;
-
-	err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
-	if (err)
-		return false;
-
-	err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
-	if (err)
-		return false;
-
-	*base_freq = (*base_freq >> 16) & 0x3F;	/* max P state */
-	*turbo_freq = *turbo_freq & 0x3F;	/* 1C turbo    */
-
-	return true;
-}
-
-#define X86_MATCH(model)					\
-	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
-		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
-
-static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
-	X86_MATCH(XEON_PHI_KNL),
-	X86_MATCH(XEON_PHI_KNM),
-	{}
-};
-
-static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
-	X86_MATCH(SKYLAKE_X),
-	{}
-};
-
-static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
-	X86_MATCH(ATOM_GOLDMONT),
-	X86_MATCH(ATOM_GOLDMONT_D),
-	X86_MATCH(ATOM_GOLDMONT_PLUS),
-	{}
-};
-
-static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
-				   int num_delta_fratio)
-{
-	int fratio, delta_fratio, found;
-	int err, i;
-	u64 msr;
-
-	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-	if (err)
-		return false;
-
-	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
-
-	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
-	if (err)
-		return false;
-
-	fratio = (msr >> 8) & 0xFF;
-	i = 16;
-	found = 0;
-	do {
-		if (found >= num_delta_fratio) {
-			*turbo_freq = fratio;
-			return true;
-		}
-
-		delta_fratio = (msr >> (i + 5)) & 0x7;
-
-		if (delta_fratio) {
-			found += 1;
-			fratio -= delta_fratio;
-		}
-
-		i += 8;
-	} while (i < 64);
-
-	return true;
-}
-
-static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
-{
-	u64 ratios, counts;
-	u32 group_size;
-	int err, i;
-
-	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-	if (err)
-		return false;
-
-	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
-
-	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
-	if (err)
-		return false;
-
-	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
-	if (err)
-		return false;
-
-	for (i = 0; i < 64; i += 8) {
-		group_size = (counts >> i) & 0xFF;
-		if (group_size >= size) {
-			*turbo_freq = (ratios >> i) & 0xFF;
-			return true;
-		}
-	}
-
-	return false;
-}
-
-static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
-	u64 msr;
-	int err;
-
-	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-	if (err)
-		return false;
-
-	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
-	if (err)
-		return false;
-
-	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
-	*turbo_freq = (msr >> 24) & 0xFF;	/* 4C turbo    */
-
-	/* The CPU may have less than 4 cores */
-	if (!*turbo_freq)
-		*turbo_freq = msr & 0xFF;	/* 1C turbo    */
-
-	return true;
-}
-
-static bool intel_set_max_freq_ratio(void)
-{
-	u64 base_freq, turbo_freq;
-	u64 turbo_ratio;
-
-	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
-		goto out;
-
-	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
-	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
-		goto out;
-
-	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
-	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
-		goto out;
-
-	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
-	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
-		goto out;
-
-	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
-		goto out;
-
-	return false;
-
-out:
-	/*
-	 * Some hypervisors advertise X86_FEATURE_APERFMPERF
-	 * but then fill all MSR's with zeroes.
-	 * Some CPUs have turbo boost but don't declare any turbo ratio
-	 * in MSR_TURBO_RATIO_LIMIT.
-	 */
-	if (!base_freq || !turbo_freq) {
-		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
-		return false;
-	}
-
-	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
-	if (!turbo_ratio) {
-		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
-		return false;
-	}
-
-	arch_turbo_freq_ratio = turbo_ratio;
-	arch_set_max_freq_ratio(turbo_disabled());
-
-	return true;
-}
-
-static void init_counter_refs(void)
-{
-	u64 aperf, mperf;
-
-	rdmsrl(MSR_IA32_APERF, aperf);
-	rdmsrl(MSR_IA32_MPERF, mperf);
-
-	this_cpu_write(arch_prev_aperf, aperf);
-	this_cpu_write(arch_prev_mperf, mperf);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static struct syscore_ops freq_invariance_syscore_ops = {
-	.resume = init_counter_refs,
-};
-
-static void register_freq_invariance_syscore_ops(void)
-{
-	/* Bail out if registered already. */
-	if (freq_invariance_syscore_ops.node.prev)
-		return;
-
-	register_syscore_ops(&freq_invariance_syscore_ops);
-}
-#else
-static inline void register_freq_invariance_syscore_ops(void) {}
-#endif
-
-void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-	bool ret = false;
-
-	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-		return;
-
-	if (secondary) {
-		if (static_branch_likely(&arch_scale_freq_key)) {
-			init_counter_refs();
-		}
-		return;
-	}
-
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		ret = intel_set_max_freq_ratio();
-	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		if (!cppc_ready) {
-			return;
-		}
-		ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
-	}
-
-	if (ret) {
-		init_counter_refs();
-		static_branch_enable(&arch_scale_freq_key);
-		register_freq_invariance_syscore_ops();
-		pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
-	} else {
-		pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
-	}
-}
-
-static void disable_freq_invariance_workfn(struct work_struct *work)
-{
-	static_branch_disable(&arch_scale_freq_key);
-}
-
-static DECLARE_WORK(disable_freq_invariance_work,
-		    disable_freq_invariance_workfn);
-
-DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
-
-void arch_scale_freq_tick(void)
-{
-	u64 freq_scale;
-	u64 aperf, mperf;
-	u64 acnt, mcnt;
-
-	if (!arch_scale_freq_invariant())
-		return;
-
-	rdmsrl(MSR_IA32_APERF, aperf);
-	rdmsrl(MSR_IA32_MPERF, mperf);
-
-	acnt = aperf - this_cpu_read(arch_prev_aperf);
-	mcnt = mperf - this_cpu_read(arch_prev_mperf);
-
-	this_cpu_write(arch_prev_aperf, aperf);
-	this_cpu_write(arch_prev_mperf, mperf);
-
-	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
-		goto error;
-
-	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
-		goto error;
-
-	freq_scale = div64_u64(acnt, mcnt);
-	if (!freq_scale)
-		goto error;
-
-	if (freq_scale > SCHED_CAPACITY_SCALE)
-		freq_scale = SCHED_CAPACITY_SCALE;
-
-	this_cpu_write(arch_freq_scale, freq_scale);
-	return;
-
-error:
-	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
-	schedule_work(&disable_freq_invariance_work);
-}
-#endif /* CONFIG_X86_64 */
@@ -154,7 +154,7 @@ int __init amd_numa_init(void)
 		node_set(nodeid, numa_nodes_parsed);
 	}
 
-	if (!nodes_weight(numa_nodes_parsed))
+	if (nodes_empty(numa_nodes_parsed))
 		return -ENOENT;
 
 	/*
@@ -400,7 +400,7 @@ static void leave_uniprocessor(void)
 	int cpu;
 	int err;
 
-	if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0)
+	if (!cpumask_available(downed_cpus) || cpumask_empty(downed_cpus))
 		return;
 	pr_notice("Re-enabling CPUs...\n");
 	for_each_cpu(cpu, downed_cpus) {
@@ -123,7 +123,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 	 * Continue to fill physical nodes with fake nodes until there is no
 	 * memory left on any of them.
 	 */
-	while (nodes_weight(physnode_mask)) {
+	while (!nodes_empty(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
 			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
 			u64 start, limit, end;
@@ -270,7 +270,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
 	 * Fill physical nodes with fake nodes of size until there is no memory
 	 * left on any of them.
 	 */
-	while (nodes_weight(physnode_mask)) {
+	while (!nodes_empty(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
 			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
 			u64 start, limit, end;
@@ -985,7 +985,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 
 	/* Clear global flags */
 	if (master) {
-		if (cpumask_weight(uv_nmi_cpu_mask))
+		if (!cpumask_empty(uv_nmi_cpu_mask))
 			uv_nmi_cleanup_mask();
 		atomic_set(&uv_nmi_cpus_in_nmi, -1);
 		atomic_set(&uv_nmi_cpu, -1);
@@ -5,14 +5,10 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-__weak void arch_freq_prepare_all(void)
-{
-}
-
 extern const struct seq_operations cpuinfo_op;
 
 static int cpuinfo_open(struct inode *inode, struct file *file)
 {
-	arch_freq_prepare_all();
 	return seq_open(file, &cpuinfo_op);
 }
@@ -1199,7 +1199,6 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
 			struct cpufreq_governor *old_gov) { }
 #endif
 
-extern void arch_freq_prepare_all(void);
 extern unsigned int arch_freq_get_on_cpu(int cpu);
 
 #ifndef arch_set_freq_scale