Merge branches 'x86/xen', 'x86/build', 'x86/microcode', 'x86/mm-debug-v2', 'x86/memory-corruption-check', 'x86/early-printk', 'x86/xsave', 'x86/ptrace-v2', 'x86/quirks', 'x86/setup', 'x86/spinlocks' and 'x86/signal' into x86/core-v2

Ingo Molnar 2008-10-12 15:50:02 +02:00
102 changed files with 4968 additions and 2190 deletions

View File

@ -658,11 +658,12 @@ and is between 256 and 4096 characters. It is defined in the file
earlyprintk= [X86-32,X86-64,SH,BLACKFIN]
earlyprintk=vga
earlyprintk=serial[,ttySn[,baudrate]]
earlyprintk=dbgp
Append ",keep" to not disable it when the real console
takes over.
Only vga or serial at a time, not both.
Only vga or serial or usb debug port at a time.
Currently only ttyS0 and ttyS1 are supported.
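For example, a serial early console on ttyS0 that stays active after
the real console takes over could be requested with the following
boot parameter (the 115200 baud rate is only an illustrative value):
earlyprintk=serial,ttyS0,115200,keep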
@ -1231,6 +1232,29 @@ and is between 256 and 4096 characters. It is defined in the file
or
memmap=0x10000$0x18690000
memory_corruption_check=0/1 [X86]
Some BIOSes seem to corrupt the first 64k of
memory when doing things like suspend/resume.
Setting this option will scan the memory
looking for corruption. Enabling this will
both detect corruption and prevent the kernel
from using the memory being corrupted.
However, it's intended as a diagnostic tool; if
repeatable BIOS-originated corruption always
affects the same memory, you can use memmap=
to prevent the kernel from using that memory.
memory_corruption_check_size=size [X86]
By default it checks for corruption in the low
64k, making this memory unavailable for normal
use. Use this parameter to scan for
corruption in more or less memory.
memory_corruption_check_period=seconds [X86]
By default it checks for corruption every 60
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
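For example, to enable the check and scan the low 128k every 30
seconds, the three parameters above could be combined on the boot
line as follows (the values are illustrative, and the size is
assumed to accept the usual K suffix):
memory_corruption_check=1 memory_corruption_check_size=128K memory_corruption_check_period=30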
memtest= [KNL,X86] Enable memtest
Format: <integer>
range: 0,4 : pattern number

View File

@ -390,6 +390,11 @@ L: iommu@lists.linux-foundation.org
T: git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git
S: Supported
AMD MICROCODE UPDATE SUPPORT
P: Peter Oruba
M: peter.oruba@amd.com
S: Supported
AMS (Apple Motion Sensor) DRIVER
P: Stelian Pop
M: stelian@popies.net

View File

@ -113,11 +113,6 @@ typedef struct siginfo {
#undef NSIGSEGV
#define NSIGSEGV 3
/*
* SIGTRAP si_codes
*/
#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */
#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint or watchpoint */
#undef NSIGTRAP
#define NSIGTRAP 4

View File

@ -15,11 +15,6 @@
#include <asm-generic/siginfo.h>
/*
* SIGTRAP si_codes
*/
#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */
#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint or watchpoint */
#undef NSIGTRAP
#define NSIGTRAP 4

View File

@ -778,23 +778,45 @@ config X86_REBOOTFIXUPS
Say N otherwise.
config MICROCODE
tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
tristate "/dev/cpu/microcode - microcode support"
select FW_LOADER
---help---
If you say Y here, you will be able to update the microcode on
Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II,
Pentium III, Pentium 4, Xeon etc. You will obviously need the
actual microcode binary data itself which is not shipped with the
Linux kernel.
certain Intel and AMD processors. The Intel support is for the
IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
Pentium 4, Xeon etc. The AMD support is for family 0x10 and
0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
You will obviously need the actual microcode binary data itself
which is not shipped with the Linux kernel.
For latest news and information on obtaining all the required
ingredients for this driver, check:
<http://www.urbanmyth.org/microcode/>.
This option selects the general module only, you need to select
at least one vendor specific module as well.
To compile this driver as a module, choose M here: the
module will be called microcode.
config MICROCODE_OLD_INTERFACE
config MICROCODE_INTEL
bool "Intel microcode patch loading support"
depends on MICROCODE
default MICROCODE
select FW_LOADER
---help---
This option enables microcode patch loading support for Intel
processors.
For latest news and information on obtaining all the required
Intel ingredients for this driver, check:
<http://www.urbanmyth.org/microcode/>.
config MICROCODE_AMD
bool "AMD microcode patch loading support"
depends on MICROCODE
select FW_LOADER
---help---
If you select this option, microcode patch loading support for AMD
processors will be enabled.
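As an illustrative .config sketch, a kernel that builds the common
loader as a module with both vendor-specific drivers enabled would
select:
CONFIG_MICROCODE=m
CONFIG_MICROCODE_INTEL=y
CONFIG_MICROCODE_AMD=y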
config MICROCODE_OLD_INTERFACE
def_bool y
depends on MICROCODE
@ -1061,6 +1083,56 @@ config HIGHPTE
low memory. Setting this option will put user-space page table
entries in high memory.
config X86_CHECK_BIOS_CORRUPTION
bool "Check for low memory corruption"
help
Periodically check for memory corruption in low memory, which
is suspected to be caused by BIOS. Even when enabled in the
configuration, it is disabled at runtime. Enable it by
setting "memory_corruption_check=1" on the kernel command
line. By default it scans the low 64k of memory every 60
seconds; see the memory_corruption_check_size and
memory_corruption_check_period parameters in
Documentation/kernel-parameters.txt to adjust this.
When enabled with the default parameters, this option has
almost no overhead, as it reserves a relatively small amount
of memory and scans it infrequently. It both detects corruption
and prevents it from affecting the running system.
It is, however, intended as a diagnostic tool; if repeatable
BIOS-originated corruption always affects the same memory,
you can use memmap= to prevent the kernel from using that
memory.
config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
bool "Set the default setting of memory_corruption_check"
depends on X86_CHECK_BIOS_CORRUPTION
default y
help
Set whether the default state of memory_corruption_check is
on or off.
config X86_RESERVE_LOW_64K
bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
default y
help
Reserve the first 64K of physical RAM on BIOSes that are known
to potentially corrupt that memory range. A number of BIOSes are
known to utilize this area during suspend/resume, so it must not
be used by the kernel.
Set this to N if you are absolutely sure that you trust the BIOS
to get all its memory reservations and usages right.
If you have doubts about the BIOS (e.g. suspend/resume does not
work or there are kernel crashes after certain hardware hotplug
events) and it's not AMI or Phoenix, then you might want to enable
X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
corruption patterns.
Say Y if unsure.
config MATH_EMULATION
bool
prompt "Math emulation" if X86_32

View File

@ -43,6 +43,19 @@ config EARLY_PRINTK
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash.
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
default n
depends on EARLY_PRINTK && PCI
help
Write kernel log output directly into the EHCI debug port.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized. For normal operation
it is not recommended because it looks ugly and doesn't cooperate
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash. You need a USB debug device.
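As an illustrative setup sketch, using the EHCI debug port for early
output means enabling this option alongside EARLY_PRINTK:
CONFIG_EARLY_PRINTK=y
CONFIG_EARLY_PRINTK_DBGP=y
and booting with earlyprintk=dbgp (an optional controller number may
follow, e.g. earlyprintk=dbgp0, matching the number parsed by
early_dbgp_init() below).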
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL

View File

@ -45,3 +45,8 @@ cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
# cpu entries
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
ifneq ($(CONFIG_X86_P6_NOP),y)
cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,)
endif

View File

@ -72,9 +72,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
$(obj)/zImage: IMAGE_OFFSET := 0x1000
$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK)
$(obj)/bzImage: IMAGE_OFFSET := 0x100000
$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
$(obj)/bzImage: BUILDFLAGS := -b
@ -117,7 +115,7 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
FDARGS =
@ -181,6 +179,7 @@ isoimage: $(BOOTIMAGE)
mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
-no-emul-boot -boot-load-size 4 -boot-info-table \
$(obj)/isoimage
isohybrid $(obj)/image.iso 2>/dev/null || true
rm -rf $(obj)/isoimage
zlilo: $(BOOTIMAGE)

View File

@ -27,9 +27,8 @@ $(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
ifeq ($(CONFIG_X86_32),y)
targets += vmlinux.bin.all vmlinux.relocs
hostprogs-y := relocs
targets += vmlinux.bin.all vmlinux.relocs relocs
hostprogs-$(CONFIG_X86_32) += relocs
quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@ -43,6 +42,8 @@ quiet_cmd_relocbin = BUILD $@
$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
$(call if_changed,relocbin)
ifeq ($(CONFIG_X86_32),y)
ifdef CONFIG_RELOCATABLE
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
$(call if_changed,gzip)
@ -59,6 +60,5 @@ $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
endif
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
$(call if_changed,ld)

View File

@ -41,6 +41,7 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
char *mbrbuf_ptr, *mbrbuf_end;
u32 buf_base, mbr_base;
extern char _end[];
u16 mbr_magic;
sector_size = ei->params.bytes_per_sector;
if (!sector_size)
@ -58,11 +59,15 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
return -1;
memset(mbrbuf_ptr, 0, sector_size);
if (read_mbr(devno, mbrbuf_ptr))
return -1;
*mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
return 0;
mbr_magic = *(u16 *)&mbrbuf_ptr[510];
/* check for valid MBR magic */
return mbr_magic == 0xAA55 ? 0 : -1;
}
static int get_edd_info(u8 devno, struct edd_info *ei)

View File

@ -224,7 +224,7 @@ static void vesa_store_pm_info(void)
static void vesa_store_mode_params_graphics(void)
{
/* Tell the kernel we're in VESA graphics mode */
boot_params.screen_info.orig_video_isVGA = 0x23;
boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
/* Mode parameters */
boot_params.screen_info.vesa_attributes = vminfo.mode_attr;

View File

@ -1535,7 +1535,6 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE is not set
CONFIG_LOGO=y

View File

@ -1505,7 +1505,6 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE is not set
CONFIG_LOGO=y

View File

@ -351,31 +351,28 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
savesegment(es, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->es);
err |= __put_user((u32)regs->di, &sc->di);
err |= __put_user((u32)regs->si, &sc->si);
err |= __put_user((u32)regs->bp, &sc->bp);
err |= __put_user((u32)regs->sp, &sc->sp);
err |= __put_user((u32)regs->bx, &sc->bx);
err |= __put_user((u32)regs->dx, &sc->dx);
err |= __put_user((u32)regs->cx, &sc->cx);
err |= __put_user((u32)regs->ax, &sc->ax);
err |= __put_user((u32)regs->cs, &sc->cs);
err |= __put_user((u32)regs->ss, &sc->ss);
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
err |= __put_user(regs->bp, &sc->bp);
err |= __put_user(regs->sp, &sc->sp);
err |= __put_user(regs->bx, &sc->bx);
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
err |= __put_user(regs->cs, &sc->cs);
err |= __put_user(regs->ss, &sc->ss);
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user((u32)regs->ip, &sc->ip);
err |= __put_user((u32)regs->flags, &sc->flags);
err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ip, &sc->ip);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
tmp = save_i387_xstate_ia32(fpstate);
if (tmp < 0)
err = -EFAULT;
else {
clear_used_math();
stts();
else
err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
&sc->fpstate);
}
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
@ -444,21 +441,18 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
return -EFAULT;
err |= __put_user(sig, &frame->sig);
if (err)
goto give_sigsegv;
if (__put_user(sig, &frame->sig))
return -EFAULT;
err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
if (err)
goto give_sigsegv;
if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
return -EFAULT;
if (_COMPAT_NSIG_WORDS > 1) {
err |= __copy_to_user(frame->extramask, &set->sig[1],
sizeof(frame->extramask));
if (err)
goto give_sigsegv;
if (__copy_to_user(frame->extramask, &set->sig[1],
sizeof(frame->extramask)))
return -EFAULT;
}
if (ka->sa.sa_flags & SA_RESTORER) {
@ -479,7 +473,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
*/
err |= __copy_to_user(frame->retcode, &code, 8);
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
@ -502,10 +496,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
#endif
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@ -533,14 +523,14 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
return -EFAULT;
err |= __put_user(sig, &frame->sig);
err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
err |= copy_siginfo_to_user32(&frame->info, info);
if (err)
goto give_sigsegv;
return -EFAULT;
/* Create the ucontext. */
if (cpu_has_xsave)
@ -556,7 +546,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
return -EFAULT;
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
@ -571,7 +561,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
*/
err |= __copy_to_user(frame->retcode, &code, 8);
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
@ -599,8 +589,4 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
#endif
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}

View File

@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
endif
#
@ -51,7 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
@ -90,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
@ -100,6 +99,11 @@ scx200-y += scx200_32.o
obj-$(CONFIG_OLPC) += olpc.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
obj-$(CONFIG_MICROCODE) += microcode.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)

View File

@ -1418,8 +1418,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
acpi_skip_timer_override = 1;
/*
* The ati_ixp4x0_rev() early PCI quirk should have set
* the acpi_skip_timer_override flag already:
*/
if (!acpi_skip_timer_override) {
WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
d->ident);
acpi_skip_timer_override = 1;
}
return 0;
}

View File

@ -1121,16 +1121,5 @@ void __cpuinit cpu_init(void)
xsave_init();
}
#ifdef CONFIG_HOTPLUG_CPU
void __cpuinit cpu_uninit(void)
{
int cpu = raw_smp_processor_id();
cpu_clear(cpu, cpu_initialized);
/* lazy TLB state */
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}
#endif
#endif

View File

@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
.__cr3 = __pa(swapper_pg_dir)
.__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir)
}
};

View File

@ -95,6 +95,52 @@ static void __init nvidia_bugs(int num, int slot, int func)
}
static u32 ati_ixp4x0_rev(int num, int slot, int func)
{
u32 d;
u8 b;
b = read_pci_config_byte(num, slot, func, 0xac);
b &= ~(1<<5);
write_pci_config_byte(num, slot, func, 0xac, b);
d = read_pci_config(num, slot, func, 0x70);
d |= 1<<8;
write_pci_config(num, slot, func, 0x70, d);
d = read_pci_config(num, slot, func, 0x8);
d &= 0xff;
return d;
}
static void __init ati_bugs(int num, int slot, int func)
{
#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC)
u32 d;
u8 b;
if (acpi_use_timer_override)
return;
d = ati_ixp4x0_rev(num, slot, func);
if (d < 0x82)
acpi_skip_timer_override = 1;
else {
/* check for IRQ0 interrupt swap */
outb(0x72, 0xcd6); b = inb(0xcd7);
if (!(b & 0x2))
acpi_skip_timer_override = 1;
}
if (acpi_skip_timer_override) {
printk(KERN_INFO "SB4X0 revision 0x%x\n", d);
printk(KERN_INFO "Ignoring ACPI timer override.\n");
printk(KERN_INFO "If you got timer trouble "
"try acpi_use_timer_override\n");
}
#endif
}
#ifdef CONFIG_DMAR
static void __init intel_g33_dmar(int num, int slot, int func)
{
@ -128,6 +174,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
#ifdef CONFIG_DMAR
{ PCI_VENDOR_ID_INTEL, 0x29c0,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },

View File

@ -3,11 +3,19 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/screen_info.h>
#include <linux/usb/ch9.h>
#include <linux/pci_regs.h>
#include <linux/pci_ids.h>
#include <linux/errno.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/fcntl.h>
#include <asm/setup.h>
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
#include <asm/pgtable.h>
#include <asm/fixmap.h>
#include <linux/usb/ehci_def.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /* ttyS0 */
static int early_serial_putc(unsigned char ch)
{
unsigned timeout = 0xffff;
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
outb(ch, early_serial_base + TXR);
@ -111,7 +120,7 @@ static __init void early_serial_init(char *s)
if (!strncmp(s, "0x", 2)) {
early_serial_base = simple_strtoul(s, &e, 16);
} else {
static int bases[] = { 0x3f8, 0x2f8 };
static const int __initconst bases[] = { 0x3f8, 0x2f8 };
if (!strncmp(s, "ttyS", 4))
s += 4;
@ -151,6 +160,721 @@ static struct console early_serial_console = {
.index = -1,
};
#ifdef CONFIG_EARLY_PRINTK_DBGP
static struct ehci_caps __iomem *ehci_caps;
static struct ehci_regs __iomem *ehci_regs;
static struct ehci_dbg_port __iomem *ehci_debug;
static unsigned int dbgp_endpoint_out;
struct ehci_dev {
u32 bus;
u32 slot;
u32 func;
};
static struct ehci_dev ehci_dev;
#define USB_DEBUG_DEVNUM 127
#define DBGP_DATA_TOGGLE 0x8800
static inline u32 dbgp_pid_update(u32 x, u32 tok)
{
return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
}
static inline u32 dbgp_len_update(u32 x, u32 len)
{
return (x & ~0x0f) | (len & 0x0f);
}
/*
* USB Packet IDs (PIDs)
*/
/* token */
#define USB_PID_OUT 0xe1
#define USB_PID_IN 0x69
#define USB_PID_SOF 0xa5
#define USB_PID_SETUP 0x2d
/* handshake */
#define USB_PID_ACK 0xd2
#define USB_PID_NAK 0x5a
#define USB_PID_STALL 0x1e
#define USB_PID_NYET 0x96
/* data */
#define USB_PID_DATA0 0xc3
#define USB_PID_DATA1 0x4b
#define USB_PID_DATA2 0x87
#define USB_PID_MDATA 0x0f
/* Special */
#define USB_PID_PREAMBLE 0x3c
#define USB_PID_ERR 0x3c
#define USB_PID_SPLIT 0x78
#define USB_PID_PING 0xb4
#define USB_PID_UNDEF_0 0xf0
#define USB_PID_DATA_TOGGLE 0x88
#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
#define PCI_CAP_ID_EHCI_DEBUG 0xa
#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
#define HUB_SHORT_RESET_TIME 10
#define HUB_LONG_RESET_TIME 200
#define HUB_RESET_TIMEOUT 500
#define DBGP_MAX_PACKET 8
static int dbgp_wait_until_complete(void)
{
u32 ctrl;
int loop = 0x100000;
do {
ctrl = readl(&ehci_debug->control);
/* Stop when the transaction is finished */
if (ctrl & DBGP_DONE)
break;
} while (--loop > 0);
if (!loop)
return -1;
/*
* Now that we have observed the completed transaction,
* clear the done bit.
*/
writel(ctrl | DBGP_DONE, &ehci_debug->control);
return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
}
static void dbgp_mdelay(int ms)
{
int i;
while (ms--) {
for (i = 0; i < 1000; i++)
outb(0x1, 0x80);
}
}
static void dbgp_breath(void)
{
/* Sleep to give the debug port a chance to breathe */
}
static int dbgp_wait_until_done(unsigned ctrl)
{
u32 pids, lpid;
int ret;
int loop = 3;
retry:
writel(ctrl | DBGP_GO, &ehci_debug->control);
ret = dbgp_wait_until_complete();
pids = readl(&ehci_debug->pids);
lpid = DBGP_PID_GET(pids);
if (ret < 0)
return ret;
/*
* If the port is getting full or it has dropped data
* start pacing ourselves, not necessary but it's friendly.
*/
if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
dbgp_breath();
/* If I get a NAK, reissue the transmission */
if (lpid == USB_PID_NAK) {
if (--loop > 0)
goto retry;
}
return ret;
}
static void dbgp_set_data(const void *buf, int size)
{
const unsigned char *bytes = buf;
u32 lo, hi;
int i;
lo = hi = 0;
for (i = 0; i < 4 && i < size; i++)
lo |= bytes[i] << (8*i);
for (; i < 8 && i < size; i++)
hi |= bytes[i] << (8*(i - 4));
writel(lo, &ehci_debug->data03);
writel(hi, &ehci_debug->data47);
}
static void dbgp_get_data(void *buf, int size)
{
unsigned char *bytes = buf;
u32 lo, hi;
int i;
lo = readl(&ehci_debug->data03);
hi = readl(&ehci_debug->data47);
for (i = 0; i < 4 && i < size; i++)
bytes[i] = (lo >> (8*i)) & 0xff;
for (; i < 8 && i < size; i++)
bytes[i] = (hi >> (8*(i - 4))) & 0xff;
}
static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
const char *bytes, int size)
{
u32 pids, addr, ctrl;
int ret;
if (size > DBGP_MAX_PACKET)
return -1;
addr = DBGP_EPADDR(devnum, endpoint);
pids = readl(&ehci_debug->pids);
pids = dbgp_pid_update(pids, USB_PID_OUT);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, size);
ctrl |= DBGP_OUT;
ctrl |= DBGP_GO;
dbgp_set_data(bytes, size);
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
return ret;
}
static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
int size)
{
u32 pids, addr, ctrl;
int ret;
if (size > DBGP_MAX_PACKET)
return -1;
addr = DBGP_EPADDR(devnum, endpoint);
pids = readl(&ehci_debug->pids);
pids = dbgp_pid_update(pids, USB_PID_IN);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, size);
ctrl &= ~DBGP_OUT;
ctrl |= DBGP_GO;
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
if (size > ret)
size = ret;
dbgp_get_data(data, size);
return ret;
}
static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
int value, int index, void *data, int size)
{
u32 pids, addr, ctrl;
struct usb_ctrlrequest req;
int read;
int ret;
read = (requesttype & USB_DIR_IN) != 0;
if (size > (read ? DBGP_MAX_PACKET:0))
return -1;
/* Compute the control message */
req.bRequestType = requesttype;
req.bRequest = request;
req.wValue = cpu_to_le16(value);
req.wIndex = cpu_to_le16(index);
req.wLength = cpu_to_le16(size);
pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
addr = DBGP_EPADDR(devnum, 0);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, sizeof(req));
ctrl |= DBGP_OUT;
ctrl |= DBGP_GO;
/* Send the setup message */
dbgp_set_data(&req, sizeof(req));
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
/* Read the result */
return dbgp_bulk_read(devnum, 0, data, size);
}
/* Find a PCI capability */
static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
{
u8 pos;
int bytes;
if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
PCI_STATUS_CAP_LIST))
return 0;
pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
u8 id;
pos &= ~3;
id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
if (id == 0xff)
break;
if (id == cap)
return pos;
pos = read_pci_config_byte(num, slot, func,
pos+PCI_CAP_LIST_NEXT);
}
return 0;
}
static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
{
u32 class;
class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
return 0;
return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
}
static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
{
u32 bus, slot, func;
for (bus = 0; bus < 256; bus++) {
for (slot = 0; slot < 32; slot++) {
for (func = 0; func < 8; func++) {
unsigned cap;
cap = __find_dbgp(bus, slot, func);
if (!cap)
continue;
if (ehci_num-- != 0)
continue;
*rbus = bus;
*rslot = slot;
*rfunc = func;
return cap;
}
}
}
return 0;
}
static int ehci_reset_port(int port)
{
u32 portsc;
u32 delay_time, delay;
int loop;
/* Reset the usb debug port */
portsc = readl(&ehci_regs->port_status[port - 1]);
portsc &= ~PORT_PE;
portsc |= PORT_RESET;
writel(portsc, &ehci_regs->port_status[port - 1]);
delay = HUB_ROOT_RESET_TIME;
for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
delay_time += delay) {
dbgp_mdelay(delay);
portsc = readl(&ehci_regs->port_status[port - 1]);
if (portsc & PORT_RESET) {
/* force reset to complete */
loop = 2;
writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
&ehci_regs->port_status[port - 1]);
do {
portsc = readl(&ehci_regs->port_status[port-1]);
} while ((portsc & PORT_RESET) && (--loop > 0));
}
/* Device went away? */
if (!(portsc & PORT_CONNECT))
return -ENOTCONN;
/* bomb out completely if something weird happened */
if ((portsc & PORT_CSC))
return -EINVAL;
/* If we've finished resetting, then break out of the loop */
if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
return 0;
}
return -EBUSY;
}
static int ehci_wait_for_port(int port)
{
u32 status;
int ret, reps;
for (reps = 0; reps < 3; reps++) {
dbgp_mdelay(100);
status = readl(&ehci_regs->status);
if (status & STS_PCD) {
ret = ehci_reset_port(port);
if (ret == 0)
return 0;
}
}
return -ENOTCONN;
}
#ifdef DBGP_DEBUG
# define dbgp_printk early_printk
#else
static inline void dbgp_printk(const char *fmt, ...) { }
#endif
typedef void (*set_debug_port_t)(int port);
static void default_set_debug_port(int port)
{
}
static set_debug_port_t set_debug_port = default_set_debug_port;
static void nvidia_set_debug_port(int port)
{
u32 dword;
dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
0x74);
dword &= ~(0x0f<<12);
dword |= ((port & 0x0f)<<12);
write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
dword);
dbgp_printk("set debug port to %d\n", port);
}
static void __init detect_set_debug_port(void)
{
u32 vendorid;
vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
0x00);
if ((vendorid & 0xffff) == 0x10de) {
dbgp_printk("using nvidia set_debug_port\n");
set_debug_port = nvidia_set_debug_port;
}
}
static int __init ehci_setup(void)
{
struct usb_debug_descriptor dbgp_desc;
u32 cmd, ctrl, status, portsc, hcs_params;
u32 debug_port, new_debug_port = 0, n_ports;
u32 devnum;
int ret, i;
int loop;
int port_map_tried;
int playtimes = 3;
try_next_time:
port_map_tried = 0;
try_next_port:
hcs_params = readl(&ehci_caps->hcs_params);
debug_port = HCS_DEBUG_PORT(hcs_params);
n_ports = HCS_N_PORTS(hcs_params);
dbgp_printk("debug_port: %d\n", debug_port);
dbgp_printk("n_ports: %d\n", n_ports);
for (i = 1; i <= n_ports; i++) {
portsc = readl(&ehci_regs->port_status[i-1]);
dbgp_printk("portstatus%d: %08x\n", i, portsc);
}
if (port_map_tried && (new_debug_port != debug_port)) {
if (--playtimes) {
set_debug_port(new_debug_port);
goto try_next_time;
}
return -1;
}
loop = 10;
/* Reset the EHCI controller */
cmd = readl(&ehci_regs->command);
cmd |= CMD_RESET;
writel(cmd, &ehci_regs->command);
do {
cmd = readl(&ehci_regs->command);
} while ((cmd & CMD_RESET) && (--loop > 0));
if (!loop) {
dbgp_printk("can not reset ehci\n");
return -1;
}
dbgp_printk("ehci reset done\n");
/* Claim ownership, but do not enable yet */
ctrl = readl(&ehci_debug->control);
ctrl |= DBGP_OWNER;
ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
writel(ctrl, &ehci_debug->control);
/* Start the ehci running */
cmd = readl(&ehci_regs->command);
cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
cmd |= CMD_RUN;
writel(cmd, &ehci_regs->command);
/* Ensure everything is routed to the EHCI */
writel(FLAG_CF, &ehci_regs->configured_flag);
/* Wait until the controller is no longer halted */
loop = 10;
do {
status = readl(&ehci_regs->status);
} while ((status & STS_HALT) && (--loop > 0));
if (!loop) {
dbgp_printk("ehci can be started\n");
return -1;
}
dbgp_printk("ehci started\n");
/* Wait for a device to show up in the debug port */
ret = ehci_wait_for_port(debug_port);
if (ret < 0) {
dbgp_printk("No device found in debug port\n");
goto next_debug_port;
}
dbgp_printk("ehci wait for port done\n");
/* Enable the debug port */
ctrl = readl(&ehci_debug->control);
ctrl |= DBGP_CLAIM;
writel(ctrl, &ehci_debug->control);
ctrl = readl(&ehci_debug->control);
if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
dbgp_printk("No device in debug port\n");
writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
goto err;
}
dbgp_printk("debug ported enabled\n");
/* Completely transfer the debug device to the debug controller */
portsc = readl(&ehci_regs->port_status[debug_port - 1]);
portsc &= ~PORT_PE;
writel(portsc, &ehci_regs->port_status[debug_port - 1]);
dbgp_mdelay(100);
/* Find the debug device and make it device number 127 */
for (devnum = 0; devnum <= 127; devnum++) {
ret = dbgp_control_msg(devnum,
USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
&dbgp_desc, sizeof(dbgp_desc));
if (ret > 0)
break;
}
if (devnum > 127) {
dbgp_printk("Could not find attached debug device\n");
goto err;
}
if (ret < 0) {
dbgp_printk("Attached device is not a debug device\n");
goto err;
}
dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
/* Move the device to 127 if it isn't already there */
if (devnum != USB_DEBUG_DEVNUM) {
ret = dbgp_control_msg(devnum,
USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
if (ret < 0) {
dbgp_printk("Could not move attached device to %d\n",
USB_DEBUG_DEVNUM);
goto err;
}
devnum = USB_DEBUG_DEVNUM;
dbgp_printk("debug device renamed to 127\n");
}
/* Enable the debug interface */
ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
if (ret < 0) {
dbgp_printk(" Could not enable the debug device\n");
goto err;
}
dbgp_printk("debug interface enabled\n");
/* Perform a small write to get the even/odd data state in sync
*/
ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
if (ret < 0) {
dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
goto err;
}
dbgp_printk("small write doned\n");
return 0;
err:
/* Things didn't work so remove my claim */
ctrl = readl(&ehci_debug->control);
ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
writel(ctrl, &ehci_debug->control);
return -1;
next_debug_port:
port_map_tried |= (1<<(debug_port - 1));
new_debug_port = ((debug_port-1+1)%n_ports) + 1;
if (port_map_tried != ((1<<n_ports) - 1)) {
set_debug_port(new_debug_port);
goto try_next_port;
}
if (--playtimes) {
set_debug_port(new_debug_port);
goto try_next_time;
}
return -1;
}
static int __init early_dbgp_init(char *s)
{
u32 debug_port, bar, offset;
u32 bus, slot, func, cap;
void __iomem *ehci_bar;
u32 dbgp_num;
u32 bar_val;
char *e;
int ret;
u8 byte;
if (!early_pci_allowed())
return -1;
dbgp_num = 0;
if (*s)
dbgp_num = simple_strtoul(s, &e, 10);
dbgp_printk("dbgp_num: %d\n", dbgp_num);
cap = find_dbgp(dbgp_num, &bus, &slot, &func);
if (!cap)
return -1;
dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
func);
debug_port = read_pci_config(bus, slot, func, cap);
bar = (debug_port >> 29) & 0x7;
bar = (bar * 4) + 0xc;
offset = (debug_port >> 16) & 0xfff;
dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
if (bar != PCI_BASE_ADDRESS_0) {
dbgp_printk("only debug ports on bar 1 handled.\n");
return -1;
}
bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
dbgp_printk("only simple 32bit mmio bars supported\n");
return -1;
}
/* double check if the mem space is enabled */
byte = read_pci_config_byte(bus, slot, func, 0x04);
if (!(byte & 0x2)) {
byte |= 0x02;
write_pci_config_byte(bus, slot, func, 0x04, byte);
dbgp_printk("mmio for ehci enabled\n");
}
/*
* FIXME I don't have the bar size so just guess PAGE_SIZE is more
* than enough. 1K is the biggest I have seen.
*/
set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
ehci_bar += bar_val & ~PAGE_MASK;
dbgp_printk("ehci_bar: %p\n", ehci_bar);
ehci_caps = ehci_bar;
ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
ehci_debug = ehci_bar + offset;
ehci_dev.bus = bus;
ehci_dev.slot = slot;
ehci_dev.func = func;
detect_set_debug_port();
ret = ehci_setup();
if (ret < 0) {
dbgp_printk("ehci_setup failed\n");
ehci_debug = NULL;
return -1;
}
return 0;
}
static void early_dbgp_write(struct console *con, const char *str, u32 n)
{
int chunk, ret;
if (!ehci_debug)
return;
while (n > 0) {
chunk = n;
if (chunk > DBGP_MAX_PACKET)
chunk = DBGP_MAX_PACKET;
ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
dbgp_endpoint_out, str, chunk);
str += chunk;
n -= chunk;
}
}
static struct console early_dbgp_console = {
.name = "earlydbg",
.write = early_dbgp_write,
.flags = CON_PRINTBUFFER,
.index = -1,
};
#endif
/* Console interface to a host file on AMD's SimNow! */
static int simnow_fd;
@ -165,6 +889,7 @@ enum {
static noinline long simnow(long cmd, long a, long b, long c)
{
long ret;
asm volatile("cpuid" :
"=a" (ret) :
"b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
static void __init simnow_init(char *str)
{
char *fn = "klog";
if (*str == '=')
fn = ++str;
/* error ignored */
@ -194,7 +920,7 @@ static struct console simnow_console = {
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
static int early_console_initialized;
static int __initdata early_console_initialized;
asmlinkage void early_printk(const char *fmt, ...)
{
@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...)
va_end(ap);
}
static int __initdata keep_early;
static int __init setup_early_printk(char *buf)
{
int keep_early;
if (!buf)
return 0;
@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf)
return 0;
early_console_initialized = 1;
if (strstr(buf, "keep"))
keep_early = 1;
keep_early = (strstr(buf, "keep") != NULL);
if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf)
simnow_init(buf + 6);
early_console = &simnow_console;
keep_early = 1;
#ifdef CONFIG_EARLY_PRINTK_DBGP
} else if (!strncmp(buf, "dbgp", 4)) {
if (early_dbgp_init(buf+4) < 0)
return 0;
early_console = &early_dbgp_console;
/*
* usb subsys will reset ehci controller, so don't keep
* that early console
*/
keep_early = 0;
#endif
#ifdef CONFIG_HVC_XEN
} else if (!strncmp(buf, "xen", 3)) {
early_console = &xenboot_console;
@ -251,4 +988,5 @@ static int __init setup_early_printk(char *buf)
register_console(early_console);
return 0;
}
early_param("earlyprintk", setup_early_printk);

View File

@ -468,9 +468,23 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
static int save_i387_xsave(void __user *buf)
{
struct task_struct *tsk = current;
struct _fpstate_ia32 __user *fx = buf;
int err = 0;
/*
* For legacy compatibility, we always set the FP/SSE bits in the bit
* vector while saving the state to the user context.
* This will enable us to capture any changes (during sigreturn) to
* the FP/SSE bits by legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware applications can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
if (save_i387_fxsave(fx) < 0)
return -1;

View File

@ -52,6 +52,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
(mincount - oldsize) * LDT_ENTRY_SIZE);
paravirt_alloc_ldt(newldt, mincount);
#ifdef CONFIG_X86_64
/* CHECKME: Do we really need this ? */
wmb();
@ -74,6 +76,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#endif
}
if (oldsize) {
paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
@ -85,10 +88,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
int i;
if (err < 0)
return err;
memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
for(i = 0; i < old->size; i++)
write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
}
@ -125,6 +131,7 @@ void destroy_context(struct mm_struct *mm)
if (mm == current->active_mm)
clear_LDT();
#endif
paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else

View File

@ -1,853 +0,0 @@
/*
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
* This driver allows to upgrade microcode on Intel processors
* belonging to IA-32 family - PentiumPro, Pentium II,
* Pentium III, Xeon, Pentium 4, etc.
*
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
* Software Developer's Manual
* Order Number 253668 or free download from:
*
* http://developer.intel.com/design/pentium4/manuals/253668.htm
*
* For more information, go to http://www.urbanmyth.org/microcode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Initial release.
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added 'device trimming' support. open(O_WRONLY) zeroes
* and frees the saved copy of applied microcode.
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Made to use devfs (/dev/cpu/microcode) + cleanups.
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
* Added misc device support (now uses both devfs and misc).
* Added MICROCODE_IOCFREE ioctl to clear memory.
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
* Messages for error cases (non Intel & no suitable microcode).
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
* Removed ->release(). Removed exclusive open and status bitmap.
* Added microcode_rwsem to serialize read()/write()/ioctl().
* Removed global kernel lock usage.
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
* Write 0 to 0x8B msr and then cpuid before reading revision,
* so that it works even if there were no update done by the
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
* to be 0 on my machine which is why it worked even when I
* disabled update by the BIOS)
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
* Tigran Aivazian <tigran@veritas.com>
* Intel Pentium 4 processor support and bugfixes.
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
* Bugfix for HT (Hyper-Threading) enabled processors
* whereby processor resources are shared by all logical processors
* in a single CPU package.
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
* Tigran Aivazian <tigran@veritas.com>,
* Serialize updates as required on HT processors due to speculative
* nature of implementation.
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
* Fix the panic when writing zero-length microcode chunk.
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
* Jun Nakajima <jun.nakajima@intel.com>
* Support for the microcode updates in the new format.
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
* because we no longer hold a copy of applied microcode
* in kernel memory.
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
//#define DEBUG /* pr_debug */
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
#define MICROCODE_VERSION "1.14a"
#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */
#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */
#define DWSIZE (sizeof (u32))
#define get_totalsize(mc) \
(((microcode_t *)mc)->hdr.totalsize ? \
((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
#define get_datasize(mc) \
(((microcode_t *)mc)->hdr.datasize ? \
((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
#define sigmatch(s1, s2, p1, p2) \
(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
static DEFINE_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
static struct ucode_cpu_info {
int valid;
unsigned int sig;
unsigned int pf;
unsigned int rev;
microcode_t *mc;
} ucode_cpu_info[NR_CPUS];
static void collect_cpu_info(int cpu_num)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
unsigned int val[2];
/* We should bind the task to the CPU */
BUG_ON(raw_smp_processor_id() != cpu_num);
uci->pf = uci->rev = 0;
uci->mc = NULL;
uci->valid = 1;
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
printk(KERN_ERR "microcode: CPU%d not a capable Intel "
"processor\n", cpu_num);
uci->valid = 0;
return;
}
uci->sig = cpuid_eax(0x00000001);
if ((c->x86_model >= 5) || (c->x86 > 6)) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
uci->pf = 1 << ((val[1] >> 18) & 7);
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
uci->sig, uci->pf, uci->rev);
}
static inline int microcode_update_match(int cpu_num,
microcode_header_t *mc_header, int sig, int pf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
if (!sigmatch(sig, uci->sig, pf, uci->pf)
|| mc_header->rev <= uci->rev)
return 0;
return 1;
}
static int microcode_sanity_check(void *mc)
{
microcode_header_t *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
struct extended_signature *ext_sig;
unsigned long total_size, data_size, ext_table_size;
int sum, orig_sum, ext_sigcount = 0, i;
total_size = get_totalsize(mc_header);
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
printk(KERN_ERR "microcode: error! "
"Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
printk(KERN_ERR "microcode: error! "
"Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! "
"Small exttable size in microcode data file\n");
return -EINVAL;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
printk(KERN_ERR "microcode: error! "
"Bad exttable size in microcode data file\n");
return -EFAULT;
}
ext_sigcount = ext_header->count;
}
/* check extended table checksum */
if (ext_table_size) {
int ext_table_sum = 0;
int *ext_tablep = (int *)ext_header;
i = ext_table_size / DWSIZE;
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
printk(KERN_WARNING "microcode: aborting, "
"bad extended signature table checksum\n");
return -EINVAL;
}
}
/* calculate the checksum */
orig_sum = 0;
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
return 0;
/* check extended signature checksum */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
sum = orig_sum
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
}
return 0;
}
/*
* return 0 - no update found
* return 1 - found update
* return < 0 - error
*/
static int get_maching_microcode(void *mc, int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
microcode_header_t *mc_header = mc;
struct extended_sigtable *ext_header;
unsigned long total_size = get_totalsize(mc_header);
int ext_sigcount, i;
struct extended_signature *ext_sig;
void *new_mc;
if (microcode_update_match(cpu, mc_header,
mc_header->sig, mc_header->pf))
goto find;
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
return 0;
ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
ext_sigcount = ext_header->count;
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
for (i = 0; i < ext_sigcount; i++) {
if (microcode_update_match(cpu, mc_header,
ext_sig->sig, ext_sig->pf))
goto find;
ext_sig++;
}
return 0;
find:
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
new_mc = vmalloc(total_size);
if (!new_mc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
return -ENOMEM;
}
/* free previous update file */
vfree(uci->mc);
memcpy(new_mc, mc, total_size);
uci->mc = new_mc;
return 1;
}
static void apply_microcode(int cpu)
{
unsigned long flags;
unsigned int val[2];
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
if (uci->mc == NULL)
return;
/* serialize access to the physical write to MSR 0x79 */
spin_lock_irqsave(&microcode_update_lock, flags);
/* write microcode via MSR 0x79 */
wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) uci->mc->bits,
(unsigned long) uci->mc->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
spin_unlock_irqrestore(&microcode_update_lock, flags);
if (val[1] != uci->mc->hdr.rev) {
printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x, date = %08x \n",
cpu_num, uci->rev, val[1], uci->mc->hdr.date);
uci->rev = val[1];
}
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
static void __user *user_buffer; /* user area microcode data buffer */
static unsigned int user_buffer_size; /* it's size */
static long get_next_ucode(void **mc, long offset)
{
microcode_header_t mc_header;
unsigned long total_size;
/* No more data */
if (offset >= user_buffer_size)
return 0;
if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
return -EFAULT;
}
total_size = get_totalsize(&mc_header);
if (offset + total_size > user_buffer_size) {
printk(KERN_ERR "microcode: error! Bad total size in microcode "
"data file\n");
return -EINVAL;
}
*mc = vmalloc(total_size);
if (!*mc)
return -ENOMEM;
if (copy_from_user(*mc, user_buffer + offset, total_size)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
vfree(*mc);
return -EFAULT;
}
return offset + total_size;
}
static int do_microcode_update (void)
{
long cursor = 0;
int error = 0;
void *new_mc = NULL;
int cpu;
cpumask_t old;
old = current->cpus_allowed;
while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
error = microcode_sanity_check(new_mc);
if (error)
goto out;
/*
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->valid)
continue;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
error = get_maching_microcode(new_mc, cpu);
if (error < 0)
goto out;
if (error == 1)
apply_microcode(cpu);
}
vfree(new_mc);
}
out:
if (cursor > 0)
vfree(new_mc);
if (cursor < 0)
error = cursor;
set_cpus_allowed_ptr(current, &old);
return error;
}
static int microcode_open (struct inode *unused1, struct file *unused2)
{
cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
{
ssize_t ret;
if ((len >> PAGE_SHIFT) > num_physpages) {
printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
return -EINVAL;
}
get_online_cpus();
mutex_lock(&microcode_mutex);
user_buffer = (void __user *) buf;
user_buffer_size = (int) len;
ret = do_microcode_update();
if (!ret)
ret = (ssize_t)len;
mutex_unlock(&microcode_mutex);
put_online_cpus();
return ret;
}
static const struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
.open = microcode_open,
};
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
.fops = &microcode_fops,
};
static int __init microcode_dev_init (void)
{
int error;
error = misc_register(&microcode_dev);
if (error) {
printk(KERN_ERR
"microcode: can't misc_register on minor=%d\n",
MICROCODE_MINOR);
return error;
}
return 0;
}
static void microcode_dev_exit (void)
{
misc_deregister(&microcode_dev);
}
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while(0)
#endif
static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
unsigned long size, long offset)
{
microcode_header_t *mc_header;
unsigned long total_size;
/* No more data */
if (offset >= size)
return 0;
mc_header = (microcode_header_t *)(buf + offset);
total_size = get_totalsize(mc_header);
if (offset + total_size > size) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
return -EINVAL;
}
*mc = vmalloc(total_size);
if (!*mc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
return -ENOMEM;
}
memcpy(*mc, buf + offset, total_size);
return offset + total_size;
}
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;
static int cpu_request_microcode(int cpu)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
const u8 *buf;
unsigned long size;
long offset = 0;
int error;
void *mc;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
sprintf(name,"intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
error = request_firmware(&firmware, name, &microcode_pdev->dev);
if (error) {
pr_debug("microcode: data file %s load failed\n", name);
return error;
}
buf = firmware->data;
size = firmware->size;
while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
> 0) {
error = microcode_sanity_check(mc);
if (error)
break;
error = get_maching_microcode(mc, cpu);
if (error < 0)
break;
/*
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
if (error == 1) {
apply_microcode(cpu);
error = 0;
}
vfree(mc);
}
if (offset > 0)
vfree(mc);
if (offset < 0)
error = offset;
release_firmware(firmware);
return error;
}
static int apply_microcode_check_cpu(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
cpumask_t old;
unsigned int val[2];
int err = 0;
/* Check if the microcode is available */
if (!uci->mc)
return 0;
old = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
/* Check if the microcode we have in memory matches the CPU */
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
err = -EINVAL;
if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
if (uci->pf != (1 << ((val[1] >> 18) & 7)))
err = -EINVAL;
}
if (!err) {
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
if (uci->rev != val[1])
err = -EINVAL;
}
if (!err)
apply_microcode(cpu);
else
printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
" sig=0x%x, pf=0x%x, rev=0x%x\n",
cpu, uci->sig, uci->pf, uci->rev);
set_cpus_allowed_ptr(current, &old);
return err;
}
static void microcode_init_cpu(int cpu, int resume)
{
cpumask_t old;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
old = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed_ptr(current, &old);
}
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
mutex_lock(&microcode_mutex);
uci->valid = 0;
vfree(uci->mc);
uci->mc = NULL;
mutex_unlock(&microcode_mutex);
}
static ssize_t reload_store(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t sz)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
unsigned long val = simple_strtoul(buf, &end, 0);
int err = 0;
int cpu = dev->id;
if (end == buf)
return -EINVAL;
if (val == 1) {
cpumask_t old = current->cpus_allowed;
get_online_cpus();
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
if (uci->valid)
err = cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
put_online_cpus();
set_cpus_allowed_ptr(current, &old);
}
if (err)
return err;
return sz;
}
static ssize_t version_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->rev);
}
static ssize_t pf_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->pf);
}
static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
static SYSDEV_ATTR(version, 0400, version_show, NULL);
static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
static struct attribute *mc_default_attrs[] = {
&attr_reload.attr,
&attr_version.attr,
&attr_processor_flags.attr,
NULL
};
static struct attribute_group mc_attr_group = {
.attrs = mc_default_attrs,
.name = "microcode",
};
static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
{
int err, cpu = sys_dev->id;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d added\n", cpu);
memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
return err;
microcode_init_cpu(cpu, resume);
return 0;
}
static int mc_sysdev_add(struct sys_device *sys_dev)
{
return __mc_sysdev_add(sys_dev, 0);
}
static int mc_sysdev_remove(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}
static int mc_sysdev_resume(struct sys_device *dev)
{
int cpu = dev->id;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d resumed\n", cpu);
/* only CPU 0 will apply ucode here */
apply_microcode(0);
return 0;
}
static struct sysdev_driver mc_sysdev_driver = {
.add = mc_sysdev_add,
.remove = mc_sysdev_remove,
.resume = mc_sysdev_resume,
};
static __cpuinit int
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_UP_CANCELED_FROZEN:
/* The CPU refused to come up during a system resume */
microcode_fini_cpu(cpu);
break;
case CPU_ONLINE:
case CPU_DOWN_FAILED:
mc_sysdev_add(sys_dev);
break;
case CPU_ONLINE_FROZEN:
/* System-wide resume is in progress, try to apply microcode */
if (apply_microcode_check_cpu(cpu)) {
/* The application of microcode failed */
microcode_fini_cpu(cpu);
__mc_sysdev_add(sys_dev, 1);
break;
}
case CPU_DOWN_FAILED_FROZEN:
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
printk(KERN_ERR "microcode: Failed to create the sysfs "
"group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
mc_sysdev_remove(sys_dev);
break;
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
break;
}
return NOTIFY_OK;
}
static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
static int __init microcode_init (void)
{
int error;
printk(KERN_INFO
"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
error = microcode_dev_init();
if (error)
return error;
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
if (IS_ERR(microcode_pdev)) {
microcode_dev_exit();
return PTR_ERR(microcode_pdev);
}
get_online_cpus();
error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
if (error) {
microcode_dev_exit();
platform_device_unregister(microcode_pdev);
return error;
}
register_hotcpu_notifier(&mc_cpu_notifier);
return 0;
}
static void __exit microcode_exit (void)
{
microcode_dev_exit();
unregister_hotcpu_notifier(&mc_cpu_notifier);
get_online_cpus();
sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
platform_device_unregister(microcode_pdev);
}
module_init(microcode_init)
module_exit(microcode_exit)

View File

@ -0,0 +1,435 @@
/*
* AMD CPU Microcode Update Driver for Linux
* Copyright (C) 2008 Advanced Micro Devices Inc.
*
* Author: Peter Oruba <peter.oruba@amd.com>
*
* Based on work by:
* Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
*
 * This driver allows updating microcode on AMD
* family 0x10 and 0x11 processors.
*
 * Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
MODULE_DESCRIPTION("AMD Microcode Update Driver");
MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
MODULE_LICENSE("GPL v2");
#define UCODE_MAGIC 0x00414d44
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
struct equiv_cpu_entry {
unsigned int installed_cpu;
unsigned int fixed_errata_mask;
unsigned int fixed_errata_compare;
unsigned int equiv_cpu;
};
struct microcode_header_amd {
unsigned int data_code;
unsigned int patch_id;
unsigned char mc_patch_data_id[2];
unsigned char mc_patch_data_len;
unsigned char init_flag;
unsigned int mc_patch_data_checksum;
unsigned int nb_dev_id;
unsigned int sb_dev_id;
unsigned char processor_rev_id[2];
unsigned char nb_rev_id;
unsigned char sb_rev_id;
unsigned char bios_api_rev;
unsigned char reserved1[3];
unsigned int match_reg[8];
};
struct microcode_amd {
struct microcode_header_amd hdr;
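	/* mpb: flexible array member holding the raw patch data written to the CPU */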
unsigned int mpb[0];
};
#define UCODE_MAX_SIZE (2048)
#define DEFAULT_UCODE_DATASIZE (896)
#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define DWSIZE (sizeof(u32))
/* For now we support a fixed ucode total size only */
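/* mc_patch_data_len counts data in 28-byte units; the fixed header size is added on top. */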
#define get_totalsize(mc) \
((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
+ MC_HEADER_SIZE)
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
static struct equiv_cpu_entry *equiv_cpu_table;
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
cpu);
return -1;
}
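	/* MSR 0x0000008B holds the currently applied patch level on AMD CPUs. */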
asm volatile("movl %1, %%ecx; rdmsr"
: "=a" (csig->rev)
: "i" (0x0000008B) : "ecx");
printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
csig->rev);
return 0;
}
static int get_matching_microcode(int cpu, void *mc, int rev)
{
struct microcode_header_amd *mc_header = mc;
struct pci_dev *nb_pci_dev, *sb_pci_dev;
unsigned int current_cpu_id;
unsigned int equiv_cpu_id = 0x00;
unsigned int i = 0;
BUG_ON(equiv_cpu_table == NULL);
current_cpu_id = cpuid_eax(0x00000001);
while (equiv_cpu_table[i].installed_cpu != 0) {
if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
break;
}
i++;
}
if (!equiv_cpu_id) {
printk(KERN_ERR "microcode: CPU%d cpu_id "
"not found in equivalent cpu table \n", cpu);
return 0;
}
if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) {
printk(KERN_ERR
"microcode: CPU%d patch does not match "
"(patch is %x, cpu extended is %x) \n",
cpu, mc_header->processor_rev_id[0],
(equiv_cpu_id & 0xff));
return 0;
}
if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) {
printk(KERN_ERR "microcode: CPU%d patch does not match "
"(patch is %x, cpu base id is %x) \n",
cpu, mc_header->processor_rev_id[1],
((equiv_cpu_id >> 16) & 0xff));
return 0;
}
/* ucode may be northbridge specific */
if (mc_header->nb_dev_id) {
nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
(mc_header->nb_dev_id & 0xff),
NULL);
if ((!nb_pci_dev) ||
(mc_header->nb_rev_id != nb_pci_dev->revision)) {
printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
pci_dev_put(nb_pci_dev);
return 0;
}
pci_dev_put(nb_pci_dev);
}
/* ucode may be southbridge specific */
if (mc_header->sb_dev_id) {
sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
(mc_header->sb_dev_id & 0xff),
NULL);
if ((!sb_pci_dev) ||
(mc_header->sb_rev_id != sb_pci_dev->revision)) {
printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
pci_dev_put(sb_pci_dev);
return 0;
}
pci_dev_put(sb_pci_dev);
}
if (mc_header->patch_id <= rev)
return 0;
return 1;
}
static void apply_microcode_amd(int cpu)
{
unsigned long flags;
unsigned int eax, edx;
unsigned int rev;
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
struct microcode_amd *mc_amd = uci->mc;
unsigned long addr;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
if (mc_amd == NULL)
return;
spin_lock_irqsave(&microcode_update_lock, flags);
addr = (unsigned long)&mc_amd->hdr.data_code;
edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
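	/* MSR 0xc0010020 is the AMD patch loader: writing the patch address (edx:eax) applies the update. */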
asm volatile("movl %0, %%ecx; wrmsr" :
: "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
/* get patch id after patching */
asm volatile("movl %1, %%ecx; rdmsr"
: "=a" (rev)
: "i" (0x0000008B) : "ecx");
spin_unlock_irqrestore(&microcode_update_lock, flags);
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num,
mc_amd->hdr.patch_id, rev);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x \n",
cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
uci->cpu_sig.rev = rev;
}
static void * get_next_ucode(u8 *buf, unsigned int size,
int (*get_ucode_data)(void *, const void *, size_t),
unsigned int *mc_size)
{
unsigned int total_size;
#define UCODE_CONTAINER_SECTION_HDR 8
u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
void *mc;
if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
return NULL;
if (section_hdr[0] != UCODE_UCODE_TYPE) {
printk(KERN_ERR "microcode: error! "
"Wrong microcode payload type field\n");
return NULL;
}
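	/* Bytes 4-5 of the section header encode the patch size, little endian. */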
total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
printk(KERN_INFO "microcode: size %u, total_size %u\n",
size, total_size);
if (total_size > size || total_size > UCODE_MAX_SIZE) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
return NULL;
}
mc = vmalloc(UCODE_MAX_SIZE);
if (mc) {
memset(mc, 0, UCODE_MAX_SIZE);
if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) {
vfree(mc);
mc = NULL;
} else
*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
}
#undef UCODE_CONTAINER_SECTION_HDR
return mc;
}
static int install_equiv_cpu_table(u8 *buf,
int (*get_ucode_data)(void *, const void *, size_t))
{
#define UCODE_CONTAINER_HEADER_SIZE 12
u8 container_hdr[UCODE_CONTAINER_HEADER_SIZE];
unsigned int *buf_pos = (unsigned int *)container_hdr;
unsigned long size;
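	/* 12-byte container header: three dwords, presumably magic, section type and section size (only the last two are checked here). */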
if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
return 0;
size = buf_pos[2];
if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
printk(KERN_ERR "microcode: error! "
"Wrong microcode equivalnet cpu table\n");
return 0;
}
equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
if (!equiv_cpu_table) {
printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n");
return 0;
}
buf += UCODE_CONTAINER_HEADER_SIZE;
if (get_ucode_data(equiv_cpu_table, buf, size)) {
vfree(equiv_cpu_table);
return 0;
}
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
#undef UCODE_CONTAINER_HEADER_SIZE
}
static void free_equiv_cpu_table(void)
{
if (equiv_cpu_table) {
vfree(equiv_cpu_table);
equiv_cpu_table = NULL;
}
}
static int generic_load_microcode(int cpu, void *data, size_t size,
int (*get_ucode_data)(void *, const void *, size_t))
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
u8 *ucode_ptr = data, *new_mc = NULL, *mc;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover;
unsigned long offset;
offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data);
if (!offset) {
printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
return -EINVAL;
}
ucode_ptr += offset;
leftover = size - offset;
while (leftover) {
unsigned int uninitialized_var(mc_size);
struct microcode_header_amd *mc_header;
mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
if (!mc)
break;
mc_header = (struct microcode_header_amd *)mc;
if (get_matching_microcode(cpu, mc, new_rev)) {
if (new_mc)
vfree(new_mc);
new_rev = mc_header->patch_id;
new_mc = mc;
} else
vfree(mc);
ucode_ptr += mc_size;
leftover -= mc_size;
}
if (new_mc) {
if (!leftover) {
if (uci->mc)
vfree(uci->mc);
uci->mc = new_mc;
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
} else
vfree(new_mc);
}
free_equiv_cpu_table();
return (int)leftover;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
{
memcpy(to, from, n);
return 0;
}
static int request_microcode_fw(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
const struct firmware *firmware;
int ret;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
ret = request_firmware(&firmware, fw_name, device);
if (ret) {
printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name);
return ret;
}
ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
&get_ucode_fw);
release_firmware(firmware);
return ret;
}
static int request_microcode_user(int cpu, const void __user *buf, size_t size)
{
printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode"
"is not supported\n");
return -1;
}
static void microcode_fini_cpu_amd(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
vfree(uci->mc);
uci->mc = NULL;
}
static struct microcode_ops microcode_amd_ops = {
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
};
struct microcode_ops * __init init_amd_microcode(void)
{
return &microcode_amd_ops;
}

View File

@ -0,0 +1,508 @@
/*
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
 * This driver allows updating microcode on Intel processors
* belonging to IA-32 family - PentiumPro, Pentium II,
* Pentium III, Xeon, Pentium 4, etc.
*
 * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
* Software Developer's Manual
* Order Number 253668 or free download from:
*
* http://developer.intel.com/design/pentium4/manuals/253668.htm
*
* For more information, go to http://www.urbanmyth.org/microcode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Initial release.
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added 'device trimming' support. open(O_WRONLY) zeroes
* and frees the saved copy of applied microcode.
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Made to use devfs (/dev/cpu/microcode) + cleanups.
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
* Added misc device support (now uses both devfs and misc).
* Added MICROCODE_IOCFREE ioctl to clear memory.
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
* Messages for error cases (non Intel & no suitable microcode).
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
* Removed ->release(). Removed exclusive open and status bitmap.
* Added microcode_rwsem to serialize read()/write()/ioctl().
* Removed global kernel lock usage.
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
* Write 0 to 0x8B msr and then cpuid before reading revision,
* so that it works even if there were no update done by the
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
* to be 0 on my machine which is why it worked even when I
* disabled update by the BIOS)
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
* Tigran Aivazian <tigran@veritas.com>
* Intel Pentium 4 processor support and bugfixes.
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
* Bugfix for HT (Hyper-Threading) enabled processors
* whereby processor resources are shared by all logical processors
* in a single CPU package.
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
* Tigran Aivazian <tigran@veritas.com>,
* Serialize updates as required on HT processors due to
* speculative nature of implementation.
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
* Fix the panic when writing zero-length microcode chunk.
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
* Jun Nakajima <jun.nakajima@intel.com>
* Support for the microcode updates in the new format.
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
* because we no longer hold a copy of applied microcode
* in kernel memory.
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
#define MICROCODE_VERSION "2.00"
struct microcode_ops *microcode_ops;
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
EXPORT_SYMBOL_GPL(ucode_cpu_info);
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
static int do_microcode_update(const void __user *buf, size_t size)
{
cpumask_t old;
int error = 0;
int cpu;
old = current->cpus_allowed;
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->valid)
continue;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
error = microcode_ops->request_microcode_user(cpu, buf, size);
if (error < 0)
goto out;
if (!error)
microcode_ops->apply_microcode(cpu);
}
out:
set_cpus_allowed_ptr(current, &old);
return error;
}
static int microcode_open(struct inode *unused1, struct file *unused2)
{
cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
static ssize_t microcode_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
ssize_t ret;
if ((len >> PAGE_SHIFT) > num_physpages) {
printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
num_physpages);
return -EINVAL;
}
get_online_cpus();
mutex_lock(&microcode_mutex);
ret = do_microcode_update(buf, len);
if (!ret)
ret = (ssize_t)len;
mutex_unlock(&microcode_mutex);
put_online_cpus();
return ret;
}
static const struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
.open = microcode_open,
};
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
.fops = &microcode_fops,
};
static int __init microcode_dev_init(void)
{
int error;
error = misc_register(&microcode_dev);
if (error) {
printk(KERN_ERR
"microcode: can't misc_register on minor=%d\n",
MICROCODE_MINOR);
return error;
}
return 0;
}
static void microcode_dev_exit(void)
{
misc_deregister(&microcode_dev);
}
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while (0)
#endif
/* fake device for request_firmware */
struct platform_device *microcode_pdev;
static ssize_t reload_store(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t sz)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
unsigned long val = simple_strtoul(buf, &end, 0);
int err = 0;
int cpu = dev->id;
if (end == buf)
return -EINVAL;
if (val == 1) {
cpumask_t old = current->cpus_allowed;
get_online_cpus();
if (cpu_online(cpu)) {
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
if (uci->valid) {
err = microcode_ops->request_microcode_fw(cpu,
&microcode_pdev->dev);
if (!err)
microcode_ops->apply_microcode(cpu);
}
mutex_unlock(&microcode_mutex);
set_cpus_allowed_ptr(current, &old);
}
put_online_cpus();
}
if (err)
return err;
return sz;
}
static ssize_t version_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
}
static ssize_t pf_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
}
static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
static SYSDEV_ATTR(version, 0400, version_show, NULL);
static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
static struct attribute *mc_default_attrs[] = {
&attr_reload.attr,
&attr_version.attr,
&attr_processor_flags.attr,
NULL
};
static struct attribute_group mc_attr_group = {
.attrs = mc_default_attrs,
.name = "microcode",
};
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
mutex_lock(&microcode_mutex);
microcode_ops->microcode_fini_cpu(cpu);
uci->valid = 0;
mutex_unlock(&microcode_mutex);
}
static void collect_cpu_info(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
memset(uci, 0, sizeof(*uci));
if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
uci->valid = 1;
}
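/* Returns 0 if the cached microcode can simply be re-applied, non-zero if it cannot (nothing cached or the CPU signature changed). */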
static int microcode_resume_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpu_signature nsig;
pr_debug("microcode: CPU%d resumed\n", cpu);
if (!uci->mc)
return 1;
/*
* Let's verify that the 'cached' ucode does belong
* to this cpu (a bit of paranoia):
*/
if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
microcode_fini_cpu(cpu);
return -1;
}
if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
microcode_fini_cpu(cpu);
/* Should we look for a new ucode here? */
return 1;
}
return 0;
}
void microcode_update_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
int err = 0;
/*
	 * Check if the system resume is in progress (uci->valid is set),
* otherwise just request a firmware:
*/
if (uci->valid) {
err = microcode_resume_cpu(cpu);
} else {
collect_cpu_info(cpu);
if (uci->valid && system_state == SYSTEM_RUNNING)
err = microcode_ops->request_microcode_fw(cpu,
&microcode_pdev->dev);
}
if (!err)
microcode_ops->apply_microcode(cpu);
}
static void microcode_init_cpu(int cpu)
{
cpumask_t old = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
/* We should bind the task to the CPU */
BUG_ON(raw_smp_processor_id() != cpu);
mutex_lock(&microcode_mutex);
microcode_update_cpu(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed_ptr(current, &old);
}
static int mc_sysdev_add(struct sys_device *sys_dev)
{
int err, cpu = sys_dev->id;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d added\n", cpu);
memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
return err;
microcode_init_cpu(cpu);
return 0;
}
static int mc_sysdev_remove(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}
static int mc_sysdev_resume(struct sys_device *dev)
{
int cpu = dev->id;
if (!cpu_online(cpu))
return 0;
/* only CPU 0 will apply ucode here */
microcode_update_cpu(0);
return 0;
}
static struct sysdev_driver mc_sysdev_driver = {
.add = mc_sysdev_add,
.remove = mc_sysdev_remove,
.resume = mc_sysdev_resume,
};
static __cpuinit int
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
microcode_init_cpu(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
pr_debug("microcode: CPU%d added\n", cpu);
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
printk(KERN_ERR "microcode: Failed to create the sysfs "
"group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
pr_debug("microcode: CPU%d removed\n", cpu);
break;
case CPU_DEAD:
case CPU_UP_CANCELED_FROZEN:
/* The CPU refused to come up during a system resume */
microcode_fini_cpu(cpu);
break;
}
return NOTIFY_OK;
}
static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
int error;
if (c->x86_vendor == X86_VENDOR_INTEL)
microcode_ops = init_intel_microcode();
else if (c->x86_vendor == X86_VENDOR_AMD)
microcode_ops = init_amd_microcode();
if (!microcode_ops) {
printk(KERN_ERR "microcode: no support for this CPU vendor\n");
return -ENODEV;
}
error = microcode_dev_init();
if (error)
return error;
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
if (IS_ERR(microcode_pdev)) {
microcode_dev_exit();
return PTR_ERR(microcode_pdev);
}
get_online_cpus();
error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
if (error) {
microcode_dev_exit();
platform_device_unregister(microcode_pdev);
return error;
}
register_hotcpu_notifier(&mc_cpu_notifier);
printk(KERN_INFO
"Microcode Update Driver: v" MICROCODE_VERSION
" <tigran@aivazian.fsnet.co.uk>"
" <peter.oruba@amd.com>\n");
return 0;
}
static void __exit microcode_exit(void)
{
microcode_dev_exit();
unregister_hotcpu_notifier(&mc_cpu_notifier);
get_online_cpus();
sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
platform_device_unregister(microcode_pdev);
microcode_ops = NULL;
printk(KERN_INFO
"Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
}
module_init(microcode_init);
module_exit(microcode_exit);

View File

@ -0,0 +1,480 @@
/*
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
 * This driver allows updating microcode on Intel processors
* belonging to IA-32 family - PentiumPro, Pentium II,
* Pentium III, Xeon, Pentium 4, etc.
*
 * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
* Software Developer's Manual
* Order Number 253668 or free download from:
*
* http://developer.intel.com/design/pentium4/manuals/253668.htm
*
* For more information, go to http://www.urbanmyth.org/microcode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Initial release.
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added 'device trimming' support. open(O_WRONLY) zeroes
* and frees the saved copy of applied microcode.
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Made to use devfs (/dev/cpu/microcode) + cleanups.
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
* Added misc device support (now uses both devfs and misc).
* Added MICROCODE_IOCFREE ioctl to clear memory.
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
* Messages for error cases (non Intel & no suitable microcode).
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
* Removed ->release(). Removed exclusive open and status bitmap.
* Added microcode_rwsem to serialize read()/write()/ioctl().
* Removed global kernel lock usage.
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
* Write 0 to 0x8B msr and then cpuid before reading revision,
* so that it works even if there were no update done by the
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
* to be 0 on my machine which is why it worked even when I
* disabled update by the BIOS)
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
* Tigran Aivazian <tigran@veritas.com>
* Intel Pentium 4 processor support and bugfixes.
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
* Bugfix for HT (Hyper-Threading) enabled processors
* whereby processor resources are shared by all logical processors
* in a single CPU package.
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
* Tigran Aivazian <tigran@veritas.com>,
* Serialize updates as required on HT processors due to
* speculative nature of implementation.
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
* Fix the panic when writing zero-length microcode chunk.
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
* Jun Nakajima <jun.nakajima@intel.com>
* Support for the microcode updates in the new format.
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
* because we no longer hold a copy of applied microcode
* in kernel memory.
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
struct microcode_header_intel {
unsigned int hdrver;
unsigned int rev;
unsigned int date;
unsigned int sig;
unsigned int cksum;
unsigned int ldrver;
unsigned int pf;
unsigned int datasize;
unsigned int totalsize;
unsigned int reserved[3];
};
struct microcode_intel {
struct microcode_header_intel hdr;
unsigned int bits[0];
};
/* microcode format is extended from prescott processors */
struct extended_signature {
unsigned int sig;
unsigned int pf;
unsigned int cksum;
};
struct extended_sigtable {
unsigned int count;
unsigned int cksum;
unsigned int reserved[3];
struct extended_signature sigs[0];
};
#define DEFAULT_UCODE_DATASIZE (2000)
#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
#define DWSIZE (sizeof(u32))
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.totalsize ? \
((struct microcode_intel *)mc)->hdr.totalsize : \
DEFAULT_UCODE_TOTALSIZE)
#define get_datasize(mc) \
(((struct microcode_intel *)mc)->hdr.datasize ? \
((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
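/* Two signatures match when the IDs are equal and their platform-flag masks overlap (or both are zero, as on older CPUs). */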
#define sigmatch(s1, s2, p1, p2) \
(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
static DEFINE_SPINLOCK(microcode_update_lock);
static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
unsigned int val[2];
memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
printk(KERN_ERR "microcode: CPU%d not a capable Intel "
"processor\n", cpu_num);
return -1;
}
csig->sig = cpuid_eax(0x00000001);
if ((c->x86_model >= 5) || (c->x86 > 6)) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
csig->pf = 1 << ((val[1] >> 18) & 7);
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
csig->sig, csig->pf, csig->rev);
return 0;
}
static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
{
return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
}
static inline int
update_match_revision(struct microcode_header_intel *mc_header, int rev)
{
return (mc_header->rev <= rev) ? 0 : 1;
}
static int microcode_sanity_check(void *mc)
{
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
struct extended_signature *ext_sig;
unsigned long total_size, data_size, ext_table_size;
int sum, orig_sum, ext_sigcount = 0, i;
total_size = get_totalsize(mc_header);
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
printk(KERN_ERR "microcode: error! "
"Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
printk(KERN_ERR "microcode: error! "
"Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! "
"Small exttable size in microcode data file\n");
return -EINVAL;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
printk(KERN_ERR "microcode: error! "
"Bad exttable size in microcode data file\n");
return -EFAULT;
}
ext_sigcount = ext_header->count;
}
/* check extended table checksum */
if (ext_table_size) {
int ext_table_sum = 0;
int *ext_tablep = (int *)ext_header;
i = ext_table_size / DWSIZE;
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
printk(KERN_WARNING "microcode: aborting, "
"bad extended signature table checksum\n");
return -EINVAL;
}
}
/* calculate the checksum */
orig_sum = 0;
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
return 0;
/* check extended signature checksum */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
sum = orig_sum
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
}
return 0;
}
/*
* return 0 - no update found
* return 1 - found update
*/
static int
get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
{
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header;
unsigned long total_size = get_totalsize(mc_header);
int ext_sigcount, i;
struct extended_signature *ext_sig;
if (!update_match_revision(mc_header, rev))
return 0;
if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
return 1;
/* Look for ext. headers: */
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
return 0;
ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
ext_sigcount = ext_header->count;
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
for (i = 0; i < ext_sigcount; i++) {
if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
return 1;
ext_sig++;
}
return 0;
}
static void apply_microcode(int cpu)
{
unsigned long flags;
unsigned int val[2];
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct microcode_intel *mc_intel = uci->mc;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
if (mc_intel == NULL)
return;
/* serialize access to the physical write to MSR 0x79 */
spin_lock_irqsave(&microcode_update_lock, flags);
/* write microcode via MSR 0x79 */
wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) mc_intel->bits,
(unsigned long) mc_intel->bits >> 16 >> 16);
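	/* The double 16-bit shift yields the upper 32 bits of the address without tripping shift-width warnings on 32-bit builds. */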
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
spin_unlock_irqrestore(&microcode_update_lock, flags);
if (val[1] != mc_intel->hdr.rev) {
printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x, date = %04x-%02x-%02x \n",
cpu_num, uci->cpu_sig.rev, val[1],
mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24,
(mc_intel->hdr.date >> 16) & 0xff);
uci->cpu_sig.rev = val[1];
}
static int generic_load_microcode(int cpu, void *data, size_t size,
int (*get_ucode_data)(void *, const void *, size_t))
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
u8 *ucode_ptr = data, *new_mc = NULL, *mc;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover = size;
while (leftover) {
struct microcode_header_intel mc_header;
unsigned int mc_size;
if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
break;
mc_size = get_totalsize(&mc_header);
if (!mc_size || mc_size > leftover) {
printk(KERN_ERR "microcode: error!"
"Bad data in microcode data file\n");
break;
}
mc = vmalloc(mc_size);
if (!mc)
break;
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
microcode_sanity_check(mc) < 0) {
vfree(mc);
break;
}
if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
if (new_mc)
vfree(new_mc);
new_rev = mc_header.rev;
new_mc = mc;
} else
vfree(mc);
ucode_ptr += mc_size;
leftover -= mc_size;
}
if (new_mc) {
if (!leftover) {
if (uci->mc)
vfree(uci->mc);
uci->mc = (struct microcode_intel *)new_mc;
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
} else
vfree(new_mc);
}
return (int)leftover;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
{
memcpy(to, from, n);
return 0;
}
static int request_microcode_fw(int cpu, struct device *device)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
int ret;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
sprintf(name, "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
ret = request_firmware(&firmware, name, device);
if (ret) {
pr_debug("microcode: data file %s load failed\n", name);
return ret;
}
ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
&get_ucode_fw);
release_firmware(firmware);
return ret;
}
static int get_ucode_user(void *to, const void *from, size_t n)
{
return copy_from_user(to, from, n);
}
static int request_microcode_user(int cpu, const void __user *buf, size_t size)
{
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
}
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
vfree(uci->mc);
uci->mc = NULL;
}
struct microcode_ops microcode_intel_ops = {
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode,
.microcode_fini_cpu = microcode_fini_cpu,
};
struct microcode_ops * __init init_intel_microcode(void)
{
return &microcode_intel_ops;
}

View File

@ -0,0 +1,37 @@
/*
* Split spinlock implementation out into its own file, so it can be
* compiled in a FTRACE-compatible way.
*/
#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/paravirt.h>
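/* Default flags variant: ticket locks do not need the saved IRQ flags, so just take the lock. */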
static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
{
__raw_spin_lock(lock);
}
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.spin_is_locked = __ticket_spin_is_locked,
.spin_is_contended = __ticket_spin_is_contended,
.spin_lock = __ticket_spin_lock,
.spin_lock_flags = default_spin_lock_flags,
.spin_trylock = __ticket_spin_trylock,
.spin_unlock = __ticket_spin_unlock,
#endif
};
EXPORT_SYMBOL(pv_lock_ops);
void __init paravirt_use_bytelocks(void)
{
#ifdef CONFIG_SMP
pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
pv_lock_ops.spin_lock = __byte_spin_lock;
pv_lock_ops.spin_trylock = __byte_spin_trylock;
pv_lock_ops.spin_unlock = __byte_spin_unlock;
#endif
}

View File

@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return __get_cpu_var(paravirt_lazy_mode);
}
void __init paravirt_use_bytelocks(void)
{
#ifdef CONFIG_SMP
pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
pv_lock_ops.spin_lock = __byte_spin_lock;
pv_lock_ops.spin_trylock = __byte_spin_trylock;
pv_lock_ops.spin_unlock = __byte_spin_unlock;
#endif
}
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@ -349,6 +338,10 @@ struct pv_cpu_ops pv_cpu_ops = {
.write_ldt_entry = native_write_ldt_entry,
.write_gdt_entry = native_write_gdt_entry,
.write_idt_entry = native_write_idt_entry,
.alloc_ldt = paravirt_nop,
.free_ldt = paravirt_nop,
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@ -460,18 +453,6 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_fixmap = native_set_fixmap,
};
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.spin_is_locked = __ticket_spin_is_locked,
.spin_is_contended = __ticket_spin_is_contended,
.spin_lock = __ticket_spin_lock,
.spin_trylock = __ticket_spin_trylock,
.spin_unlock = __ticket_spin_unlock,
#endif
};
EXPORT_SYMBOL(pv_lock_ops);
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);

View File

@ -76,47 +76,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return ((unsigned long *)tsk->thread.sp)[3];
}
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
static void cpu_exit_clear(void)
{
int cpu = raw_smp_processor_id();
idle_task_exit();
cpu_uninit();
irq_ctx_exit(cpu);
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
numa_remove_cpu(cpu);
c1e_remove_cpu(cpu);
}
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
/* This must be done before dead CPU ack */
cpu_exit_clear();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
/*
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
/* mask all interrupts, flush any and all caches, and halt */
wbinvd_halt();
}
#else
#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif
/*
* The idle thread. There's no useful work to be

View File

@ -86,30 +86,12 @@ void exit_idle(void)
__exit_idle();
}
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
#include <linux/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
idle_task_exit();
c1e_remove_cpu(raw_smp_processor_id());
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
local_irq_disable();
/* mask all interrupts, flush any and all caches, and halt */
wbinvd_halt();
}
#else
#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif
/*
* The idle thread. There's no useful work to be

View File

@ -40,7 +40,9 @@ enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
REGSET_XFP,
REGSET_IOPERM64 = REGSET_XFP,
REGSET_TLS,
REGSET_IOPERM32,
};
/*
@ -555,6 +557,29 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
/*
* These access the current or another (stopped) task's io permission
* bitmap for debugging or core dump.
*/
static int ioperm_active(struct task_struct *target,
const struct user_regset *regset)
{
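	/* Report how many regset units of the I/O bitmap are in use; 0 means the note is omitted from core dumps. */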
return target->thread.io_bitmap_max / regset->size;
}
static int ioperm_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
if (!target->thread.io_bitmap_ptr)
return -ENXIO;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
target->thread.io_bitmap_ptr,
0, IO_BITMAP_BYTES);
}
#ifdef CONFIG_X86_PTRACE_BTS
/*
* The configuration for a particular BTS hardware implementation.
@ -1385,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = {
.size = sizeof(long), .align = sizeof(long),
.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
},
[REGSET_IOPERM64] = {
.core_note_type = NT_386_IOPERM,
.n = IO_BITMAP_LONGS,
.size = sizeof(long), .align = sizeof(long),
.active = ioperm_active, .get = ioperm_get
},
};
static const struct user_regset_view user_x86_64_view = {
@ -1431,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = {
.active = regset_tls_active,
.get = regset_tls_get, .set = regset_tls_set
},
[REGSET_IOPERM32] = {
.core_note_type = NT_386_IOPERM,
.n = IO_BITMAP_BYTES / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = ioperm_active, .get = ioperm_get
},
};
static const struct user_regset_view user_x86_32_view = {
@ -1452,7 +1489,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
int error_code, int si_code)
{
struct siginfo info;
@ -1461,7 +1499,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
memset(&info, 0, sizeof(info));
info.si_signo = SIGTRAP;
info.si_code = TRAP_BRKPT;
info.si_code = si_code;
/* User-mode ip? */
info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
@ -1548,5 +1586,5 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
send_sigtrap(current, regs, 0);
send_sigtrap(current, regs, 0, TRAP_BRKPT);
}

View File

@ -581,6 +581,190 @@ static struct x86_quirks default_x86_quirks __initdata;
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
/*
* Some BIOSes seem to corrupt the low 64k of memory during events
* like suspend/resume and unplugging an HDMI cable. Reserve all
* remaining free memory in that area and fill it with a distinct
* pattern.
*/
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
#define MAX_SCAN_AREAS 8
static int __read_mostly memory_corruption_check = -1;
static unsigned __read_mostly corruption_check_size = 64*1024;
static unsigned __read_mostly corruption_check_period = 60; /* seconds */
static struct e820entry scan_areas[MAX_SCAN_AREAS];
static int num_scan_areas;
static int set_corruption_check(char *arg)
{
char *end;
memory_corruption_check = simple_strtol(arg, &end, 10);
return (*end == 0) ? 0 : -EINVAL;
}
early_param("memory_corruption_check", set_corruption_check);
static int set_corruption_check_period(char *arg)
{
char *end;
corruption_check_period = simple_strtoul(arg, &end, 10);
return (*end == 0) ? 0 : -EINVAL;
}
early_param("memory_corruption_check_period", set_corruption_check_period);
static int set_corruption_check_size(char *arg)
{
char *end;
unsigned size;
size = memparse(arg, &end);
if (*end == '\0')
corruption_check_size = size;
return (size == corruption_check_size) ? 0 : -EINVAL;
}
early_param("memory_corruption_check_size", set_corruption_check_size);
static void __init setup_bios_corruption_check(void)
{
u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
if (memory_corruption_check == -1) {
memory_corruption_check =
#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
1
#else
0
#endif
;
}
if (corruption_check_size == 0)
memory_corruption_check = 0;
if (!memory_corruption_check)
return;
corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
u64 size;
addr = find_e820_area_size(addr, &size, PAGE_SIZE);
if (addr == 0)
break;
if ((addr + size) > corruption_check_size)
size = corruption_check_size - addr;
if (size == 0)
break;
e820_update_range(addr, size, E820_RAM, E820_RESERVED);
scan_areas[num_scan_areas].addr = addr;
scan_areas[num_scan_areas].size = size;
num_scan_areas++;
/* Assume we've already mapped this early memory */
memset(__va(addr), 0, size);
addr += size;
}
printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
num_scan_areas);
update_e820();
}
static struct timer_list periodic_check_timer;
void check_for_bios_corruption(void)
{
int i;
int corruption = 0;
if (!memory_corruption_check)
return;
for(i = 0; i < num_scan_areas; i++) {
unsigned long *addr = __va(scan_areas[i].addr);
unsigned long size = scan_areas[i].size;
for(; size; addr++, size -= sizeof(unsigned long)) {
if (!*addr)
continue;
printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
addr, __pa(addr), *addr);
corruption = 1;
*addr = 0;
}
}
WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
}
static void periodic_check_for_corruption(unsigned long data)
{
check_for_bios_corruption();
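	/* round_jiffies() aligns the timeout with other timers so periodic wakeups can be batched. */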
mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
}
void start_periodic_check_for_corruption(void)
{
if (!memory_corruption_check || corruption_check_period == 0)
return;
printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
corruption_check_period);
init_timer(&periodic_check_timer);
periodic_check_timer.function = &periodic_check_for_corruption;
periodic_check_for_corruption(0);
}
#endif
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
printk(KERN_NOTICE
"%s detected: BIOS may corrupt low RAM, working it around.\n",
d->ident);
e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
return 0;
}
/* List of systems that have known low memory corruption BIOS problems */
static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
#ifdef CONFIG_X86_RESERVE_LOW_64K
{
.callback = dmi_low_memory_corruption,
.ident = "AMI BIOS",
.matches = {
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
},
},
{
.callback = dmi_low_memory_corruption,
.ident = "Phoenix BIOS",
.matches = {
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
},
},
#endif
{}
};
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@ -715,6 +899,10 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
dmi_scan_machine();
dmi_check_system(bad_bios_dmi_table);
#ifdef CONFIG_X86_32
probe_roms();
#endif
@ -771,6 +959,10 @@ void __init setup_arch(char **cmdline_p)
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
setup_bios_corruption_check();
#endif
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
@ -799,8 +991,6 @@ void __init setup_arch(char **cmdline_p)
vsmp_init();
#endif
dmi_scan_machine();
io_delay_init();
/*
@ -903,3 +1093,5 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
}

View File

@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
#include <asm/syscall.h>
#include <asm/syscalls.h>
#include "sigframe.h"
@ -112,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
return do_sigaltstack(uss, uoss, regs->sp);
}
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define COPY_SEG(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp; \
}
#define COPY_SEG_STRICT(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
#define GET_SEG(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
loadsegment(seg, tmp); \
}
/*
* Do a signal return; undo the signal stack.
@ -120,28 +142,13 @@ static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
void __user *buf;
unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
#define COPY(x) err |= __get_user(regs->x, &sc->x)
#define COPY_SEG(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp; }
#define COPY_SEG_STRICT(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp|3; }
#define GET_SEG(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
loadsegment(seg, tmp); }
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
@ -151,21 +158,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY_SEG_STRICT(cs);
COPY_SEG_STRICT(ss);
{
unsigned int tmpflags;
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) |
(tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
}
{
void __user *buf;
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
}
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
@ -214,9 +212,8 @@ badframe:
return 0;
}
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
@ -242,10 +239,17 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
return ax;
badframe:
force_sig(SIGSEGV, current);
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
/*
* Set up a signal frame.
*/
@ -337,39 +341,29 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
static int
setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
struct pt_regs *regs)
__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
struct pt_regs *regs)
{
struct sigframe __user *frame;
void __user *restorer;
int err = 0;
int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
return -EFAULT;
usig = current_thread_info()->exec_domain
&& current_thread_info()->exec_domain->signal_invmap
&& sig < 32
? current_thread_info()->exec_domain->signal_invmap[sig]
: sig;
if (__put_user(sig, &frame->sig))
return -EFAULT;
err = __put_user(usig, &frame->sig);
if (err)
goto give_sigsegv;
err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
if (err)
goto give_sigsegv;
if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
return -EFAULT;
if (_NSIG_WORDS > 1) {
err = __copy_to_user(&frame->extramask, &set->sig[1],
sizeof(frame->extramask));
if (err)
goto give_sigsegv;
if (__copy_to_user(&frame->extramask, &set->sig[1],
sizeof(frame->extramask)))
return -EFAULT;
}
if (current->mm->context.vdso)
@ -394,7 +388,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
@ -409,38 +403,27 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
regs->cs = __USER_CS;
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0;
int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
return -EFAULT;
usig = current_thread_info()->exec_domain
&& current_thread_info()->exec_domain->signal_invmap
&& sig < 32
? current_thread_info()->exec_domain->signal_invmap[sig]
: sig;
err |= __put_user(usig, &frame->sig);
err |= __put_user(sig, &frame->sig);
err |= __put_user(&frame->info, &frame->pinfo);
err |= __put_user(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
goto give_sigsegv;
return -EFAULT;
/* Create the ucontext. */
if (cpu_has_xsave)
@ -456,7 +439,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@ -476,12 +459,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
regs->ip = (unsigned long)ka->sa.sa_handler;
regs->ax = (unsigned long)usig;
regs->ax = (unsigned long)sig;
regs->dx = (unsigned long)&frame->info;
regs->cx = (unsigned long)&frame->uc;
@ -491,15 +474,48 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
/*
* OK, we're invoking a handler:
*/
static int signr_convert(int sig)
{
struct thread_info *info = current_thread_info();
if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
return info->exec_domain->signal_invmap[sig];
return sig;
}
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
int usig = signr_convert(sig);
int ret;
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
else
ret = ia32_setup_frame(usig, ka, set, regs);
} else
ret = __setup_rt_frame(sig, ka, info, set, regs);
if (ret) {
force_sigsegv(sig, current);
return -EFAULT;
}
return ret;
}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
@ -507,9 +523,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
if ((long)regs->orig_ax >= 0) {
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
switch (regs->ax) {
switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@ -536,15 +552,20 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
/* Set up the stack frame */
if (ka->sa.sa_flags & SA_SIGINFO)
ret = setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = setup_frame(sig, ka, oldset, regs);
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret)
return ret;
#ifdef CONFIG_X86_64
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
#endif
/*
* Clear the direction flag as per the ABI for function entry.
*/
@ -571,6 +592,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
#define NR_restart_syscall __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@ -623,9 +645,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
if ((long)regs->orig_ax >= 0) {
if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
switch (regs->ax) {
switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@ -634,7 +656,7 @@ static void do_signal(struct pt_regs *regs)
break;
case -ERESTART_RESTARTBLOCK:
regs->ax = __NR_restart_syscall;
regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@ -657,6 +679,12 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_user();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
@ -666,5 +694,23 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
tracehook_notify_resume(regs);
}
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk(KERN_INFO
"%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, me);
}
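
A note on the error-handling shape the refactor above adopts: the individual frame builders no longer jump to a give_sigsegv label, they simply return -EFAULT, and only the setup_rt_frame() dispatcher decides that a failed setup means SIGSEGV. A minimal sketch of that convention (not taken from this commit; the example_ names are illustrative):

        /* Builders only report failure; they no longer raise SIGSEGV. */
        static int example_build_frame(struct k_sigaction *ka, struct pt_regs *regs,
                                       void __user *frame, size_t size)
        {
                if (!access_ok(VERIFY_WRITE, frame, size))
                        return -EFAULT;         /* was: goto give_sigsegv */
                return 0;
        }

        /* One place converts a failed setup into the fatal signal. */
        static int example_dispatch(int sig, struct k_sigaction *ka,
                                    struct pt_regs *regs,
                                    void __user *frame, size_t size)
        {
                int ret = example_build_frame(ka, regs, frame, size);

                if (ret) {
                        force_sigsegv(sig, current);
                        return -EFAULT;
                }
                return 0;
        }
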

View File

@ -52,6 +52,16 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
return do_sigaltstack(uss, uoss, regs->sp);
}
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define COPY_SEG_STRICT(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
/*
* Do a signal return; undo the signal stack.
*/
@ -59,13 +69,13 @@ static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
void __user *buf;
unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
#define COPY(x) (err |= __get_user(regs->x, &sc->x))
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(r8);
@ -80,34 +90,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
{
unsigned cs;
err |= __get_user(cs, &sc->cs);
regs->cs = cs | 3; /* Force into user mode */
}
COPY_SEG_STRICT(cs);
{
unsigned int tmpflags;
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
}
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
{
struct _fpstate __user *buf;
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
}
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
}
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
sigset_t set;
unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@ -130,10 +130,15 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return ax;
badframe:
signal_fault(regs, frame, "sigreturn");
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
/*
* Set up a signal frame.
*/
@ -195,8 +200,8 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
return (void __user *)round_down(sp - size, 64);
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
@ -209,17 +214,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
err |= -1;
return -EFAULT;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
goto give_sigsegv;
if (copy_siginfo_to_user(&frame->info, info))
return -EFAULT;
}
/* Create the ucontext. */
@ -247,11 +251,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
goto give_sigsegv;
return -EFAULT;
}
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
@ -271,15 +275,45 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
/*
* OK, we're invoking a handler
*/
static int signr_convert(int sig)
{
return sig;
}
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else
#define is_ia32 0
#endif
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
int usig = signr_convert(sig);
int ret;
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
else
ret = ia32_setup_frame(usig, ka, set, regs);
} else
ret = __setup_rt_frame(sig, ka, info, set, regs);
if (ret) {
force_sigsegv(sig, current);
return -EFAULT;
}
return ret;
}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@ -317,51 +351,48 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32)) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = ia32_setup_frame(sig, ka, oldset, regs);
} else
#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret == 0) {
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
if (ret)
return ret;
/*
* Clear the direction flag as per the ABI for function entry.
*/
regs->flags &= ~X86_EFLAGS_DF;
#ifdef CONFIG_X86_64
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
#endif
/*
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
/*
* Clear the direction flag as per the ABI for function entry.
*/
regs->flags &= ~X86_EFLAGS_DF;
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked, sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
/*
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
tracehook_signal_handler(sig, info, ka, regs,
test_thread_flag(TIF_SINGLESTEP));
}
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked, sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
return ret;
tracehook_signal_handler(sig, info, ka, regs,
test_thread_flag(TIF_SINGLESTEP));
return 0;
}
#define NR_restart_syscall \
test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@ -391,7 +422,8 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Re-enable any watchpoints before delivering the
/*
* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
@ -399,7 +431,7 @@ static void do_signal(struct pt_regs *regs)
if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
* A signal was successfully delivered; the saved
@ -422,10 +454,9 @@ static void do_signal(struct pt_regs *regs)
regs->ax = regs->orig_ax;
regs->ip -= 2;
break;
case -ERESTART_RESTARTBLOCK:
regs->ax = test_thread_flag(TIF_IA32) ?
__NR_ia32_restart_syscall :
__NR_restart_syscall;
regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@ -441,14 +472,18 @@ static void do_signal(struct pt_regs *regs)
}
}
void do_notify_resume(struct pt_regs *regs, void *unused,
__u32 thread_info_flags)
/*
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#ifdef CONFIG_X86_MCE
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_user();
#endif /* CONFIG_X86_MCE */
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
@ -458,17 +493,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame, regs->ip,
regs->sp, regs->orig_ax);
printk(KERN_INFO
"%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk("\n");
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, me);
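
Both restart paths above rewind the saved instruction pointer by two bytes. That constant is not arbitrary: every x86 system-call entry instruction is two bytes long, so backing ip up by two re-executes the trap when the task returns to user mode. A short annotated sketch (illustrative, not from this commit):

        /*
         * Why the restart paths do "regs->ip -= 2":
         *
         *   int $0x80   -> cd 80   (legacy 32-bit entry)
         *   syscall     -> 0f 05   (64-bit entry)
         *   sysenter    -> 0f 34   (fast 32-bit entry)
         *
         * All are two bytes, so rewinding by two re-issues the call.
         */
        static void example_restart(struct pt_regs *regs, unsigned long nr)
        {
                regs->ax = nr;          /* e.g. NR_restart_syscall */
                regs->ip -= 2;          /* re-execute the entry instruction */
        }
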

View File

@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
.smp_prepare_cpus = native_smp_prepare_cpus,
.cpu_up = native_cpu_up,
.smp_cpus_done = native_smp_cpus_done,
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
.cpu_die = native_cpu_die,
.cpu_disable = native_cpu_disable,
.play_dead = native_play_dead,
.send_call_func_ipi = native_send_call_func_ipi,
.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
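
The new .cpu_disable, .cpu_die and .play_dead members let the generic hotplug entry points dispatch through smp_ops instead of calling the native implementations directly, which is what allows a paravirtualized guest to substitute its own versions. Roughly what the corresponding header wrappers look like after this change (sketch only):

        /* Sketch of the smp_ops indirection used by the generic hotplug code. */
        static inline int __cpu_disable(void)
        {
                return smp_ops.cpu_disable();
        }

        static inline void __cpu_die(unsigned int cpu)
        {
                smp_ops.cpu_die(cpu);
        }

        static inline void play_dead(void)
        {
                smp_ops.play_dead();
        }
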

View File

@ -52,6 +52,7 @@
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
@ -1344,7 +1345,29 @@ static void __ref remove_cpu_from_maps(int cpu)
numa_remove_cpu(cpu);
}
int __cpu_disable(void)
void cpu_disable_common(void)
{
int cpu = smp_processor_id();
/*
* HACK:
* Allow any queued timer interrupts to get serviced
* This is only a temporary solution until we cleanup
* fixup_irqs as we do for IA64.
*/
local_irq_enable();
mdelay(1);
local_irq_disable();
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
}
int native_cpu_disable(void)
{
int cpu = smp_processor_id();
@ -1363,27 +1386,11 @@ int __cpu_disable(void)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
/*
* HACK:
* Allow any queued timer interrupts to get serviced
* This is only a temporary solution until we cleanup
* fixup_irqs as we do for IA64.
*/
local_irq_enable();
mdelay(1);
local_irq_disable();
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
cpu_disable_common();
return 0;
}
void __cpu_die(unsigned int cpu)
void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
@ -1400,15 +1407,45 @@ void __cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
void play_dead_common(void)
{
idle_task_exit();
reset_lazy_tlbstate();
irq_ctx_exit(raw_smp_processor_id());
c1e_remove_cpu(raw_smp_processor_id());
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
/*
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
}
void native_play_dead(void)
{
play_dead_common();
wbinvd_halt();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
int native_cpu_disable(void)
{
return -ENOSYS;
}
void __cpu_die(unsigned int cpu)
void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}
void native_play_dead(void)
{
BUG();
}
#endif
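
Splitting play_dead_common() out of native_play_dead() means an alternative implementation only has to supply the final "park the CPU" step. A hedged sketch of what such a backend could look like; the hypercall shown mirrors the VCPUOP_down call used elsewhere in this series and is purely illustrative here:

        /* Hypothetical backend: reuse the common bookkeeping, then hand the
         * vCPU back to the hypervisor instead of executing wbinvd/hlt. */
        static void example_play_dead(void)
        {
                play_dead_common();
                HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
        }
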

View File

@ -241,3 +241,11 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
void reset_lazy_tlbstate(void)
{
int cpu = raw_smp_processor_id();
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}

View File

@ -891,6 +891,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
unsigned int condition;
int si_code;
trace_hardirqs_fixup();
@ -935,8 +936,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
goto clear_TF_reenable;
}
si_code = get_si_code((unsigned long)condition);
/* Ok, finally something we can handle */
send_sigtrap(tsk, regs, error_code);
send_sigtrap(tsk, regs, error_code, si_code);
/*
* Disable additional traps. They'll be re-enabled when

View File

@ -940,7 +940,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs *regs,
tsk->thread.error_code = error_code;
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
info.si_code = get_si_code(condition);
info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
force_sig_info(SIGTRAP, &info, tsk);

View File

@ -95,7 +95,9 @@ int save_i387_xstate(void __user *buf)
* Start with clearing the user buffer. This will present a
* clean context for the bytes not touched by the fxsave/xsave.
*/
__clear_user(buf, sig_xstate_size);
err = __clear_user(buf, sig_xstate_size);
if (err)
return err;
if (task_thread_info(tsk)->status & TS_XSAVE)
err = xsave_user(buf);
@ -114,6 +116,8 @@ int save_i387_xstate(void __user *buf)
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
u64 xstate_bv;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
@ -121,6 +125,31 @@ int save_i387_xstate(void __user *buf)
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_size
- FP_XSTATE_MAGIC2_SIZE));
/*
* Read the xstate_bv which we copied (directly from the cpu or
* from the state in task struct) to the user buffers and
* set the FP/SSE bits.
*/
err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
/*
* For legacy compatibility, we always set the FP/SSE bits in the bit
* vector while saving the state to the user context. This lets us
* capture any changes (made during sigreturn) to the FP/SSE bits by
* legacy applications which don't touch xstate_bv in the xsave
* header.
*
* xsave aware apps can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
xstate_bv |= XSTATE_FPSSE;
err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
if (err)
return err;
}
return 1;
@ -272,7 +301,7 @@ void __cpuinit xsave_init(void)
/*
* setup the xstate image representing the init state
*/
void setup_xstate_init(void)
static void __init setup_xstate_init(void)
{
init_xstate_buf = alloc_bootmem(xstate_size);
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;

View File

@ -914,15 +914,15 @@ LIST_HEAD(pgd_list);
void vmalloc_sync_all(void)
{
#ifdef CONFIG_X86_32
unsigned long start = VMALLOC_START & PGDIR_MASK;
unsigned long address;
#ifdef CONFIG_X86_32
if (SHARED_KERNEL_PMD)
return;
BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
for (address = VMALLOC_START & PMD_MASK;
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
unsigned long flags;
struct page *page;
@ -935,10 +935,8 @@ void vmalloc_sync_all(void)
spin_unlock_irqrestore(&pgd_lock, flags);
}
#else /* CONFIG_X86_64 */
unsigned long start = VMALLOC_START & PGDIR_MASK;
unsigned long address;
for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
unsigned long flags;
struct page *page;

View File

@ -31,6 +31,7 @@
#include <linux/cpumask.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@ -969,6 +970,8 @@ void __init mem_init(void)
int codesize, reservedpages, datasize, initsize;
int tmp;
start_periodic_check_for_corruption();
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif

View File

@ -31,6 +31,7 @@
#include <linux/nmi.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@ -881,6 +882,8 @@ void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;
start_periodic_check_for_corruption();
pci_iommu_alloc();
/* clear_bss() already clear the empty_zero_page */

View File

@ -24,19 +24,27 @@
#ifdef CONFIG_X86_64
unsigned long __phys_addr(unsigned long x)
{
if (x >= __START_KERNEL_map)
return x - __START_KERNEL_map + phys_base;
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
static inline int phys_addr_valid(unsigned long addr)
{
return addr < (1UL << boot_cpu_data.x86_phys_bits);
}
unsigned long __phys_addr(unsigned long x)
{
if (x >= __START_KERNEL_map) {
x -= __START_KERNEL_map;
VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
x += phys_base;
} else {
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
x -= PAGE_OFFSET;
VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
!phys_addr_valid(x));
}
return x;
}
EXPORT_SYMBOL(__phys_addr);
#else
static inline int phys_addr_valid(unsigned long addr)
@ -44,6 +52,17 @@ static inline int phys_addr_valid(unsigned long addr)
return 1;
}
#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
/* VMALLOC_* aren't constants; they are not available at boot time */
VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING &&
is_vmalloc_addr((void *)x)));
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
#endif
#endif
int page_is_ram(unsigned long pagenr)
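
With CONFIG_DEBUG_VIRTUAL enabled, the checked __phys_addr() above turns a common class of mistakes, passing a non-linear address to __pa(), into an immediate BUG instead of a silently wrong physical address. An illustrative, deliberately wrong use that the new VIRTUAL_BUG_ON() would catch (sketch only):

        /* Illustrative only: vmalloc addresses are not part of the linear
         * mapping, so __pa() on one is meaningless. With DEBUG_VIRTUAL the
         * check added above fires; without it you just get a bogus value. */
        static void example_bad_pa(void)
        {
                void *v = vmalloc(PAGE_SIZE);
                unsigned long wrong = __pa(v);  /* trips VIRTUAL_BUG_ON() */

                (void)wrong;
                vfree(v);
        }
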

View File

@ -26,5 +26,13 @@ config XEN_MAX_DOMAIN_MEMORY
config XEN_SAVE_RESTORE
bool
depends on PM
default y
depends on XEN && PM
default y
config XEN_DEBUG_FS
bool "Enable Xen debug and tuning parameters in debugfs"
depends on XEN && DEBUG_FS
default n
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.

View File

@ -1,4 +1,12 @@
obj-y := enlighten.o setup.o multicalls.o mmu.o \
ifdef CONFIG_FTRACE
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_spinlock.o = -pg
CFLAGS_REMOVE_time.o = -pg
CFLAGS_REMOVE_irq.o = -pg
endif
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm_$(BITS).o grant-table.o suspend.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smp.o spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o

arch/x86/xen/debugfs.c (new file, 123 lines)
View File

@ -0,0 +1,123 @@
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include "debugfs.h"
static struct dentry *d_xen_debug;
struct dentry * __init xen_init_debugfs(void)
{
if (!d_xen_debug) {
d_xen_debug = debugfs_create_dir("xen", NULL);
if (!d_xen_debug)
pr_warning("Could not create 'xen' debugfs directory\n");
}
return d_xen_debug;
}
struct array_data
{
void *array;
unsigned elements;
};
static int u32_array_open(struct inode *inode, struct file *file)
{
file->private_data = NULL;
return nonseekable_open(inode, file);
}
static size_t format_array(char *buf, size_t bufsize, const char *fmt,
u32 *array, unsigned array_size)
{
size_t ret = 0;
unsigned i;
for(i = 0; i < array_size; i++) {
size_t len;
len = snprintf(buf, bufsize, fmt, array[i]);
len++; /* ' ' or '\n' */
ret += len;
if (buf) {
buf += len;
bufsize -= len;
buf[-1] = (i == array_size-1) ? '\n' : ' ';
}
}
ret++; /* \0 */
if (buf)
*buf = '\0';
return ret;
}
static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
{
size_t len = format_array(NULL, 0, fmt, array, array_size);
char *ret;
ret = kmalloc(len, GFP_KERNEL);
if (ret == NULL)
return NULL;
format_array(ret, len, fmt, array, array_size);
return ret;
}
static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
loff_t *ppos)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct array_data *data = inode->i_private;
size_t size;
if (*ppos == 0) {
if (file->private_data) {
kfree(file->private_data);
file->private_data = NULL;
}
file->private_data = format_array_alloc("%u", data->array, data->elements);
}
size = 0;
if (file->private_data)
size = strlen(file->private_data);
return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
}
static int xen_array_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static struct file_operations u32_array_fops = {
.owner = THIS_MODULE,
.open = u32_array_open,
.release= xen_array_release,
.read = u32_array_read,
};
struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
struct dentry *parent,
u32 *array, unsigned elements)
{
struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return NULL;
data->array = array;
data->elements = elements;
return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
}
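
The u32-array helper added here gets used later in this series (for instance for the mmu_update size histogram). A hedged usage sketch, with made-up counter names, of how a caller wires an array of counters into the xen debugfs directory:

        /* Sketch: exposes a hypothetical histogram under
         * /sys/kernel/debug/xen/example/histo as one space-separated line. */
        static u32 example_histo[16];

        static int __init example_debugfs(void)
        {
                struct dentry *d_xen = xen_init_debugfs();
                struct dentry *dir;

                if (d_xen == NULL)
                        return -ENOMEM;

                dir = debugfs_create_dir("example", d_xen);
                xen_debugfs_create_u32_array("histo", 0444, dir,
                                             example_histo,
                                             ARRAY_SIZE(example_histo));
                return 0;
        }
        fs_initcall(example_debugfs);
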

arch/x86/xen/debugfs.h (new file, 10 lines)
View File

@ -0,0 +1,10 @@
#ifndef _XEN_DEBUGFS_H
#define _XEN_DEBUGFS_H
struct dentry * __init xen_init_debugfs(void);
struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
struct dentry *parent,
u32 *array, unsigned elements);
#endif /* _XEN_DEBUGFS_H */

View File

@ -30,7 +30,6 @@
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/sched.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
@ -58,6 +57,9 @@ EXPORT_SYMBOL_GPL(hypercall_page);
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
/*
* Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest.
@ -111,7 +113,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
*
* 0: not available, 1: available
*/
static int have_vcpu_info_placement = 1;
static int have_vcpu_info_placement =
#ifdef CONFIG_X86_32
1
#else
0
#endif
;
static void xen_vcpu_setup(int cpu)
{
@ -227,94 +236,6 @@ static unsigned long xen_get_debugreg(int reg)
return HYPERVISOR_get_debugreg(reg);
}
static unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = x86_read_percpu(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
-1 -> 0xffffffff
*/
return (-flags) & X86_EFLAGS_IF;
}
static void xen_restore_fl(unsigned long flags)
{
struct vcpu_info *vcpu;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
if (flags == 0) {
preempt_check_resched();
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
}
static void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
static void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
/* We don't need to worry about being preempted here, since
either a) interrupts are disabled, so no preemption, or b)
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
static void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
BUG();
}
static void xen_halt(void)
{
if (irqs_disabled())
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
else
xen_safe_halt();
}
static void xen_leave_lazy(void)
{
paravirt_leave_lazy(paravirt_get_lazy_mode());
@ -326,6 +247,59 @@ static unsigned long xen_store_tr(void)
return 0;
}
/*
* Set the page permissions for a particular virtual address. If the
* address is a vmalloc mapping (or other non-linear mapping), then
* find the linear mapping of the page and also set its protections to
* match.
*/
static void set_aliased_prot(void *v, pgprot_t prot)
{
int level;
pte_t *ptep;
pte_t pte;
unsigned long pfn;
struct page *page;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
page = pfn_to_page(pfn);
pte = pfn_pte(pfn, prot);
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
if (!PageHighMem(page)) {
void *av = __va(PFN_PHYS(pfn));
if (av != v)
if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
BUG();
} else
kmap_flush_unused();
}
static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
int i;
for(i = 0; i < entries; i += entries_per_page)
set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}
static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
{
const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
int i;
for(i = 0; i < entries; i += entries_per_page)
set_aliased_prot(ldt + i, PAGE_KERNEL);
}
static void xen_set_ldt(const void *addr, unsigned entries)
{
struct mmuext_op *op;
@ -426,8 +400,7 @@ static void xen_load_gs_index(unsigned int idx)
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
{
unsigned long lp = (unsigned long)&dt[entrynum];
xmaddr_t mach_lp = virt_to_machine(lp);
xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
u64 entry = *(u64 *)ptr;
preempt_disable();
@ -560,7 +533,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
}
static void xen_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
@ -835,6 +808,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
ret = -EFAULT;
break;
#endif
case MSR_STAR:
case MSR_CSTAR:
case MSR_LSTAR:
case MSR_SYSCALL_MASK:
case MSR_IA32_SYSENTER_CS:
case MSR_IA32_SYSENTER_ESP:
case MSR_IA32_SYSENTER_EIP:
/* Fast syscall setup is all done in hypercalls, so
these are all ignored. Stub them out here to stop
Xen console noise. */
break;
default:
ret = native_write_msr_safe(msr, low, high);
}
@ -878,8 +864,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
SetPagePinned(page);
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
if (level == PT_PTE)
make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
} else
/* make sure there are no stray mappings of
@ -947,7 +933,7 @@ static void xen_release_ptpage(unsigned long pfn, unsigned level)
if (PagePinned(page)) {
if (!PageHighMem(page)) {
if (level == PT_PTE)
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
@ -994,6 +980,7 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
#ifdef CONFIG_X86_32
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
@ -1012,6 +999,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
xen_set_pte(ptep, pte);
}
#endif
static __init void xen_pagetable_setup_start(pgd_t *base)
{
@ -1078,7 +1066,6 @@ void xen_setup_vcpu_info_placement(void)
/* xen_vcpu_setup managed to place the vcpu_info within the
percpu area for all cpus, so make use of it */
#ifdef CONFIG_X86_32
if (have_vcpu_info_placement) {
printk(KERN_INFO "Xen: using vcpu_info placement\n");
@ -1088,7 +1075,6 @@ void xen_setup_vcpu_info_placement(void)
pv_irq_ops.irq_enable = xen_irq_enable_direct;
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
}
#endif
}
static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@ -1109,12 +1095,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
goto patch_site
switch (type) {
#ifdef CONFIG_X86_32
SITE(pv_irq_ops, irq_enable);
SITE(pv_irq_ops, irq_disable);
SITE(pv_irq_ops, save_fl);
SITE(pv_irq_ops, restore_fl);
#endif /* CONFIG_X86_32 */
#undef SITE
patch_site:
@ -1252,6 +1236,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.load_gs_index = xen_load_gs_index,
#endif
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
.store_gdt = native_store_gdt,
.store_idt = native_store_idt,
.store_tr = xen_store_tr,
@ -1273,36 +1260,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
},
};
static void __init __xen_init_IRQ(void)
{
#ifdef CONFIG_X86_64
int i;
/* Create identity vector->irq map */
for(i = 0; i < NR_VECTORS; i++) {
int cpu;
for_each_possible_cpu(cpu)
per_cpu(vector_irq, cpu)[i] = i;
}
#endif /* CONFIG_X86_64 */
xen_init_IRQ();
}
static const struct pv_irq_ops xen_irq_ops __initdata = {
.init_IRQ = __xen_init_IRQ,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
.adjust_exception_frame = xen_adjust_exception_frame,
#endif
};
static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC
.setup_boot_clock = paravirt_nop,
@ -1443,7 +1400,7 @@ static void __init xen_reserve_top(void)
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
top = pp.virt_start;
reserve_top_address(-top + 2 * PAGE_SIZE);
reserve_top_address(-top);
#endif /* CONFIG_X86_32 */
}
@ -1477,48 +1434,11 @@ static void *m2v(phys_addr_t maddr)
return __ka(m2p(maddr));
}
#ifdef CONFIG_X86_64
static void walk(pgd_t *pgd, unsigned long addr)
{
unsigned l4idx = pgd_index(addr);
unsigned l3idx = pud_index(addr);
unsigned l2idx = pmd_index(addr);
unsigned l1idx = pte_index(addr);
pgd_t l4;
pud_t l3;
pmd_t l2;
pte_t l1;
xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
pgd, addr, l4idx, l3idx, l2idx, l1idx);
l4 = pgd[l4idx];
xen_raw_printk(" l4: %016lx\n", l4.pgd);
xen_raw_printk(" %016lx\n", pgd_val(l4));
l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
xen_raw_printk(" l3: %016lx\n", l3.pud);
xen_raw_printk(" %016lx\n", pud_val(l3));
l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
xen_raw_printk(" l2: %016lx\n", l2.pmd);
xen_raw_printk(" %016lx\n", pmd_val(l2));
l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
xen_raw_printk(" l1: %016lx\n", l1.pte);
xen_raw_printk(" %016lx\n", pte_val(l1));
}
#endif
static void set_page_prot(void *addr, pgprot_t prot)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
addr, pfn, get_phys_to_machine(pfn),
pgprot_val(prot), pte.pte);
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG();
}
@ -1694,6 +1614,8 @@ asmlinkage void __init xen_start_kernel(void)
if (!xen_start_info)
return;
xen_domain_type = XEN_PV_DOMAIN;
BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
xen_setup_features();
@ -1703,10 +1625,11 @@ asmlinkage void __init xen_start_kernel(void)
pv_init_ops = xen_init_ops;
pv_time_ops = xen_time_ops;
pv_cpu_ops = xen_cpu_ops;
pv_irq_ops = xen_irq_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
xen_init_irq_ops();
#ifdef CONFIG_X86_LOCAL_APIC
/*
* set up the basic apic ops.
@ -1737,7 +1660,7 @@ asmlinkage void __init xen_start_kernel(void)
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
if (!is_initial_xendomain())
if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
/* Don't do the full vcpu_info placement stuff until we have a
@ -1772,7 +1695,7 @@ asmlinkage void __init xen_start_kernel(void)
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
if (!is_initial_xendomain()) {
if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
@ -1780,15 +1703,6 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("about to get started...\n");
#if 0
xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
&boot_params, __pa_symbol(&boot_params),
__va(__pa_symbol(&boot_params)));
walk(pgd, &boot_params);
walk(pgd, __va(__pa(&boot_params)));
#endif
/* Start the world */
#ifdef CONFIG_X86_32
i386_start_kernel();

arch/x86/xen/irq.c (new file, 143 lines)
View File

@ -0,0 +1,143 @@
#include <linux/hardirq.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include "xen-ops.h"
/*
* Force a proper event-channel callback from Xen after clearing the
* callback mask. We do this in a very simple manner, by making a call
* down into Xen. The pending flag will be checked by Xen on return.
*/
void xen_force_evtchn_callback(void)
{
(void)HYPERVISOR_xen_version(0, NULL);
}
static void __init __xen_init_IRQ(void)
{
#ifdef CONFIG_X86_64
int i;
/* Create identity vector->irq map */
for(i = 0; i < NR_VECTORS; i++) {
int cpu;
for_each_possible_cpu(cpu)
per_cpu(vector_irq, cpu)[i] = i;
}
#endif /* CONFIG_X86_64 */
xen_init_IRQ();
}
static unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = x86_read_percpu(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
-1 -> 0xffffffff
*/
return (-flags) & X86_EFLAGS_IF;
}
static void xen_restore_fl(unsigned long flags)
{
struct vcpu_info *vcpu;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
if (flags == 0) {
preempt_check_resched();
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
xen_force_evtchn_callback();
}
}
static void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
static void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
/* We don't need to worry about being preempted here, since
either a) interrupts are disabled, so no preemption, or b)
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
xen_force_evtchn_callback();
}
static void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
BUG();
}
static void xen_halt(void)
{
if (irqs_disabled())
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
else
xen_safe_halt();
}
static const struct pv_irq_ops xen_irq_ops __initdata = {
.init_IRQ = __xen_init_IRQ,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
.adjust_exception_frame = xen_adjust_exception_frame,
#endif
};
void __init xen_init_irq_ops()
{
pv_irq_ops = xen_irq_ops;
}
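
The save/restore helpers above translate between Xen's event-channel mask (1 = events masked) and the x86 IF convention (IF set = interrupts enabled), which have opposite senses. Spelling out the two cases the comments describe, as a standalone sketch:

        /* evtchn_upcall_mask = 0 (events allowed) -> flags = X86_EFLAGS_IF
         * evtchn_upcall_mask = 1 (events masked)  -> flags = 0
         * and the inverse mapping on restore. */
        static unsigned long example_mask_to_flags(u8 evtchn_upcall_mask)
        {
                unsigned long enabled = !evtchn_upcall_mask;    /* 0 or 1 */

                return (-enabled) & X86_EFLAGS_IF;      /* 0 or 0x200 */
        }

        static u8 example_flags_to_mask(unsigned long flags)
        {
                return !(flags & X86_EFLAGS_IF);        /* 1 = mask events */
        }
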

View File

@ -40,6 +40,7 @@
*/
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <asm/pgtable.h>
@ -57,6 +58,61 @@
#include "multicalls.h"
#include "mmu.h"
#include "debugfs.h"
#define MMU_UPDATE_HISTO 30
#ifdef CONFIG_XEN_DEBUG_FS
static struct {
u32 pgd_update;
u32 pgd_update_pinned;
u32 pgd_update_batched;
u32 pud_update;
u32 pud_update_pinned;
u32 pud_update_batched;
u32 pmd_update;
u32 pmd_update_pinned;
u32 pmd_update_batched;
u32 pte_update;
u32 pte_update_pinned;
u32 pte_update_batched;
u32 mmu_update;
u32 mmu_update_extended;
u32 mmu_update_histo[MMU_UPDATE_HISTO];
u32 prot_commit;
u32 prot_commit_batched;
u32 set_pte_at;
u32 set_pte_at_batched;
u32 set_pte_at_pinned;
u32 set_pte_at_current;
u32 set_pte_at_kernel;
} mmu_stats;
static u8 zero_stats;
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&mmu_stats, 0, sizeof(mmu_stats));
zero_stats = 0;
}
}
#define ADD_STATS(elem, val) \
do { check_zero(); mmu_stats.elem += (val); } while(0)
#else /* !CONFIG_XEN_DEBUG_FS */
#define ADD_STATS(elem, val) do { (void)(val); } while(0)
#endif /* CONFIG_XEN_DEBUG_FS */
/*
* Just beyond the highest usermode address. STACK_TOP_MAX has a
@ -229,25 +285,35 @@ void make_lowmem_page_readwrite(void *vaddr)
}
static bool page_pinned(void *ptr)
static bool xen_page_pinned(void *ptr)
{
struct page *page = virt_to_page(ptr);
return PagePinned(page);
}
static void extend_mmu_update(const struct mmu_update *update)
static void xen_extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
struct mmu_update *u;
mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
if (mcs.mc != NULL)
if (mcs.mc != NULL) {
ADD_STATS(mmu_update_extended, 1);
ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
mcs.mc->args[1]++;
else {
if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
else
ADD_STATS(mmu_update_histo[0], 1);
} else {
ADD_STATS(mmu_update, 1);
mcs = __xen_mc_entry(sizeof(*u));
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
ADD_STATS(mmu_update_histo[1], 1);
}
u = mcs.args;
@ -265,7 +331,9 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
/* ptr may be ioremapped for 64-bit pagetable setup */
u.ptr = arbitrary_virt_to_machine(ptr).maddr;
u.val = pmd_val_ma(val);
extend_mmu_update(&u);
xen_extend_mmu_update(&u);
ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
@ -274,13 +342,17 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
ADD_STATS(pmd_update, 1);
/* If page is not pinned, we can just update the entry
directly */
if (!page_pinned(ptr)) {
if (!xen_page_pinned(ptr)) {
*ptr = val;
return;
}
ADD_STATS(pmd_update_pinned, 1);
xen_set_pmd_hyper(ptr, val);
}
@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
if (mm == &init_mm)
preempt_disable();
ADD_STATS(set_pte_at, 1);
// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
ADD_STATS(set_pte_at_current, mm == current->mm);
ADD_STATS(set_pte_at_kernel, mm == &init_mm);
if (mm == current->mm || mm == &init_mm) {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
struct multicall_space mcs;
mcs = xen_mc_entry(0);
MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
ADD_STATS(set_pte_at_batched, 1);
xen_mc_issue(PARAVIRT_LAZY_MMU);
goto out;
} else
@ -334,7 +412,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
u.val = pte_val_ma(pte);
extend_mmu_update(&u);
xen_extend_mmu_update(&u);
ADD_STATS(prot_commit, 1);
ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
@ -400,7 +481,9 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
/* ptr may be ioremapped for 64-bit pagetable setup */
u.ptr = arbitrary_virt_to_machine(ptr).maddr;
u.val = pud_val_ma(val);
extend_mmu_update(&u);
xen_extend_mmu_update(&u);
ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
@ -409,18 +492,26 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
void xen_set_pud(pud_t *ptr, pud_t val)
{
ADD_STATS(pud_update, 1);
/* If page is not pinned, we can just update the entry
directly */
if (!page_pinned(ptr)) {
if (!xen_page_pinned(ptr)) {
*ptr = val;
return;
}
ADD_STATS(pud_update_pinned, 1);
xen_set_pud_hyper(ptr, val);
}
void xen_set_pte(pte_t *ptep, pte_t pte)
{
ADD_STATS(pte_update, 1);
// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
#ifdef CONFIG_X86_PAE
ptep->pte_high = pte.pte_high;
smp_wmb();
@ -490,7 +581,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
u.ptr = virt_to_machine(ptr).maddr;
u.val = pgd_val_ma(val);
extend_mmu_update(&u);
xen_extend_mmu_update(&u);
}
/*
@ -517,17 +608,22 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
ADD_STATS(pgd_update, 1);
/* If page is not pinned, we can just update the entry
directly */
if (!page_pinned(ptr)) {
if (!xen_page_pinned(ptr)) {
*ptr = val;
if (user_ptr) {
WARN_ON(page_pinned(user_ptr));
WARN_ON(xen_page_pinned(user_ptr));
*user_ptr = val;
}
return;
}
ADD_STATS(pgd_update_pinned, 1);
ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
/* If it's pinned, then we can at least batch the kernel and
user updates together. */
xen_mc_batch();
@ -555,9 +651,12 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
unsigned long limit)
static int xen_pgd_walk(struct mm_struct *mm,
int (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
unsigned long limit)
{
pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@ -590,8 +689,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
pmdidx_limit = 0;
#endif
flush |= (*func)(virt_to_page(pgd), PT_PGD);
for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
pud_t *pud;
@ -604,7 +701,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
pud = pud_offset(&pgd[pgdidx], 0);
if (PTRS_PER_PUD > 1) /* not folded */
flush |= (*func)(virt_to_page(pud), PT_PUD);
flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
pmd_t *pmd;
@ -619,7 +716,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
pmd = pmd_offset(&pud[pudidx], 0);
if (PTRS_PER_PMD > 1) /* not folded */
flush |= (*func)(virt_to_page(pmd), PT_PMD);
flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
struct page *pte;
@ -633,28 +730,34 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
continue;
pte = pmd_page(pmd[pmdidx]);
flush |= (*func)(pte, PT_PTE);
flush |= (*func)(mm, pte, PT_PTE);
}
}
}
out:
/* Do the top level last, so that the callbacks can use it as
a cue to do final things like tlb flushes. */
flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
return flush;
}
static spinlock_t *lock_pte(struct page *page)
/* If we're using split pte locks, then take the page's lock and
return a pointer to it. Otherwise return NULL. */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
{
spinlock_t *ptl = NULL;
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#if USE_SPLIT_PTLOCKS
ptl = __pte_lockptr(page);
spin_lock(ptl);
spin_lock_nest_lock(ptl, &mm->page_table_lock);
#endif
return ptl;
}
static void do_unlock(void *v)
static void xen_pte_unlock(void *v)
{
spinlock_t *ptl = v;
spin_unlock(ptl);
@ -672,7 +775,8 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
}
static int pin_page(struct page *page, enum pt_level level)
static int xen_pin_page(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
unsigned pgfl = TestSetPagePinned(page);
int flush;
@ -691,21 +795,40 @@ static int pin_page(struct page *page, enum pt_level level)
flush = 0;
/*
* We need to hold the pagetable lock between the time
* we make the pagetable RO and when we actually pin
* it. If we don't, then other users may come in and
* attempt to update the pagetable by writing it,
* which will fail because the memory is RO but not
* pinned, so Xen won't do the trap'n'emulate.
*
* If we're using split pte locks, we can't hold the
* entire pagetable's worth of locks during the
* traverse, because we may wrap the preempt count (8
* bits). The solution is to mark RO and pin each PTE
* page while holding the lock. This means the number
* of locks we end up holding is never more than a
* batch size (~32 entries, at present).
*
* If we're not using split pte locks, we needn't pin
* the PTE pages independently, because we're
* protected by the overall pagetable lock.
*/
ptl = NULL;
if (level == PT_PTE)
ptl = lock_pte(page);
ptl = xen_pte_lock(page, mm);
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
pfn_pte(pfn, PAGE_KERNEL_RO),
level == PT_PGD ? UVMF_TLB_FLUSH : 0);
if (level == PT_PTE)
if (ptl) {
xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
if (ptl) {
/* Queue a deferred unlock for when this batch
is completed. */
xen_mc_callback(do_unlock, ptl);
xen_mc_callback(xen_pte_unlock, ptl);
}
}
@ -715,11 +838,11 @@ static int pin_page(struct page *page, enum pt_level level)
/* This is called just after a mm has been created, but it has not
been used yet. We need to make sure that its pagetable is all
read-only, and can be pinned. */
void xen_pgd_pin(pgd_t *pgd)
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
xen_mc_batch();
if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for kmap_flush_unused */
xen_mc_issue(0);
kmap_flush_unused();
@ -733,25 +856,35 @@ void xen_pgd_pin(pgd_t *pgd)
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
if (user_pgd) {
pin_page(virt_to_page(user_pgd), PT_PGD);
xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
}
}
#else /* CONFIG_X86_32 */
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is pinnable */
pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
PT_PMD);
#endif
xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
#endif /* CONFIG_X86_64 */
xen_mc_issue(0);
}
static void xen_pgd_pin(struct mm_struct *mm)
{
__xen_pgd_pin(mm, mm->pgd);
}
/*
* On save, we need to pin all pagetables to make sure they get their
* mfns turned into pfns. Search the list for any unpinned pgds and pin
* them (unpinned pgds are not currently in use, probably because the
* process is under construction or destruction).
*
* Expected to be called in stop_machine() ("equivalent to taking
* every spinlock in the system"), so the locking doesn't really
* matter all that much.
*/
void xen_mm_pin_all(void)
{
@ -762,7 +895,7 @@ void xen_mm_pin_all(void)
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
xen_pgd_pin((pgd_t *)page_address(page));
__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
SetPageSavePinned(page);
}
}
@ -775,7 +908,8 @@ void xen_mm_pin_all(void)
* that's before we have page structures to store the bits. So do all
* the book-keeping now.
*/
static __init int mark_pinned(struct page *page, enum pt_level level)
static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
SetPagePinned(page);
return 0;
@ -783,10 +917,11 @@ static __init int mark_pinned(struct page *page, enum pt_level level)
void __init xen_mark_init_mm_pinned(void)
{
pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
static int unpin_page(struct page *page, enum pt_level level)
static int xen_unpin_page(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
unsigned pgfl = TestClearPagePinned(page);
@ -796,10 +931,18 @@ static int unpin_page(struct page *page, enum pt_level level)
spinlock_t *ptl = NULL;
struct multicall_space mcs;
/*
* Do the converse to pin_page. If we're using split
* pte locks, we must be holding the lock for while
* the pte page is unpinned but still RO to prevent
* concurrent updates from seeing it in this
* partially-pinned state.
*/
if (level == PT_PTE) {
ptl = lock_pte(page);
ptl = xen_pte_lock(page, mm);
xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
if (ptl)
xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
}
mcs = __xen_mc_entry(0);
@ -810,7 +953,7 @@ static int unpin_page(struct page *page, enum pt_level level)
if (ptl) {
/* unlock when batch completed */
xen_mc_callback(do_unlock, ptl);
xen_mc_callback(xen_pte_unlock, ptl);
}
}
@ -818,7 +961,7 @@ static int unpin_page(struct page *page, enum pt_level level)
}
/* Release a pagetable's pages back as normal RW */
static void xen_pgd_unpin(pgd_t *pgd)
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
xen_mc_batch();
@ -830,21 +973,27 @@ static void xen_pgd_unpin(pgd_t *pgd)
if (user_pgd) {
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
unpin_page(virt_to_page(user_pgd), PT_PGD);
xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
}
}
#endif
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is unpinned */
pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
PT_PMD);
#endif
pgd_walk(pgd, unpin_page, USER_LIMIT);
xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}
static void xen_pgd_unpin(struct mm_struct *mm)
{
__xen_pgd_unpin(mm, mm->pgd);
}
/*
* On resume, undo any pinning done at save, so that the rest of the
* kernel doesn't see any unexpected pinned pagetables.
@ -859,7 +1008,7 @@ void xen_mm_unpin_all(void)
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
BUG_ON(!PagePinned(page));
xen_pgd_unpin((pgd_t *)page_address(page));
__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
ClearPageSavePinned(page);
}
}
@ -870,14 +1019,14 @@ void xen_mm_unpin_all(void)
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
spin_lock(&next->page_table_lock);
xen_pgd_pin(next->pgd);
xen_pgd_pin(next);
spin_unlock(&next->page_table_lock);
}
void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
spin_lock(&mm->page_table_lock);
xen_pgd_pin(mm->pgd);
xen_pgd_pin(mm);
spin_unlock(&mm->page_table_lock);
}
@ -907,7 +1056,7 @@ static void drop_other_mm_ref(void *info)
}
}
static void drop_mm_ref(struct mm_struct *mm)
static void xen_drop_mm_ref(struct mm_struct *mm)
{
cpumask_t mask;
unsigned cpu;
@ -937,7 +1086,7 @@ static void drop_mm_ref(struct mm_struct *mm)
smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
static void xen_drop_mm_ref(struct mm_struct *mm)
{
if (current->active_mm == mm)
load_cr3(swapper_pg_dir);
@ -961,14 +1110,77 @@ static void drop_mm_ref(struct mm_struct *mm)
void xen_exit_mmap(struct mm_struct *mm)
{
get_cpu(); /* make sure we don't move around */
drop_mm_ref(mm);
xen_drop_mm_ref(mm);
put_cpu();
spin_lock(&mm->page_table_lock);
/* pgd may not be pinned in the error exit path of execve */
if (page_pinned(mm->pgd))
xen_pgd_unpin(mm->pgd);
if (xen_page_pinned(mm->pgd))
xen_pgd_unpin(mm);
spin_unlock(&mm->page_table_lock);
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_mmu_debug = debugfs_create_dir("mmu", d_xen);
debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
&mmu_stats.pgd_update_pinned);
debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
&mmu_stats.pgd_update_pinned);
debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
&mmu_stats.pud_update_pinned);
debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
&mmu_stats.pud_update_pinned);
debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
&mmu_stats.pmd_update_pinned);
debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
&mmu_stats.pmd_update_pinned);
debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
// &mmu_stats.pte_update_pinned);
debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
&mmu_stats.pte_update_pinned);
debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
&mmu_stats.mmu_update_extended);
xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
mmu_stats.mmu_update_histo, 20);
debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
&mmu_stats.set_pte_at_batched);
debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
&mmu_stats.set_pte_at_current);
debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
&mmu_stats.set_pte_at_kernel);
debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
return 0;
}
fs_initcall(xen_mmu_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */

View File

@ -18,9 +18,6 @@ void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
void xen_exit_mmap(struct mm_struct *mm);
void xen_pgd_pin(pgd_t *pgd);
//void xen_pgd_unpin(pgd_t *pgd);
pteval_t xen_pte_val(pte_t);
pmdval_t xen_pmd_val(pmd_t);
pgdval_t xen_pgd_val(pgd_t);

View File

@ -21,16 +21,20 @@
*/
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/debugfs.h>
#include <asm/xen/hypercall.h>
#include "multicalls.h"
#include "debugfs.h"
#define MC_BATCH 32
#define MC_DEBUG 1
#define MC_BATCH 32
#define MC_ARGS (MC_BATCH * 16)
struct mc_buffer {
struct multicall_entry entries[MC_BATCH];
#if MC_DEBUG
@ -47,6 +51,76 @@ struct mc_buffer {
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
/* flush reasons 0- slots, 1- args, 2- callbacks */
enum flush_reasons
{
FL_SLOTS,
FL_ARGS,
FL_CALLBACKS,
FL_N_REASONS
};
#ifdef CONFIG_XEN_DEBUG_FS
#define NHYPERCALLS 40 /* not really */
static struct {
unsigned histo[MC_BATCH+1];
unsigned issued;
unsigned arg_total;
unsigned hypercalls;
unsigned histo_hypercalls[NHYPERCALLS];
unsigned flush[FL_N_REASONS];
} mc_stats;
static u8 zero_stats;
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&mc_stats, 0, sizeof(mc_stats));
zero_stats = 0;
}
}
static void mc_add_stats(const struct mc_buffer *mc)
{
int i;
check_zero();
mc_stats.issued++;
mc_stats.hypercalls += mc->mcidx;
mc_stats.arg_total += mc->argidx;
mc_stats.histo[mc->mcidx]++;
for(i = 0; i < mc->mcidx; i++) {
unsigned op = mc->entries[i].op;
if (op < NHYPERCALLS)
mc_stats.histo_hypercalls[op]++;
}
}
static void mc_stats_flush(enum flush_reasons idx)
{
check_zero();
mc_stats.flush[idx]++;
}
#else /* !CONFIG_XEN_DEBUG_FS */
static inline void mc_add_stats(const struct mc_buffer *mc)
{
}
static inline void mc_stats_flush(enum flush_reasons idx)
{
}
#endif /* CONFIG_XEN_DEBUG_FS */
void xen_mc_flush(void)
{
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
@ -60,6 +134,8 @@ void xen_mc_flush(void)
something in the middle */
local_irq_save(flags);
mc_add_stats(b);
if (b->mcidx) {
#if MC_DEBUG
memcpy(b->debug, b->entries,
@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args)
if (b->mcidx == MC_BATCH ||
(argidx + args) > MC_ARGS) {
mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
xen_mc_flush();
argidx = roundup(b->argidx, sizeof(u64));
}
@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data)
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
struct callback *cb;
if (b->cbidx == MC_BATCH)
if (b->cbidx == MC_BATCH) {
mc_stats_flush(FL_CALLBACKS);
xen_mc_flush();
}
cb = &b->callbacks[b->cbidx++];
cb->fn = fn;
cb->data = data;
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mc_debug;
static int __init xen_mc_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_mc_debug = debugfs_create_dir("multicalls", d_xen);
debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
mc_stats.histo, MC_BATCH);
xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
mc_stats.histo_hypercalls, NHYPERCALLS);
xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
mc_stats.flush, FL_N_REASONS);
return 0;
}
fs_initcall(xen_mc_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */

View File

@ -11,11 +11,8 @@
* useful topology information for the kernel to make use of. As a
* result, all CPUs are treated as if they're single-core and
* single-threaded.
*
* This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>
@ -36,8 +33,6 @@
#include "xen-ops.h"
#include "mmu.h"
static void __cpuinit xen_init_lock_cpu(int cpu);
cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
@ -64,11 +59,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
static __cpuinit void cpu_bringup_and_idle(void)
static __cpuinit void cpu_bringup(void)
{
int cpu = smp_processor_id();
cpu_init();
touch_softlockup_watchdog();
preempt_disable();
xen_enable_sysenter();
@ -89,6 +85,11 @@ static __cpuinit void cpu_bringup_and_idle(void)
local_irq_enable();
wmb(); /* make sure everything is out */
}
static __cpuinit void cpu_bringup_and_idle(void)
{
cpu_bringup();
cpu_idle();
}
@ -212,8 +213,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
cpu_set(cpu, cpu_present_map);
}
//init_xenbus_allowed_cpumask();
}
static __cpuinit int
@ -281,12 +280,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
struct task_struct *idle = idle_task(cpu);
int rc;
#if 0
rc = cpu_up_check(cpu);
if (rc)
return rc;
#endif
#ifdef CONFIG_X86_64
/* Allocate node local memory for AP pdas */
WARN_ON(cpu == 0);
@ -339,6 +332,60 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
#ifdef CONFIG_HOTPLUG_CPU
static int xen_cpu_disable(void)
{
unsigned int cpu = smp_processor_id();
if (cpu == 0)
return -EBUSY;
cpu_disable_common();
load_cr3(swapper_pg_dir);
return 0;
}
static void xen_cpu_die(unsigned int cpu)
{
while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ/10);
}
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
if (num_online_cpus() == 1)
alternatives_smp_switch(0);
}
static void xen_play_dead(void)
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
}
#else /* !CONFIG_HOTPLUG_CPU */
static int xen_cpu_disable(void)
{
return -ENOSYS;
}
static void xen_cpu_die(unsigned int cpu)
{
BUG();
}
static void xen_play_dead(void)
{
BUG();
}
#endif
static void stop_self(void *v)
{
int cpu = smp_processor_id();
@ -419,176 +466,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
struct xen_spinlock {
unsigned char lock; /* 0 -> free; 1 -> locked */
unsigned short spinners; /* count of waiting cpus */
};
static int xen_spin_is_locked(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
return xl->lock != 0;
}
static int xen_spin_is_contended(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
/* Not strictly true; this is only the count of contended
lock-takers entering the slow path. */
return xl->spinners != 0;
}
static int xen_spin_trylock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
u8 old = 1;
asm("xchgb %b0,%1"
: "+q" (old), "+m" (xl->lock) : : "memory");
return old == 0;
}
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
static inline void spinning_lock(struct xen_spinlock *xl)
{
__get_cpu_var(lock_spinners) = xl;
wmb(); /* set lock of interest before count */
asm(LOCK_PREFIX " incw %0"
: "+m" (xl->spinners) : : "memory");
}
static inline void unspinning_lock(struct xen_spinlock *xl)
{
asm(LOCK_PREFIX " decw %0"
: "+m" (xl->spinners) : : "memory");
wmb(); /* decrement count before clearing lock */
__get_cpu_var(lock_spinners) = NULL;
}
static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
int irq = __get_cpu_var(lock_kicker_irq);
int ret;
/* If kicker interrupts not initialized yet, just spin */
if (irq == -1)
return 0;
/* announce we're spinning */
spinning_lock(xl);
/* clear pending */
xen_clear_irq_pending(irq);
		/* check again to make sure it didn't become free while
		   we weren't looking */
ret = xen_spin_trylock(lock);
if (ret)
goto out;
/* block until irq becomes pending */
xen_poll_irq(irq);
kstat_this_cpu.irqs[irq]++;
out:
unspinning_lock(xl);
return ret;
}
static void xen_spin_lock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
int timeout;
u8 oldval;
do {
timeout = 1 << 10;
asm("1: xchgb %1,%0\n"
" testb %1,%1\n"
" jz 3f\n"
"2: rep;nop\n"
" cmpb $0,%0\n"
" je 1b\n"
" dec %2\n"
" jnz 2b\n"
"3:\n"
: "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
: "1" (1)
: "memory");
} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
}
static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
int cpu;
for_each_online_cpu(cpu) {
/* XXX should mix up next cpu selection */
if (per_cpu(lock_spinners, cpu) == xl) {
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
break;
}
}
}
static void xen_spin_unlock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
smp_wmb(); /* make sure no writes get moved after unlock */
xl->lock = 0; /* release lock */
/* make sure unlock happens before kick */
barrier();
if (unlikely(xl->spinners))
xen_spin_unlock_slow(xl);
}
static __cpuinit void xen_init_lock_cpu(int cpu)
{
int irq;
const char *name;
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
xen_reschedule_interrupt,
IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
name,
NULL);
if (irq >= 0) {
disable_irq(irq); /* make sure it's never delivered */
per_cpu(lock_kicker_irq, cpu) = irq;
}
printk("cpu %d spinlock event irq %d\n", cpu, irq);
}
static void __init xen_init_spinlocks(void)
{
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
pv_lock_ops.spin_is_contended = xen_spin_is_contended;
pv_lock_ops.spin_lock = xen_spin_lock;
pv_lock_ops.spin_trylock = xen_spin_trylock;
pv_lock_ops.spin_unlock = xen_spin_unlock;
}
static const struct smp_ops xen_smp_ops __initdata = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
.cpu_up = xen_cpu_up,
.smp_cpus_done = xen_smp_cpus_done,
.cpu_up = xen_cpu_up,
.cpu_die = xen_cpu_die,
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
.smp_send_stop = xen_smp_send_stop,
.smp_send_reschedule = xen_smp_send_reschedule,

arch/x86/xen/spinlock.c (new file, 428 lines)
View File

@ -0,0 +1,428 @@
/*
* Split spinlock implementation out into its own file, so it can be
* compiled in a FTRACE-compatible way.
*/
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <asm/paravirt.h>
#include <xen/interface/xen.h>
#include <xen/events.h>
#include "xen-ops.h"
#include "debugfs.h"
#ifdef CONFIG_XEN_DEBUG_FS
static struct xen_spinlock_stats
{
u64 taken;
u32 taken_slow;
u32 taken_slow_nested;
u32 taken_slow_pickup;
u32 taken_slow_spurious;
u32 taken_slow_irqenable;
u64 released;
u32 released_slow;
u32 released_slow_kicked;
#define HISTO_BUCKETS 30
u32 histo_spin_total[HISTO_BUCKETS+1];
u32 histo_spin_spinning[HISTO_BUCKETS+1];
u32 histo_spin_blocked[HISTO_BUCKETS+1];
u64 time_total;
u64 time_spinning;
u64 time_blocked;
} spinlock_stats;
static u8 zero_stats;
static unsigned lock_timeout = 1 << 10;
#define TIMEOUT lock_timeout
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&spinlock_stats, 0, sizeof(spinlock_stats));
zero_stats = 0;
}
}
#define ADD_STATS(elem, val) \
do { check_zero(); spinlock_stats.elem += (val); } while(0)
static inline u64 spin_time_start(void)
{
return xen_clocksource_read();
}
static void __spin_time_accum(u64 delta, u32 *array)
{
unsigned index = ilog2(delta);
check_zero();
if (index < HISTO_BUCKETS)
array[index]++;
else
array[HISTO_BUCKETS]++;
}
static inline void spin_time_accum_spinning(u64 start)
{
u32 delta = xen_clocksource_read() - start;
__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
spinlock_stats.time_spinning += delta;
}
static inline void spin_time_accum_total(u64 start)
{
u32 delta = xen_clocksource_read() - start;
__spin_time_accum(delta, spinlock_stats.histo_spin_total);
spinlock_stats.time_total += delta;
}
static inline void spin_time_accum_blocked(u64 start)
{
u32 delta = xen_clocksource_read() - start;
__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
spinlock_stats.time_blocked += delta;
}
#else /* !CONFIG_XEN_DEBUG_FS */
#define TIMEOUT (1 << 10)
#define ADD_STATS(elem, val) do { (void)(val); } while(0)
static inline u64 spin_time_start(void)
{
return 0;
}
static inline void spin_time_accum_total(u64 start)
{
}
static inline void spin_time_accum_spinning(u64 start)
{
}
static inline void spin_time_accum_blocked(u64 start)
{
}
#endif /* CONFIG_XEN_DEBUG_FS */
struct xen_spinlock {
unsigned char lock; /* 0 -> free; 1 -> locked */
unsigned short spinners; /* count of waiting cpus */
};
static int xen_spin_is_locked(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
return xl->lock != 0;
}
static int xen_spin_is_contended(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
/* Not strictly true; this is only the count of contended
lock-takers entering the slow path. */
return xl->spinners != 0;
}
static int xen_spin_trylock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
u8 old = 1;
asm("xchgb %b0,%1"
: "+q" (old), "+m" (xl->lock) : : "memory");
return old == 0;
}
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
/*
* Mark a cpu as interested in a lock. Returns the CPU's previous
* lock of interest, in case we got preempted by an interrupt.
*/
static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
{
struct xen_spinlock *prev;
prev = __get_cpu_var(lock_spinners);
__get_cpu_var(lock_spinners) = xl;
wmb(); /* set lock of interest before count */
asm(LOCK_PREFIX " incw %0"
: "+m" (xl->spinners) : : "memory");
return prev;
}
/*
* Mark a cpu as no longer interested in a lock. Restores previous
* lock of interest (NULL for none).
*/
static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
{
asm(LOCK_PREFIX " decw %0"
: "+m" (xl->spinners) : : "memory");
wmb(); /* decrement count before restoring lock */
__get_cpu_var(lock_spinners) = prev;
}
static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
struct xen_spinlock *prev;
int irq = __get_cpu_var(lock_kicker_irq);
int ret;
unsigned long flags;
u64 start;
/* If kicker interrupts not initialized yet, just spin */
if (irq == -1)
return 0;
start = spin_time_start();
/* announce we're spinning */
prev = spinning_lock(xl);
flags = __raw_local_save_flags();
if (irq_enable) {
ADD_STATS(taken_slow_irqenable, 1);
raw_local_irq_enable();
}
ADD_STATS(taken_slow, 1);
ADD_STATS(taken_slow_nested, prev != NULL);
do {
/* clear pending */
xen_clear_irq_pending(irq);
		/* check again to make sure it didn't become free while
		   we weren't looking */
ret = xen_spin_trylock(lock);
if (ret) {
ADD_STATS(taken_slow_pickup, 1);
/*
* If we interrupted another spinlock while it
* was blocking, make sure it doesn't block
* without rechecking the lock.
*/
if (prev != NULL)
xen_set_irq_pending(irq);
goto out;
}
/*
* Block until irq becomes pending. If we're
* interrupted at this point (after the trylock but
* before entering the block), then the nested lock
* handler guarantees that the irq will be left
* pending if there's any chance the lock became free;
* xen_poll_irq() returns immediately if the irq is
* pending.
*/
xen_poll_irq(irq);
ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
kstat_this_cpu.irqs[irq]++;
out:
raw_local_irq_restore(flags);
unspinning_lock(xl, prev);
spin_time_accum_blocked(start);
return ret;
}
static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
unsigned timeout;
u8 oldval;
u64 start_spin;
ADD_STATS(taken, 1);
start_spin = spin_time_start();
do {
u64 start_spin_fast = spin_time_start();
timeout = TIMEOUT;
asm("1: xchgb %1,%0\n"
" testb %1,%1\n"
" jz 3f\n"
"2: rep;nop\n"
" cmpb $0,%0\n"
" je 1b\n"
" dec %2\n"
" jnz 2b\n"
"3:\n"
: "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
: "1" (1)
: "memory");
spin_time_accum_spinning(start_spin_fast);
} while (unlikely(oldval != 0 &&
(TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
spin_time_accum_total(start_spin);
}
static void xen_spin_lock(struct raw_spinlock *lock)
{
__xen_spin_lock(lock, false);
}
static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
{
__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
}
static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
int cpu;
ADD_STATS(released_slow, 1);
for_each_online_cpu(cpu) {
/* XXX should mix up next cpu selection */
if (per_cpu(lock_spinners, cpu) == xl) {
ADD_STATS(released_slow_kicked, 1);
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
break;
}
}
}
static void xen_spin_unlock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
ADD_STATS(released, 1);
smp_wmb(); /* make sure no writes get moved after unlock */
xl->lock = 0; /* release lock */
/* make sure unlock happens before kick */
barrier();
if (unlikely(xl->spinners))
xen_spin_unlock_slow(xl);
}
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
BUG();
return IRQ_HANDLED;
}
void __cpuinit xen_init_lock_cpu(int cpu)
{
int irq;
const char *name;
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
dummy_handler,
IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
name,
NULL);
if (irq >= 0) {
disable_irq(irq); /* make sure it's never delivered */
per_cpu(lock_kicker_irq, cpu) = irq;
}
printk("cpu %d spinlock event irq %d\n", cpu, irq);
}
void xen_uninit_lock_cpu(int cpu)
{
unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
}
void __init xen_init_spinlocks(void)
{
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
pv_lock_ops.spin_is_contended = xen_spin_is_contended;
pv_lock_ops.spin_lock = xen_spin_lock;
pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
pv_lock_ops.spin_trylock = xen_spin_trylock;
pv_lock_ops.spin_unlock = xen_spin_unlock;
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_spin_debug;
static int __init xen_spinlock_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
debugfs_create_u32("taken_slow", 0444, d_spin_debug,
&spinlock_stats.taken_slow);
debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
&spinlock_stats.taken_slow_nested);
debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
&spinlock_stats.taken_slow_pickup);
debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
&spinlock_stats.taken_slow_spurious);
debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
&spinlock_stats.taken_slow_irqenable);
debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
debugfs_create_u32("released_slow", 0444, d_spin_debug,
&spinlock_stats.released_slow);
debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
&spinlock_stats.released_slow_kicked);
debugfs_create_u64("time_spinning", 0444, d_spin_debug,
&spinlock_stats.time_spinning);
debugfs_create_u64("time_blocked", 0444, d_spin_debug,
&spinlock_stats.time_blocked);
debugfs_create_u64("time_total", 0444, d_spin_debug,
&spinlock_stats.time_total);
xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
return 0;
}
fs_initcall(xen_spinlock_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */
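The histo_spin_* arrays exposed above are filled by __spin_time_accum(), which buckets each delta by ilog2(); bucket i therefore counts samples whose duration fell in [2^i, 2^(i+1)) units of xen_clocksource_read() (nanoseconds, assuming the pvclock source). A small standalone sketch of that bucketing, for reference only (plain userspace C, not kernel code):

#include <stdio.h>

#define HISTO_BUCKETS 30

/* minimal integer log2, stand-in for the kernel's ilog2() */
static unsigned ilog2_u64(unsigned long long v)
{
	unsigned r = 0;
	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned histo[HISTO_BUCKETS + 1] = { 0 };
	unsigned long long samples[] = { 800, 1500, 70000, 1ULL << 40 };
	int i;

	for (i = 0; i < 4; i++) {
		unsigned idx = ilog2_u64(samples[i]);
		/* anything past the last bucket is lumped together, as in __spin_time_accum() */
		histo[idx < HISTO_BUCKETS ? idx : HISTO_BUCKETS]++;
	}

	for (i = 0; i <= HISTO_BUCKETS; i++)
		if (histo[i])
			printf("bucket %2d (>= 2^%d ns): %u\n", i, i, histo[i]);
	return 0;
}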

View File

@ -30,8 +30,6 @@
#define TIMER_SLOP 100000
#define NS_PER_TICK (1000000000LL / HZ)
static cycle_t xen_clocksource_read(void);
/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void)
return xen_khz;
}
static cycle_t xen_clocksource_read(void)
cycle_t xen_clocksource_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
@ -452,6 +450,14 @@ void xen_setup_timer(int cpu)
setup_runstate_info(cpu);
}
void xen_teardown_timer(int cpu)
{
struct clock_event_device *evt;
BUG_ON(cpu == 0);
evt = &per_cpu(xen_clock_events, cpu);
unbind_from_irqhandler(evt->irq, NULL);
}
void xen_setup_cpu_clockevents(void)
{
BUG_ON(preemptible());

View File

@ -298,7 +298,7 @@ check_events:
push %eax
push %ecx
push %edx
call force_evtchn_callback
call xen_force_evtchn_callback
pop %edx
pop %ecx
pop %eax

View File

@ -26,8 +26,15 @@
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
#define XEN_EFLAGS_NMI 0x80000000
#if 0
#include <asm/percpu.h>
#if 1
/*
x86-64 does not yet support direct access to percpu variables
via a segment override, so we just need to make sure this code
never gets used
*/
#define BUG ud2a
#define PER_CPU_VAR(var, off) 0xdeadbeef
#endif
/*
Enable events. This clears the event mask and tests the pending
@ -35,6 +42,8 @@
events, then enter the hypervisor to get them handled.
*/
ENTRY(xen_irq_enable_direct)
BUG
/* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
@ -58,6 +67,8 @@ ENDPATCH(xen_irq_enable_direct)
non-zero.
*/
ENTRY(xen_irq_disable_direct)
BUG
movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
ENDPATCH(xen_irq_disable_direct)
ret
@ -74,6 +85,8 @@ ENDPATCH(xen_irq_disable_direct)
Xen and x86 use opposite senses (mask vs enable).
*/
ENTRY(xen_save_fl_direct)
BUG
testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
setz %ah
addb %ah,%ah
@ -91,6 +104,8 @@ ENDPATCH(xen_save_fl_direct)
if so.
*/
ENTRY(xen_restore_fl_direct)
BUG
testb $X86_EFLAGS_IF>>8, %ah
setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
/* Preempt here doesn't matter because that will deal with
@ -122,7 +137,7 @@ check_events:
push %r9
push %r10
push %r11
call force_evtchn_callback
call xen_force_evtchn_callback
pop %r11
pop %r10
pop %r9
@ -133,7 +148,6 @@ check_events:
pop %rcx
pop %rax
ret
#endif
ENTRY(xen_adjust_exception_frame)
mov 8+0(%rsp),%rcx

View File

@ -2,6 +2,7 @@
#define XEN_OPS_H
#include <linux/init.h>
#include <linux/clocksource.h>
#include <linux/irqreturn.h>
#include <xen/xen-ops.h>
@ -31,7 +32,10 @@ void xen_vcpu_restore(void);
void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
void xen_teardown_timer(int cpu);
cycle_t xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
unsigned long xen_tsc_khz(void);
void __init xen_time_init(void);
@ -50,6 +54,10 @@ void __init xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
void __init xen_init_spinlocks(void);
__cpuinit void xen_init_lock_cpu(int cpu);
void xen_uninit_lock_cpu(int cpu);
extern cpumask_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}

View File

@ -1066,7 +1066,7 @@ static struct xenbus_driver blkfront = {
static int __init xlblk_init(void)
{
if (!is_running_on_xen())
if (!xen_domain())
return -ENODEV;
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {

View File

@ -108,8 +108,8 @@ static int __init xen_init(void)
{
struct hvc_struct *hp;
if (!is_running_on_xen() ||
is_initial_xendomain() ||
if (!xen_pv_domain() ||
xen_initial_domain() ||
!xen_start_info->console.domU.evtchn)
return -ENODEV;
@ -142,7 +142,7 @@ static void __exit xen_fini(void)
static int xen_cons_init(void)
{
if (!is_running_on_xen())
if (!xen_pv_domain())
return 0;
hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);

View File

@ -335,11 +335,11 @@ static struct xenbus_driver xenkbd = {
static int __init xenkbd_init(void)
{
if (!is_running_on_xen())
if (!xen_domain())
return -ENODEV;
/* Nothing to do if running in dom0. */
if (is_initial_xendomain())
if (xen_initial_domain())
return -ENODEV;
return xenbus_register_frontend(&xenkbd);

View File

@ -1794,10 +1794,10 @@ static struct xenbus_driver netfront = {
static int __init netif_init(void)
{
if (!is_running_on_xen())
if (!xen_domain())
return -ENODEV;
if (is_initial_xendomain())
if (xen_initial_domain())
return 0;
printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
@ -1809,7 +1809,7 @@ module_init(netif_init);
static void __exit netif_exit(void)
{
if (is_initial_xendomain())
if (xen_initial_domain())
return;
xenbus_unregister_driver(&netfront);

View File

@ -210,143 +210,7 @@ timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action)
/*-------------------------------------------------------------------------*/
/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */
/* Section 2.2 Host Controller Capability Registers */
struct ehci_caps {
/* these fields are specified as 8 and 16 bit registers,
* but some hosts can't perform 8 or 16 bit PCI accesses.
*/
u32 hc_capbase;
#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */
#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */
u32 hcs_params; /* HCSPARAMS - offset 0x4 */
#define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? */
#define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */
#define HCS_N_CC(p) (((p)>>12)&0xf) /* bits 15:12, #companion HCs */
#define HCS_N_PCC(p) (((p)>>8)&0xf) /* bits 11:8, ports per CC */
#define HCS_PORTROUTED(p) ((p)&(1 << 7)) /* true: port routing */
#define HCS_PPC(p) ((p)&(1 << 4)) /* true: port power control */
#define HCS_N_PORTS(p) (((p)>>0)&0xf) /* bits 3:0, ports on HC */
u32 hcc_params; /* HCCPARAMS - offset 0x8 */
#define HCC_EXT_CAPS(p) (((p)>>8)&0xff) /* for pci extended caps */
#define HCC_ISOC_CACHE(p) ((p)&(1 << 7)) /* true: can cache isoc frame */
#define HCC_ISOC_THRES(p) (((p)>>4)&0x7) /* bits 6:4, uframes cached */
#define HCC_CANPARK(p) ((p)&(1 << 2)) /* true: can park on async qh */
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute [8]; /* nibbles for routing - offset 0xC */
} __attribute__ ((packed));
/* Section 2.3 Host Controller Operational Registers */
struct ehci_regs {
/* USBCMD: offset 0x00 */
u32 command;
/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
#define CMD_PARK (1<<11) /* enable "park" on async qh */
#define CMD_PARK_CNT(c) (((c)>>8)&3) /* how many transfers to park for */
#define CMD_LRESET (1<<7) /* partial reset (no ports, etc) */
#define CMD_IAAD (1<<6) /* "doorbell" interrupt async advance */
#define CMD_ASE (1<<5) /* async schedule enable */
#define CMD_PSE (1<<4) /* periodic schedule enable */
/* 3:2 is periodic frame list size */
#define CMD_RESET (1<<1) /* reset HC not bus */
#define CMD_RUN (1<<0) /* start/stop HC */
/* USBSTS: offset 0x04 */
u32 status;
#define STS_ASS (1<<15) /* Async Schedule Status */
#define STS_PSS (1<<14) /* Periodic Schedule Status */
#define STS_RECL (1<<13) /* Reclamation */
#define STS_HALT (1<<12) /* Not running (any reason) */
/* some bits reserved */
/* these STS_* flags are also intr_enable bits (USBINTR) */
#define STS_IAA (1<<5) /* Interrupted on async advance */
#define STS_FATAL (1<<4) /* such as some PCI access errors */
#define STS_FLR (1<<3) /* frame list rolled over */
#define STS_PCD (1<<2) /* port change detect */
#define STS_ERR (1<<1) /* "error" completion (overflow, ...) */
#define STS_INT (1<<0) /* "normal" completion (short, ...) */
/* USBINTR: offset 0x08 */
u32 intr_enable;
/* FRINDEX: offset 0x0C */
u32 frame_index; /* current microframe number */
/* CTRLDSSEGMENT: offset 0x10 */
u32 segment; /* address bits 63:32 if needed */
/* PERIODICLISTBASE: offset 0x14 */
u32 frame_list; /* points to periodic list */
/* ASYNCLISTADDR: offset 0x18 */
u32 async_next; /* address of next async queue head */
u32 reserved [9];
/* CONFIGFLAG: offset 0x40 */
u32 configured_flag;
#define FLAG_CF (1<<0) /* true: we'll support "high speed" */
/* PORTSC: offset 0x44 */
u32 port_status [0]; /* up to N_PORTS */
/* 31:23 reserved */
#define PORT_WKOC_E (1<<22) /* wake on overcurrent (enable) */
#define PORT_WKDISC_E (1<<21) /* wake on disconnect (enable) */
#define PORT_WKCONN_E (1<<20) /* wake on connect (enable) */
/* 19:16 for port testing */
#define PORT_LED_OFF (0<<14)
#define PORT_LED_AMBER (1<<14)
#define PORT_LED_GREEN (2<<14)
#define PORT_LED_MASK (3<<14)
#define PORT_OWNER (1<<13) /* true: companion hc owns this port */
#define PORT_POWER (1<<12) /* true: has power (see PPC) */
#define PORT_USB11(x) (((x)&(3<<10))==(1<<10)) /* USB 1.1 device */
/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
/* 9 reserved */
#define PORT_RESET (1<<8) /* reset port */
#define PORT_SUSPEND (1<<7) /* suspend port */
#define PORT_RESUME (1<<6) /* resume it */
#define PORT_OCC (1<<5) /* over current change */
#define PORT_OC (1<<4) /* over current active */
#define PORT_PEC (1<<3) /* port enable change */
#define PORT_PE (1<<2) /* port enable */
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
} __attribute__ ((packed));
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
#define USBMODE_BE (1<<2) /* BE/LE endianness select */
#define USBMODE_CM_HC (3<<0) /* host controller mode */
#define USBMODE_CM_IDLE (0<<0) /* idle state */
/* Appendix C, Debug port ... intended for use with special "debug devices"
* that can help if there's no serial console. (nonstandard enumeration.)
*/
struct ehci_dbg_port {
u32 control;
#define DBGP_OWNER (1<<30)
#define DBGP_ENABLED (1<<28)
#define DBGP_DONE (1<<16)
#define DBGP_INUSE (1<<10)
#define DBGP_ERRCODE(x) (((x)>>7)&0x07)
# define DBGP_ERR_BAD 1
# define DBGP_ERR_SIGNAL 2
#define DBGP_ERROR (1<<6)
#define DBGP_GO (1<<5)
#define DBGP_OUT (1<<4)
#define DBGP_LEN(x) (((x)>>0)&0x0f)
u32 pids;
#define DBGP_PID_GET(x) (((x)>>16)&0xff)
#define DBGP_PID_SET(data,tok) (((data)<<8)|(tok))
u32 data03;
u32 data47;
u32 address;
#define DBGP_EPADDR(dev,ep) (((dev)<<8)|(ep))
} __attribute__ ((packed));
#include <linux/usb/ehci_def.h>
/*-------------------------------------------------------------------------*/

View File

@ -673,7 +673,6 @@ config FB_VESA
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
select VIDEO_SELECT
help
This is the frame buffer device driver for generic VESA 2.0
compliant graphic cards. The older VESA 1.2 cards are not supported.
@ -1578,7 +1577,6 @@ config FB_CYBLA
tristate "Cyberblade/i1 support"
depends on FB && PCI && X86_32 && !64BIT
select FB_CFB_IMAGEBLIT
select VIDEO_SELECT
---help---
This driver is supposed to support the Trident Cyberblade/i1
graphics core integrated in the VIA VT8601A North Bridge,

View File

@ -43,22 +43,6 @@ config VGACON_SOFT_SCROLLBACK_SIZE
buffer. Each 64KB will give you approximately 16 80x25
screenfuls of scrollback buffer
config VIDEO_SELECT
bool "Video mode selection support"
depends on X86 && VGA_CONSOLE
---help---
This enables support for text mode selection on kernel startup. If
you want to take advantage of some high-resolution text mode your
card's BIOS offers, but the traditional Linux utilities like
SVGATextMode don't, you can say Y here and set the mode using the
"vga=" option from your boot loader (lilo or loadlin) or set
"vga=ask" which brings up a video mode menu on kernel startup. (Try
"man bootparam" or see the documentation of your boot loader about
how to pass options to the kernel.)
Read the file <file:Documentation/svga.txt> for more information
about the Video mode selection support. If unsure, say N.
config MDA_CONSOLE
depends on !M68K && !PARISC && ISA
tristate "MDA text console (dual-headed) (EXPERIMENTAL)"

View File

@ -680,11 +680,11 @@ static struct xenbus_driver xenfb = {
static int __init xenfb_init(void)
{
if (!is_running_on_xen())
if (!xen_domain())
return -ENODEV;
/* Nothing to do if running in dom0. */
if (is_initial_xendomain())
if (xen_initial_domain())
return -ENODEV;
return xenbus_register_frontend(&xenfb);

View File

@ -1,4 +1,5 @@
obj-y += grant-table.o features.o events.o manage.o
obj-y += xenbus/
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o

View File

@ -53,7 +53,6 @@
#include <asm/tlb.h>
#include <xen/interface/memory.h>
#include <xen/balloon.h>
#include <xen/xenbus.h>
#include <xen/features.h>
#include <xen/page.h>
@ -226,9 +225,8 @@ static int increase_reservation(unsigned long nr_pages)
}
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(
XENMEM_populate_physmap, &reservation);
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
if (rc < nr_pages) {
if (rc > 0) {
int ret;
@ -236,7 +234,7 @@ static int increase_reservation(unsigned long nr_pages)
/* We hit the Xen hard limit: reprobe. */
reservation.nr_extents = rc;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
&reservation);
BUG_ON(ret != rc);
}
if (rc >= 0)
@ -420,7 +418,7 @@ static int __init balloon_init(void)
unsigned long pfn;
struct page *page;
if (!is_running_on_xen())
if (!xen_pv_domain())
return -ENODEV;
pr_info("xen_balloon: Initialising balloon driver.\n");
@ -464,136 +462,13 @@ static void balloon_exit(void)
module_exit(balloon_exit);
static void balloon_update_driver_allowance(long delta)
{
unsigned long flags;
spin_lock_irqsave(&balloon_lock, flags);
balloon_stats.driver_pages += delta;
spin_unlock_irqrestore(&balloon_lock, flags);
}
static int dealloc_pte_fn(
pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
unsigned long mfn = pte_mfn(*pte);
int ret;
struct xen_memory_reservation reservation = {
.nr_extents = 1,
.extent_order = 0,
.domid = DOMID_SELF
};
set_xen_guest_handle(reservation.extent_start, &mfn);
set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
BUG_ON(ret != 1);
return 0;
}
static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
unsigned long vaddr, flags;
struct page *page, **pagevec;
int i, ret;
pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
if (pagevec == NULL)
return NULL;
for (i = 0; i < nr_pages; i++) {
page = pagevec[i] = alloc_page(GFP_KERNEL);
if (page == NULL)
goto err;
vaddr = (unsigned long)page_address(page);
scrub_page(page);
spin_lock_irqsave(&balloon_lock, flags);
if (xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long gmfn = page_to_pfn(page);
struct xen_memory_reservation reservation = {
.nr_extents = 1,
.extent_order = 0,
.domid = DOMID_SELF
};
set_xen_guest_handle(reservation.extent_start, &gmfn);
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
if (ret == 1)
ret = 0; /* success */
} else {
ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
dealloc_pte_fn, NULL);
}
if (ret != 0) {
spin_unlock_irqrestore(&balloon_lock, flags);
__free_page(page);
goto err;
}
totalram_pages = --balloon_stats.current_pages;
spin_unlock_irqrestore(&balloon_lock, flags);
}
out:
schedule_work(&balloon_worker);
flush_tlb_all();
return pagevec;
err:
spin_lock_irqsave(&balloon_lock, flags);
while (--i >= 0)
balloon_append(pagevec[i]);
spin_unlock_irqrestore(&balloon_lock, flags);
kfree(pagevec);
pagevec = NULL;
goto out;
}
static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
{
unsigned long flags;
int i;
if (pagevec == NULL)
return;
spin_lock_irqsave(&balloon_lock, flags);
for (i = 0; i < nr_pages; i++) {
BUG_ON(page_count(pagevec[i]) != 1);
balloon_append(pagevec[i]);
}
spin_unlock_irqrestore(&balloon_lock, flags);
kfree(pagevec);
schedule_work(&balloon_worker);
}
static void balloon_release_driver_page(struct page *page)
{
unsigned long flags;
spin_lock_irqsave(&balloon_lock, flags);
balloon_append(page);
balloon_stats.driver_pages--;
spin_unlock_irqrestore(&balloon_lock, flags);
schedule_work(&balloon_worker);
}
#define BALLOON_SHOW(name, format, args...) \
static ssize_t show_##name(struct sys_device *dev, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
#define BALLOON_SHOW(name, format, args...) \
static ssize_t show_##name(struct sys_device *dev, \
struct sysdev_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
@ -604,7 +479,8 @@ BALLOON_SHOW(hard_limit_kb,
(balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
static ssize_t show_target_kb(struct sys_device *dev, char *buf)
static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
}
@ -614,19 +490,14 @@ static ssize_t store_target_kb(struct sys_device *dev,
const char *buf,
size_t count)
{
char memstring[64], *endchar;
char *endchar;
unsigned long long target_bytes;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (count <= 1)
return -EBADMSG; /* runt */
if (count > sizeof(memstring))
return -EFBIG; /* too long */
strcpy(memstring, buf);
target_bytes = memparse(buf, &endchar);
target_bytes = memparse(memstring, &endchar);
balloon_set_new_target(target_bytes >> PAGE_SHIFT);
return count;
@ -694,20 +565,4 @@ static int register_balloon(struct sys_device *sysdev)
return error;
}
static void unregister_balloon(struct sys_device *sysdev)
{
int i;
sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
sysdev_remove_file(sysdev, balloon_attrs[i]);
sysdev_unregister(sysdev);
sysdev_class_unregister(&balloon_sysdev_class);
}
static void balloon_sysfs_exit(void)
{
unregister_balloon(&balloon_sysdev);
}
MODULE_LICENSE("GPL");

drivers/xen/cpu_hotplug.c (new file, 90 lines)
View File

@ -0,0 +1,90 @@
#include <linux/notifier.h>
#include <xen/xenbus.h>
#include <asm-x86/xen/hypervisor.h>
#include <asm/cpu.h>
static void enable_hotplug_cpu(int cpu)
{
if (!cpu_present(cpu))
arch_register_cpu(cpu);
cpu_set(cpu, cpu_present_map);
}
static void disable_hotplug_cpu(int cpu)
{
if (cpu_present(cpu))
arch_unregister_cpu(cpu);
cpu_clear(cpu, cpu_present_map);
}
static void vcpu_hotplug(unsigned int cpu)
{
int err;
char dir[32], state[32];
if (!cpu_possible(cpu))
return;
sprintf(dir, "cpu/%u", cpu);
err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
if (err != 1) {
printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
return;
}
if (strcmp(state, "online") == 0) {
enable_hotplug_cpu(cpu);
} else if (strcmp(state, "offline") == 0) {
(void)cpu_down(cpu);
disable_hotplug_cpu(cpu);
} else {
printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
state, cpu);
}
}
static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
unsigned int cpu;
char *cpustr;
const char *node = vec[XS_WATCH_PATH];
cpustr = strstr(node, "cpu/");
if (cpustr != NULL) {
sscanf(cpustr, "cpu/%u", &cpu);
vcpu_hotplug(cpu);
}
}
static int setup_cpu_watcher(struct notifier_block *notifier,
unsigned long event, void *data)
{
static struct xenbus_watch cpu_watch = {
.node = "cpu",
.callback = handle_vcpu_hotplug_event};
(void)register_xenbus_watch(&cpu_watch);
return NOTIFY_DONE;
}
static int __init setup_vcpu_hotplug_event(void)
{
static struct notifier_block xsn_cpu = {
.notifier_call = setup_cpu_watcher };
if (!xen_pv_domain())
return -ENODEV;
register_xenstore_notifier(&xsn_cpu);
return 0;
}
arch_initcall(setup_vcpu_hotplug_event);

View File

@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS];
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
/*
* Force a proper event-channel callback from Xen after clearing the
* callback mask. We do this in a very simple manner, by making a call
* down into Xen. The pending flag will be checked by Xen on return.
*/
void force_evtchn_callback(void)
{
(void)HYPERVISOR_xen_version(0, NULL);
}
EXPORT_SYMBOL_GPL(force_evtchn_callback);
static struct irq_chip xen_dynamic_chip;
/* Constructor for packed IRQ information. */
@ -175,6 +164,12 @@ static inline void set_evtchn(int port)
sync_set_bit(port, &s->evtchn_pending[0]);
}
static inline int test_evtchn(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
return sync_test_bit(port, &s->evtchn_pending[0]);
}
/**
* notify_remote_via_irq - send event to remote end of event channel via irq
@ -365,6 +360,10 @@ static void unbind_from_irq(unsigned int irq)
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
[index_from_irq(irq)] = -1;
break;
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
[index_from_irq(irq)] = -1;
break;
default:
break;
}
@ -743,6 +742,25 @@ void xen_clear_irq_pending(int irq)
clear_evtchn(evtchn);
}
void xen_set_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
set_evtchn(evtchn);
}
bool xen_test_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
bool ret = false;
if (VALID_EVTCHN(evtchn))
ret = test_evtchn(evtchn);
return ret;
}
/* Poll waiting for an irq to become pending. In the usual case, the
irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq)

View File

@ -508,7 +508,7 @@ static int __devinit gnttab_init(void)
unsigned int max_nr_glist_frames, nr_glist_frames;
unsigned int nr_init_grefs;
if (!is_running_on_xen())
if (!xen_domain())
return -ENODEV;
nr_grant_frames = 1;

View File

@ -814,7 +814,7 @@ static int __init xenbus_probe_init(void)
DPRINTK("");
err = -ENODEV;
if (!is_running_on_xen())
if (!xen_domain())
goto out_error;
/* Register ourselves with the kernel bus subsystem */
@ -829,7 +829,7 @@ static int __init xenbus_probe_init(void)
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
if (is_initial_xendomain()) {
if (xen_initial_domain()) {
/* dom0 not yet supported */
} else {
xenstored_ready = 1;
@ -846,7 +846,7 @@ static int __init xenbus_probe_init(void)
goto out_unreg_back;
}
if (!is_initial_xendomain())
if (!xen_initial_domain())
xenbus_probe(NULL);
return 0;
@ -937,7 +937,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
unsigned long timeout = jiffies + 10*HZ;
struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
if (!ready_to_wait_for_devices || !is_running_on_xen())
if (!ready_to_wait_for_devices || !xen_domain())
return;
while (exists_disconnected_device(drv)) {

View File

@ -199,6 +199,8 @@ typedef struct siginfo {
*/
#define TRAP_BRKPT (__SI_FAULT|1) /* process breakpoint */
#define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */
#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */
#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */
#define NSIGTRAP 2
/*

View File

@ -3,11 +3,6 @@
#include <asm-generic/siginfo.h>
/*
* SIGTRAP si_codes
*/
#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */
#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint or watchpoint */
#undef NSIGTRAP
#define NSIGTRAP 4

View File

@ -16,4 +16,21 @@ static inline unsigned int get_bios_ebda(void)
void reserve_ebda_region(void);
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
/*
* This is obviously not a great place for this, but we want to be
* able to scatter it around anywhere in the kernel.
*/
void check_for_bios_corruption(void);
void start_periodic_check_for_corruption(void);
#else
static inline void check_for_bios_corruption(void)
{
}
static inline void start_periodic_check_for_corruption(void)
{
}
#endif
#endif /* ASM_X86__BIOS_EBDA_H */
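Because the !CONFIG_X86_CHECK_BIOS_CORRUPTION stubs compile to nothing, callers can be sprinkled through suspend/resume or other BIOS-adjacent paths without any #ifdefs. A hypothetical call site (example_resume() is an illustrative name, not an existing kernel function):

#include <asm/bios_ebda.h>

static int example_resume(void)
{
	/* rescan the monitored low-memory ranges after the BIOS has run */
	check_for_bios_corruption();
	return 0;
}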

View File

@ -2,9 +2,7 @@
#define ASM_X86__BOOT_H
/* Don't touch these, unless you really know what you're doing. */
#define DEF_INITSEG 0x9000
#define DEF_SYSSEG 0x1000
#define DEF_SETUPSEG 0x9020
#define DEF_SYSSIZE 0x7F00
/* Internal svga startup constants */

View File

@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc,
desc->d = info->seg_32bit;
desc->g = info->limit_in_pages;
desc->base2 = (info->base_addr & 0xff000000) >> 24;
/*
* Don't allow setting of the lm bit. It is useless anyway
* because 64bit system calls require __USER_CS:
*/
desc->l = 0;
}
extern struct desc_ptr idt_descr;
@ -97,7 +102,15 @@ static inline int desc_empty(const void *ptr)
native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g) \
native_write_idt_entry(dt, entry, g)
#endif
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
#endif /* CONFIG_PARAVIRT */
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)

View File

@ -0,0 +1,47 @@
#ifndef ASM_X86__MICROCODE_H
#define ASM_X86__MICROCODE_H
struct cpu_signature {
unsigned int sig;
unsigned int pf;
unsigned int rev;
};
struct device;
struct microcode_ops {
int (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
int (*request_microcode_fw) (int cpu, struct device *device);
void (*apply_microcode) (int cpu);
int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
void (*microcode_fini_cpu) (int cpu);
};
struct ucode_cpu_info {
struct cpu_signature cpu_sig;
int valid;
void *mc;
};
extern struct ucode_cpu_info ucode_cpu_info[];
#ifdef CONFIG_MICROCODE_INTEL
extern struct microcode_ops * __init init_intel_microcode(void);
#else
static inline struct microcode_ops * __init init_intel_microcode(void)
{
return NULL;
}
#endif /* CONFIG_MICROCODE_INTEL */
#ifdef CONFIG_MICROCODE_AMD
extern struct microcode_ops * __init init_amd_microcode(void);
#else
static inline struct microcode_ops * __init init_amd_microcode(void)
{
return NULL;
}
#endif
#endif /* ASM_X86__MICROCODE_H */
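The struct microcode_ops vtable is how the vendor-specific loaders (Intel, AMD) plug into the shared driver core. A rough sketch of the registration pattern follows; the example_* names are hypothetical stand-ins, not the real backend symbols:

#include <linux/init.h>
#include <asm/microcode.h>

static int example_collect_cpu_info(int cpu, struct cpu_signature *csig)
{
	/* a real backend reads CPUID/MSRs here */
	csig->sig = 0;
	csig->pf  = 0;
	csig->rev = 0;
	return 0;
}

static struct microcode_ops example_ops = {
	.collect_cpu_info = example_collect_cpu_info,
	/* .request_microcode_fw, .apply_microcode, .microcode_fini_cpu ... */
};

struct microcode_ops * __init example_init_microcode(void)
{
	return &example_ops;	/* the generic core then drives these hooks */
}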

View File

@ -7,7 +7,7 @@
#ifdef CONFIG_NUMA
#define VIRTUAL_BUG_ON(x)
#include <linux/mmdebug.h>
#include <asm/smp.h>
@ -29,7 +29,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
{
unsigned nid;
VIRTUAL_BUG_ON(!memnodemap);
VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize);
nid = memnodemap[addr >> memnode_shift];
VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
return nid;

View File

@ -73,7 +73,12 @@ typedef struct page *pgtable_t;
#endif
#ifndef __ASSEMBLY__
#define __phys_addr_const(x) ((x) - PAGE_OFFSET)
#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x) ((x) - PAGE_OFFSET)
#endif
#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
#ifdef CONFIG_FLATMEM

View File

@ -124,6 +124,9 @@ struct pv_cpu_ops {
int entrynum, const void *desc, int size);
void (*write_idt_entry)(gate_desc *,
int entrynum, const gate_desc *gate);
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
void (*set_iopl_mask)(unsigned mask);
@ -325,6 +328,7 @@ struct pv_lock_ops {
int (*spin_is_locked)(struct raw_spinlock *lock);
int (*spin_is_contended)(struct raw_spinlock *lock);
void (*spin_lock)(struct raw_spinlock *lock);
void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
int (*spin_trylock)(struct raw_spinlock *lock);
void (*spin_unlock)(struct raw_spinlock *lock);
};
@ -830,6 +834,16 @@ do { \
(aux) = __aux; \
} while (0)
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
}
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
}
static inline void load_TR_desc(void)
{
PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
@ -1394,6 +1408,12 @@ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}
static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
unsigned long flags)
{
PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}
static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
{
return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);

View File

@ -586,41 +586,6 @@ static inline void clear_in_cr4(unsigned long mask)
write_cr4(cr4);
}
struct microcode_header {
unsigned int hdrver;
unsigned int rev;
unsigned int date;
unsigned int sig;
unsigned int cksum;
unsigned int ldrver;
unsigned int pf;
unsigned int datasize;
unsigned int totalsize;
unsigned int reserved[3];
};
struct microcode {
struct microcode_header hdr;
unsigned int bits[0];
};
typedef struct microcode microcode_t;
typedef struct microcode_header microcode_header_t;
/* microcode format is extended from prescott processors */
struct extended_signature {
unsigned int sig;
unsigned int pf;
unsigned int cksum;
};
struct extended_sigtable {
unsigned int count;
unsigned int cksum;
unsigned int reserved[3];
struct extended_signature sigs[0];
};
typedef struct {
unsigned long seg;
} mm_segment_t;

View File

@ -177,11 +177,11 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
#ifdef CONFIG_X86_32
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
int error_code);
#else
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int error_code, int si_code);
#endif
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
extern long syscall_trace_enter(struct pt_regs *);
extern void syscall_trace_leave(struct pt_regs *);

View File

@ -50,12 +50,16 @@ extern struct {
struct smp_ops {
void (*smp_prepare_boot_cpu)(void);
void (*smp_prepare_cpus)(unsigned max_cpus);
int (*cpu_up)(unsigned cpu);
void (*smp_cpus_done)(unsigned max_cpus);
void (*smp_send_stop)(void);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
void (*send_call_func_ipi)(cpumask_t mask);
void (*send_call_func_single_ipi)(int cpu);
};
@ -94,6 +98,21 @@ static inline int __cpu_up(unsigned int cpu)
return smp_ops.cpu_up(cpu);
}
static inline int __cpu_disable(void)
{
return smp_ops.cpu_disable();
}
static inline void __cpu_die(unsigned int cpu)
{
smp_ops.cpu_die(cpu);
}
static inline void play_dead(void)
{
smp_ops.play_dead();
}
static inline void smp_send_reschedule(int cpu)
{
smp_ops.smp_send_reschedule(cpu);
@ -109,16 +128,19 @@ static inline void arch_send_call_function_ipi(cpumask_t mask)
smp_ops.send_call_func_ipi(mask);
}
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
int native_cpu_up(unsigned int cpunum);
int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);
void native_play_dead(void);
void play_dead_common(void);
void native_send_call_func_ipi(cpumask_t mask);
void native_send_call_func_single_ipi(int cpu);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@ -205,9 +227,5 @@ static inline int hard_smp_processor_id(void)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_HOTPLUG_CPU
extern void cpu_uninit(void);
#endif
#endif /* __ASSEMBLY__ */
#endif /* ASM_X86__SMP_H */
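With cpu_disable/cpu_die/play_dead folded into struct smp_ops, the __cpu_disable()/__cpu_die()/play_dead() inlines above dispatch through the ops table, so each backend only has to fill in its hooks. Roughly how the native backend wires it up, as a sketch built from the native_* helpers declared above (the full initializer in arch/x86/kernel/smp.c also covers the stop/reschedule IPIs):

struct smp_ops smp_ops = {
	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
	.smp_prepare_cpus	= native_smp_prepare_cpus,
	.smp_cpus_done		= native_smp_cpus_done,
	.cpu_up			= native_cpu_up,
	.cpu_disable		= native_cpu_disable,
	.cpu_die		= native_cpu_die,
	.play_dead		= native_play_dead,
	.send_call_func_ipi	= native_send_call_func_ipi,
	.send_call_func_single_ipi = native_send_call_func_single_ipi,
};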

View File

@ -21,8 +21,10 @@
#ifdef CONFIG_X86_32
# define LOCK_PTR_REG "a"
# define REG_PTR_MODE "k"
#else
# define LOCK_PTR_REG "D"
# define REG_PTR_MODE "q"
#endif
#if defined(CONFIG_X86_32) && \
@ -54,19 +56,7 @@
* much between them in performance though, especially as locks are out of line.
*/
#if (NR_CPUS < 256)
static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return (((tmp >> 8) & 0xff) != (tmp & 0xff));
}
static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return (((tmp >> 8) - tmp) & 0xff) > 1;
}
#define TICKET_SHIFT 8
static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
{
@ -89,19 +79,17 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
{
int tmp;
short new;
int tmp, new;
asm volatile("movw %2,%w0\n\t"
asm volatile("movzwl %2, %0\n\t"
"cmpb %h0,%b0\n\t"
"leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
"jne 1f\n\t"
"movw %w0,%w1\n\t"
"incb %h1\n\t"
LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
"1:"
"sete %b1\n\t"
"movzbl %b1,%0\n\t"
: "=&a" (tmp), "=Q" (new), "+m" (lock->slock)
: "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
:
: "memory", "cc");
@ -116,19 +104,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
: "memory", "cc");
}
#else
static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
}
static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return (((tmp >> 16) - tmp) & 0xffff) > 1;
}
#define TICKET_SHIFT 16
static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
{
@ -146,7 +122,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+Q" (inc), "+m" (lock->slock), "=r" (tmp)
: "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
:
: "memory", "cc");
}
@ -160,13 +136,13 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
"movl %0,%1\n\t"
"roll $16, %0\n\t"
"cmpl %0,%1\n\t"
"leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
"jne 1f\n\t"
"addl $0x00010000, %1\n\t"
LOCK_PREFIX "cmpxchgl %1,%2\n\t"
"1:"
"sete %b1\n\t"
"movzbl %b1,%0\n\t"
: "=&a" (tmp), "=r" (new), "+m" (lock->slock)
: "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
:
: "memory", "cc");
@ -182,7 +158,19 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
}
#endif
#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
}
static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}
#ifdef CONFIG_PARAVIRT
/*
@ -272,6 +260,13 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
{
__ticket_spin_unlock(lock);
}
static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
unsigned long flags)
{
__raw_spin_lock(lock);
}
#endif /* CONFIG_PARAVIRT */
static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
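The spinlock.h changes above replace the two per-width copies of __ticket_spin_is_locked()/__ticket_spin_is_contended() with single versions written in terms of TICKET_SHIFT (8 when NR_CPUS < 256, otherwise 16). The user-space sketch below reproduces just that bit arithmetic on a plain integer standing in for lock->slock; it models no atomicity and is not the kernel code.

/* Ticket lock word layout: next ticket (tail) in the high TICKET_SHIFT
 * bits, currently served ticket (head) in the low bits. */
#include <stdio.h>

#define TICKET_SHIFT 8                          /* assume NR_CPUS < 256 layout */
#define TICKET_MASK  ((1 << TICKET_SHIFT) - 1)

static int ticket_is_locked(int slock)
{
        /* head != tail means the lock is held */
        return !!(((slock >> TICKET_SHIFT) ^ slock) & TICKET_MASK);
}

static int ticket_is_contended(int slock)
{
        /* more than one ticket outstanding means someone is spinning */
        return (((slock >> TICKET_SHIFT) - slock) & TICKET_MASK) > 1;
}

int main(void)
{
        int free_lock = 0x0303, held = 0x0403, contended = 0x0503;

        printf("free:      locked=%d contended=%d\n",
               ticket_is_locked(free_lock), ticket_is_contended(free_lock));
        printf("held:      locked=%d contended=%d\n",
               ticket_is_locked(held), ticket_is_contended(held));
        printf("contended: locked=%d contended=%d\n",
               ticket_is_locked(contended), ticket_is_contended(contended));
        return 0;
}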

View File

@ -119,6 +119,10 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask,
{
}
static inline void reset_lazy_tlbstate(void)
{
}
#else /* SMP */
#include <asm/smp.h>
@ -151,6 +155,12 @@ struct tlb_state {
char __cacheline_padding[L1_CACHE_BYTES-8];
};
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
void reset_lazy_tlbstate(void);
#else
static inline void reset_lazy_tlbstate(void)
{
}
#endif
#endif /* SMP */

View File

@ -1,6 +1,8 @@
#ifndef ASM_X86__TRAPS_H
#define ASM_X86__TRAPS_H
#include <asm/debugreg.h>
/* Common in X86_32 and X86_64 */
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@ -36,6 +38,16 @@ void do_invalid_op(struct pt_regs *, long);
void do_general_protection(struct pt_regs *, long);
void do_nmi(struct pt_regs *, long);
static inline int get_si_code(unsigned long condition)
{
if (condition & DR_STEP)
return TRAP_TRACE;
else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
return TRAP_HWBKPT;
else
return TRAP_BRKPT;
}
extern int panic_on_unrecovered_nmi;
extern int kstack_depth_to_print;

View File

@ -54,7 +54,6 @@
/* arch/i386/kernel/setup.c */
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
/* arch/i386/mach-xen/evtchn.c */
/* Force a proper event-channel callback from Xen. */
@ -67,6 +66,17 @@ u64 jiffies_to_st(unsigned long jiffies);
#define MULTI_UVMFLAGS_INDEX 3
#define MULTI_UVMDOMID_INDEX 4
#define is_running_on_xen() (xen_start_info ? 1 : 0)
enum xen_domain_type {
XEN_NATIVE,
XEN_PV_DOMAIN,
XEN_HVM_DOMAIN,
};
extern enum xen_domain_type xen_domain_type;
#define xen_domain() (xen_domain_type != XEN_NATIVE)
#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN)
#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN)
#endif /* ASM_X86__XEN__HYPERVISOR_H */
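The hypervisor.h hunk above replaces the single is_initial_xendomain() test with an explicit xen_domain_type enum and predicates for PV, HVM and dom0. The sketch below shows how callers might branch on them, reduced to a standalone program: the enum mirrors the hunk, while struct start_info and the SIF_INITDOMAIN value are simplified placeholders.

#include <stdio.h>

enum xen_domain_type {
        XEN_NATIVE,
        XEN_PV_DOMAIN,
        XEN_HVM_DOMAIN,
};

#define SIF_INITDOMAIN (1 << 1)         /* illustrative flag value */

struct start_info { unsigned long flags; };

static enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;
static struct start_info demo_start_info = { .flags = SIF_INITDOMAIN };
static struct start_info *xen_start_info = &demo_start_info;

#define xen_domain()         (xen_domain_type != XEN_NATIVE)
#define xen_pv_domain()      (xen_domain_type == XEN_PV_DOMAIN)
#define xen_initial_domain() (xen_pv_domain() && \
                              (xen_start_info->flags & SIF_INITDOMAIN))

int main(void)
{
        if (!xen_domain())
                printf("running on bare metal\n");
        else if (xen_initial_domain())
                printf("Xen PV dom0\n");
        else if (xen_pv_domain())
                printf("Xen PV domU\n");
        return 0;
}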

View File

@ -360,6 +360,7 @@ typedef struct elf64_shdr {
#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
/* Note header in a PT_NOTE section */

View File

@ -182,7 +182,7 @@ extern int vsscanf(const char *, const char *, va_list)
extern int get_option(char **str, int *pint);
extern char *get_options(const char *str, int nints, int *ints);
extern unsigned long long memparse(char *ptr, char **retptr);
extern unsigned long long memparse(const char *ptr, char **retptr);
extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);

View File

@ -7,6 +7,7 @@
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mmdebug.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
#include <linux/prio_tree.h>
@ -219,12 +220,6 @@ struct inode;
*/
#include <linux/page-flags.h>
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
#else
#define VM_BUG_ON(condition) do { } while(0)
#endif
/*
* Methods to modify the page usage count.
*
@ -919,7 +914,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
}
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#if USE_SPLIT_PTLOCKS
/*
* We tuck a spinlock to guard each pagetable page into its struct page,
* at page->private, with BUILD_BUG_ON to make sure that this will not
@ -932,14 +927,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
} while (0)
#define pte_lock_deinit(page) ((page)->mapping = NULL)
#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
#else
#else /* !USE_SPLIT_PTLOCKS */
/*
* We use mm->page_table_lock to guard all pagetable pages of the mm.
*/
#define pte_lock_init(page) do {} while (0)
#define pte_lock_deinit(page) do {} while (0)
#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
#endif /* USE_SPLIT_PTLOCKS */
static inline void pgtable_page_ctor(struct page *page)
{

View File

@ -21,11 +21,13 @@
struct address_space;
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
#if USE_SPLIT_PTLOCKS
typedef atomic_long_t mm_counter_t;
#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
#else /* !USE_SPLIT_PTLOCKS */
typedef unsigned long mm_counter_t;
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
#endif /* !USE_SPLIT_PTLOCKS */
/*
* Each physical page in the system has a struct page associated with
@ -65,7 +67,7 @@ struct page {
* see PAGE_MAPPING_ANON below.
*/
};
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#if USE_SPLIT_PTLOCKS
spinlock_t ptl;
#endif
struct kmem_cache *slab; /* SLUB: Pointer to slab */


include/linux/mmdebug.h Normal file (18 lines)
View File

@ -0,0 +1,18 @@
#ifndef LINUX_MM_DEBUG_H
#define LINUX_MM_DEBUG_H 1
#include <linux/autoconf.h>
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
#else
#define VM_BUG_ON(cond) do { } while (0)
#endif
#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
#else
#define VIRTUAL_BUG_ON(cond) do { } while (0)
#endif
#endif
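The new linux/mmdebug.h above moves VM_BUG_ON() out of mm.h and adds VIRTUAL_BUG_ON(), guarded by the CONFIG_DEBUG_VIRTUAL option introduced later in this diff ("catch mistakes with virt_to_page() and friends"). The standalone sketch below shows the compile-out pattern, with the config option modelled as a plain define and assert() standing in for BUG_ON().

#include <assert.h>
#include <stdio.h>

#define CONFIG_DEBUG_VIRTUAL 1

#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) assert(!(cond))
#else
#define VIRTUAL_BUG_ON(cond) do { } while (0)
#endif

/* e.g. a __pa()-style helper could reject addresses outside the mapped range */
static unsigned long demo_virt_to_phys(unsigned long vaddr,
                                       unsigned long map_start,
                                       unsigned long map_end)
{
        VIRTUAL_BUG_ON(vaddr < map_start || vaddr >= map_end);
        return vaddr - map_start;
}

int main(void)
{
        printf("phys 0x%lx\n", demo_virt_to_phys(0x1100, 0x1000, 0x2000));
        return 0;
}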

View File

@ -352,7 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#if USE_SPLIT_PTLOCKS
/*
* The mm counters are not protected by its page_table_lock,
* so must be incremented atomically.
@ -363,7 +363,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
#else /* !USE_SPLIT_PTLOCKS */
/*
* The mm counters are protected by its page_table_lock,
* so can be incremented directly.
@ -374,7 +374,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define inc_mm_counter(mm, member) (mm)->_##member++
#define dec_mm_counter(mm, member) (mm)->_##member--
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
#endif /* !USE_SPLIT_PTLOCKS */
#define get_mm_rss(mm) \
(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 2001-2002 by David Brownell
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __LINUX_USB_EHCI_DEF_H
#define __LINUX_USB_EHCI_DEF_H
/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */
/* Section 2.2 Host Controller Capability Registers */
struct ehci_caps {
/* these fields are specified as 8 and 16 bit registers,
* but some hosts can't perform 8 or 16 bit PCI accesses.
*/
u32 hc_capbase;
#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */
#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */
u32 hcs_params; /* HCSPARAMS - offset 0x4 */
#define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? */
#define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */
#define HCS_N_CC(p) (((p)>>12)&0xf) /* bits 15:12, #companion HCs */
#define HCS_N_PCC(p) (((p)>>8)&0xf) /* bits 11:8, ports per CC */
#define HCS_PORTROUTED(p) ((p)&(1 << 7)) /* true: port routing */
#define HCS_PPC(p) ((p)&(1 << 4)) /* true: port power control */
#define HCS_N_PORTS(p) (((p)>>0)&0xf) /* bits 3:0, ports on HC */
u32 hcc_params; /* HCCPARAMS - offset 0x8 */
#define HCC_EXT_CAPS(p) (((p)>>8)&0xff) /* for pci extended caps */
#define HCC_ISOC_CACHE(p) ((p)&(1 << 7)) /* true: can cache isoc frame */
#define HCC_ISOC_THRES(p) (((p)>>4)&0x7) /* bits 6:4, uframes cached */
#define HCC_CANPARK(p) ((p)&(1 << 2)) /* true: can park on async qh */
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute [8]; /* nibbles for routing - offset 0xC */
} __attribute__ ((packed));
/* Section 2.3 Host Controller Operational Registers */
struct ehci_regs {
/* USBCMD: offset 0x00 */
u32 command;
/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
#define CMD_PARK (1<<11) /* enable "park" on async qh */
#define CMD_PARK_CNT(c) (((c)>>8)&3) /* how many transfers to park for */
#define CMD_LRESET (1<<7) /* partial reset (no ports, etc) */
#define CMD_IAAD (1<<6) /* "doorbell" interrupt async advance */
#define CMD_ASE (1<<5) /* async schedule enable */
#define CMD_PSE (1<<4) /* periodic schedule enable */
/* 3:2 is periodic frame list size */
#define CMD_RESET (1<<1) /* reset HC not bus */
#define CMD_RUN (1<<0) /* start/stop HC */
/* USBSTS: offset 0x04 */
u32 status;
#define STS_ASS (1<<15) /* Async Schedule Status */
#define STS_PSS (1<<14) /* Periodic Schedule Status */
#define STS_RECL (1<<13) /* Reclamation */
#define STS_HALT (1<<12) /* Not running (any reason) */
/* some bits reserved */
/* these STS_* flags are also intr_enable bits (USBINTR) */
#define STS_IAA (1<<5) /* Interrupted on async advance */
#define STS_FATAL (1<<4) /* such as some PCI access errors */
#define STS_FLR (1<<3) /* frame list rolled over */
#define STS_PCD (1<<2) /* port change detect */
#define STS_ERR (1<<1) /* "error" completion (overflow, ...) */
#define STS_INT (1<<0) /* "normal" completion (short, ...) */
/* USBINTR: offset 0x08 */
u32 intr_enable;
/* FRINDEX: offset 0x0C */
u32 frame_index; /* current microframe number */
/* CTRLDSSEGMENT: offset 0x10 */
u32 segment; /* address bits 63:32 if needed */
/* PERIODICLISTBASE: offset 0x14 */
u32 frame_list; /* points to periodic list */
/* ASYNCLISTADDR: offset 0x18 */
u32 async_next; /* address of next async queue head */
u32 reserved [9];
/* CONFIGFLAG: offset 0x40 */
u32 configured_flag;
#define FLAG_CF (1<<0) /* true: we'll support "high speed" */
/* PORTSC: offset 0x44 */
u32 port_status [0]; /* up to N_PORTS */
/* 31:23 reserved */
#define PORT_WKOC_E (1<<22) /* wake on overcurrent (enable) */
#define PORT_WKDISC_E (1<<21) /* wake on disconnect (enable) */
#define PORT_WKCONN_E (1<<20) /* wake on connect (enable) */
/* 19:16 for port testing */
#define PORT_LED_OFF (0<<14)
#define PORT_LED_AMBER (1<<14)
#define PORT_LED_GREEN (2<<14)
#define PORT_LED_MASK (3<<14)
#define PORT_OWNER (1<<13) /* true: companion hc owns this port */
#define PORT_POWER (1<<12) /* true: has power (see PPC) */
#define PORT_USB11(x) (((x)&(3<<10)) == (1<<10)) /* USB 1.1 device */
/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
/* 9 reserved */
#define PORT_RESET (1<<8) /* reset port */
#define PORT_SUSPEND (1<<7) /* suspend port */
#define PORT_RESUME (1<<6) /* resume it */
#define PORT_OCC (1<<5) /* over current change */
#define PORT_OC (1<<4) /* over current active */
#define PORT_PEC (1<<3) /* port enable change */
#define PORT_PE (1<<2) /* port enable */
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
} __attribute__ ((packed));
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
#define USBMODE_BE (1<<2) /* BE/LE endianness select */
#define USBMODE_CM_HC (3<<0) /* host controller mode */
#define USBMODE_CM_IDLE (0<<0) /* idle state */
/* Appendix C, Debug port ... intended for use with special "debug devices"
* that can help if there's no serial console. (nonstandard enumeration.)
*/
struct ehci_dbg_port {
u32 control;
#define DBGP_OWNER (1<<30)
#define DBGP_ENABLED (1<<28)
#define DBGP_DONE (1<<16)
#define DBGP_INUSE (1<<10)
#define DBGP_ERRCODE(x) (((x)>>7)&0x07)
# define DBGP_ERR_BAD 1
# define DBGP_ERR_SIGNAL 2
#define DBGP_ERROR (1<<6)
#define DBGP_GO (1<<5)
#define DBGP_OUT (1<<4)
#define DBGP_LEN(x) (((x)>>0)&0x0f)
u32 pids;
#define DBGP_PID_GET(x) (((x)>>16)&0xff)
#define DBGP_PID_SET(data, tok) (((data)<<8)|(tok))
u32 data03;
u32 data47;
u32 address;
#define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep))
} __attribute__ ((packed));
#endif /* __LINUX_USB_EHCI_DEF_H */
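The new ehci_def.h above exports the EHCI register layout (including the debug-port registers) outside the EHCI host driver. A small demonstration of the capability-register accessor macros follows, applied to a made-up HCSPARAMS value rather than real hardware; only two of the macros are reproduced here.

#include <stdio.h>
#include <stdint.h>

#define HCS_DEBUG_PORT(p) (((p) >> 20) & 0xf)  /* bits 23:20, debug port number */
#define HCS_N_PORTS(p)    (((p) >> 0) & 0xf)   /* bits 3:0, ports on HC */

int main(void)
{
        uint32_t hcs_params = 0x00104208;       /* illustrative register value */

        printf("debug port: %u\n", (unsigned)HCS_DEBUG_PORT(hcs_params));
        printf("root ports: %u\n", (unsigned)HCS_N_PORTS(hcs_params));
        return 0;
}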

View File

@ -1,61 +0,0 @@
/******************************************************************************
* balloon.h
*
* Xen balloon driver - enables returning/claiming memory to/from Xen.
*
* Copyright (c) 2003, B Dragovic
* Copyright (c) 2003-2004, M Williamson, K Fraser
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef __XEN_BALLOON_H__
#define __XEN_BALLOON_H__
#include <linux/spinlock.h>
#if 0
/*
* Inform the balloon driver that it should allow some slop for device-driver
* memory activities.
*/
void balloon_update_driver_allowance(long delta);
/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
struct page **alloc_empty_pages_and_pagevec(int nr_pages);
void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
void balloon_release_driver_page(struct page *page);
/*
* Prevent the balloon driver from changing the memory reservation during
* a driver critical region.
*/
extern spinlock_t balloon_lock;
#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
#endif
#endif /* __XEN_BALLOON_H__ */

View File

@ -46,6 +46,8 @@ extern void xen_irq_resume(void);
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq);
void xen_set_irq_pending(int irq);
bool xen_test_irq_pending(int irq);
/* Poll waiting for an irq to become pending. In the usual case, the
irq will be disabled so it won't deliver an interrupt. */

View File

@ -495,6 +495,15 @@ config DEBUG_VM
If unsure, say N.
config DEBUG_VIRTUAL
bool "Debug VM translations"
depends on DEBUG_KERNEL && X86
help
Enable some costly sanity checks in virtual to page code. This can
catch mistakes with virt_to_page() and friends.
If unsure, say N.
config DEBUG_WRITECOUNT
bool "Debug filesystem writers count"
depends on DEBUG_KERNEL

View File

@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints)
* megabyte, or one gigabyte, respectively.
*/
unsigned long long memparse(char *ptr, char **retptr)
unsigned long long memparse(const char *ptr, char **retptr)
{
char *endptr; /* local pointer to end of parsed string */
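The cmdline.c change above only constifies memparse()'s input (matching the kernel.h prototype change earlier in this diff). To make the K/M/G suffix handling of the comment above concrete, here is a user-space re-implementation under that assumption; the kernel version uses simple_strtoull() internally.

#include <stdio.h>
#include <stdlib.h>

static unsigned long long demo_memparse(const char *ptr, char **retptr)
{
        char *endptr;   /* local pointer to end of parsed string */
        unsigned long long ret = strtoull(ptr, &endptr, 0);

        switch (*endptr) {
        case 'G': case 'g':
                ret <<= 10;     /* fall through */
        case 'M': case 'm':
                ret <<= 10;     /* fall through */
        case 'K': case 'k':
                ret <<= 10;
                endptr++;
        default:
                break;
        }
        if (retptr)
                *retptr = endptr;
        return ret;
}

int main(void)
{
        printf("64K = %llu\n", demo_memparse("64K", NULL));
        printf("16M = %llu\n", demo_memparse("16M", NULL));
        printf("1G  = %llu\n", demo_memparse("1G", NULL));
        return 0;
}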

Some files were not shown because too many files have changed in this diff.