linux/arch/x86/include/asm/x86_init.h
Linus Torvalds b18d62891a Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 APIC updates from Thomas Gleixner:
 "This update provides a major overhaul of the APIC initialization and
  vector allocation code:

   - Unification of the APIC and interrupt mode setup which was
     scattered all over the place and was hard to follow. This also
     distangles the timer setup from the APIC initialization which
     brings a clear separation of functionality.

     Great detective work from Dou Lyiang!

   - Refactoring of the x86 vector allocation mechanism. The existing
     code was based on nested loops and rather convoluted APIC callbacks
     which had a horrible worst case behaviour and tried to serve all
     different use cases in one go. This led to quite odd hacks when
     supporting the new managed interupt facility for multiqueue devices
     and made it more or less impossible to deal with the vector space
     exhaustion which was a major roadblock for server hibernation.

     Aside of that the code dealing with cpu hotplug and the system
     vectors was disconnected from the actual vector management and
     allocation code, which made it hard to follow and maintain.

     Utilizing the new bitmap matrix allocator core mechanism, the new
     allocator and management code consolidates the handling of system
     vectors, legacy vectors, cpu hotplug mechanisms and the actual
     allocation which needs to be aware of system and legacy vectors and
     hotplug constraints into a single consistent entity.

     This has one visible change: The support for multi CPU targets of
     interrupts, which is only available on a certain subset of
     CPUs/APIC variants has been removed in favour of single interrupt
     targets. A proper analysis of the multi CPU target feature revealed
     that there is no real advantage as the vast majority of interrupts
     end up on the CPU with the lowest APIC id in the set of target CPUs
     anyway. That change was agreed on by the relevant folks and allowed
     to simplify the implementation significantly and to replace rather
     fragile constructs like the vector cleanup IPI with straight
     forward and solid code.

     Furthermore this allowed to cleanly separate the allocation details
     for legacy, normal and managed interrupts:

      * Legacy interrupts are not longer wasting 16 vectors
        unconditionally

      * Managed interrupts have now a guaranteed vector reservation, but
        the actual vector assignment happens when the interrupt is
        requested. It's guaranteed not to fail.

      * Normal interrupts no longer allocate vectors unconditionally
        when the interrupt is set up (IO/APIC init or MSI(X) enable).
        The mechanism has been switched to a best effort reservation
        mode. The actual allocation happens when the interrupt is
        requested. Contrary to managed interrupts the request can fail
        due to vector space exhaustion, but drivers must handle a fail
        of request_irq() anyway. When the interrupt is freed, the vector
        is handed back as well.

        This solves a long standing problem with large unconditional
        vector allocations for a certain class of enterprise devices
        which prevented server hibernation due to vector space
        exhaustion when the unused allocated vectors had to be migrated
        to CPU0 while unplugging all non boot CPUs.

     The code has been equipped with trace points and detailed debugfs
     information to aid analysis of the vector space"

* 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
  x86/vector/msi: Select CONFIG_GENERIC_IRQ_RESERVATION_MODE
  PCI/MSI: Set MSI_FLAG_MUST_REACTIVATE in core code
  genirq: Add config option for reservation mode
  x86/vector: Use correct per cpu variable in free_moved_vector()
  x86/apic/vector: Ignore set_affinity call for inactive interrupts
  x86/apic: Fix spelling mistake: "symmectic" -> "symmetric"
  x86/apic: Use dead_cpu instead of current CPU when cleaning up
  ACPI/init: Invoke early ACPI initialization earlier
  x86/vector: Respect affinity mask in irq descriptor
  x86/irq: Simplify hotplug vector accounting
  x86/vector: Switch IOAPIC to global reservation mode
  x86/vector/msi: Switch to global reservation mode
  x86/vector: Handle managed interrupts proper
  x86/io_apic: Reevaluate vector configuration on activate()
  iommu/amd: Reevaluate vector configuration on activate()
  iommu/vt-d: Reevaluate vector configuration on activate()
  x86/apic/msi: Force reactivation of interrupts at startup time
  x86/vector: Untangle internal state from irq_cfg
  x86/vector: Compile SMP only code conditionally
  x86/apic: Remove unused callbacks
  ...
2017-11-13 18:29:23 -08:00

292 lines
9.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PLATFORM_H
#define _ASM_X86_PLATFORM_H
#include <asm/bootparam.h>
struct mpc_bus;
struct mpc_cpu;
struct mpc_table;
struct cpuinfo_x86;
/**
* struct x86_init_mpparse - platform specific mpparse ops
* @mpc_record: platform specific mpc record accounting
* @setup_ioapic_ids: platform specific ioapic id override
* @mpc_apic_id: platform specific mpc apic id assignment
* @smp_read_mpc_oem: platform specific oem mpc table setup
* @mpc_oem_pci_bus: platform specific pci bus setup (default NULL)
* @mpc_oem_bus_info: platform specific mpc bus info
* @find_smp_config: find the smp configuration
* @get_smp_config: get the smp configuration
*/
struct x86_init_mpparse {
void (*mpc_record)(unsigned int mode);
void (*setup_ioapic_ids)(void);
int (*mpc_apic_id)(struct mpc_cpu *m);
void (*smp_read_mpc_oem)(struct mpc_table *mpc);
void (*mpc_oem_pci_bus)(struct mpc_bus *m);
void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
void (*find_smp_config)(void);
void (*get_smp_config)(unsigned int early);
};
/**
* struct x86_init_resources - platform specific resource related ops
* @probe_roms: probe BIOS roms
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
*
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
};
/**
* struct x86_init_irqs - platform specific interrupt setup
* @pre_vector_init: init code to run before interrupt vectors
* are set up.
* @intr_init: interrupt init code
* @trap_init: platform specific trap setup
* @intr_mode_init: interrupt delivery mode setup
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
void (*trap_init)(void);
void (*intr_mode_init)(void);
};
/**
* struct x86_init_oem - oem platform specific customizing functions
* @arch_setup: platform specific architecture setup
* @banner: print a platform specific banner
*/
struct x86_init_oem {
void (*arch_setup)(void);
void (*banner)(void);
};
/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_init: platform specific paging initialization call to setup
* the kernel pagetables and prepare accessors functions.
* Callback must call paging_init(). Called once after the
* direct mapping for phys memory is available.
*/
struct x86_init_paging {
void (*pagetable_init)(void);
};
/**
* struct x86_init_timers - platform specific timer setup
* @setup_perpcu_clockev: set up the per cpu clock event device for the
* boot cpu
* @timer_init: initialize the platform timer (default PIT/HPET)
* @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
void (*timer_init)(void);
void (*wallclock_init)(void);
};
/**
* struct x86_init_iommu - platform specific iommu setup
* @iommu_init: platform specific iommu setup
*/
struct x86_init_iommu {
int (*iommu_init)(void);
};
/**
* struct x86_init_pci - platform specific pci init functions
* @arch_init: platform specific pci arch init call
* @init: platform specific pci subsystem init
* @init_irq: platform specific pci irq init
* @fixup_irqs: platform specific pci irq fixup
*/
struct x86_init_pci {
int (*arch_init)(void);
int (*init)(void);
void (*init_irq)(void);
void (*fixup_irqs)(void);
};
/**
* struct x86_hyper_init - x86 hypervisor init functions
* @init_platform: platform setup
* @guest_late_init: guest late init
* @x2apic_available: X2APIC detection
* @init_mem_mapping: setup early mappings during init_mem_mapping()
*/
struct x86_hyper_init {
void (*init_platform)(void);
void (*guest_late_init)(void);
bool (*x2apic_available)(void);
void (*init_mem_mapping)(void);
};
/**
* struct x86_init_ops - functions for platform specific setup
*
*/
struct x86_init_ops {
struct x86_init_resources resources;
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
struct x86_init_pci pci;
struct x86_hyper_init hyper;
};
/**
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
* @early_percpu_clock_init: early init of the per cpu clock event device
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
};
struct timespec;
/**
* struct x86_legacy_devices - legacy x86 devices
*
* @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform
* is known to never have a PNPBIOS.
*
* These are devices known to require LPC or ISA bus. The definition of legacy
* devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag
* ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on
* the LPC or ISA bus. User visible devices are devices that have end-user
* accessible connectors (for example, LPT parallel port). Legacy devices on
* the LPC bus consist for example of serial and parallel ports, PS/2 keyboard
* / mouse, and the floppy disk controller. A system that lacks all known
* legacy devices can assume all devices can be detected exclusively via
* standard device enumeration mechanisms including the ACPI namespace.
*
* A system which has does not have ACPI_FADT_LEGACY_DEVICES enabled must not
* have any of the legacy devices enumerated below present.
*/
struct x86_legacy_devices {
int pnpbios;
};
/**
* enum x86_legacy_i8042_state - i8042 keyboard controller state
* @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on
* given platform/subarch.
* @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
* is absent.
* @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be
* present, the i8042 driver should probe for controller existence.
*/
enum x86_legacy_i8042_state {
X86_LEGACY_I8042_PLATFORM_ABSENT,
X86_LEGACY_I8042_FIRMWARE_ABSENT,
X86_LEGACY_I8042_EXPECTED_PRESENT,
};
/**
* struct x86_legacy_features - legacy x86 features
*
* @i8042: indicated if we expect the device to have i8042 controller
* present.
* @rtc: this device has a CMOS real-time clock present
* @reserve_bios_regions: boot code will search for the EBDA address and the
* start of the 640k - 1M BIOS region. If false, the platform must
* ensure that its memory map correctly reserves sub-1MB regions as needed.
* @devices: legacy x86 devices, refer to struct x86_legacy_devices
* documentation for further details.
*/
struct x86_legacy_features {
enum x86_legacy_i8042_state i8042;
int rtc;
int no_vga;
int reserve_bios_regions;
struct x86_legacy_devices devices;
};
/**
* struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
*
* @pin_vcpu: pin current vcpu to specified physical cpu (run rarely)
*/
struct x86_hyper_runtime {
void (*pin_vcpu)(int cpu);
};
/**
* struct x86_platform_ops - platform specific runtime functions
* @calibrate_cpu: calibrate CPU
* @calibrate_tsc: calibrate TSC, if different from CPU
* @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock
* @is_untracked_pat_range exclude from PAT logic
* @nmi_init enable NMI on cpus
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
* @apic_post_init: adjust apic if needed
* @legacy: legacy features
* @set_legacy_features: override legacy features. Use of this callback
* is highly discouraged. You should only need
* this if your hardware platform requires further
* custom fine tuning far beyond what may be
* possible in x86_early_init_platform_quirks() by
* only using the current x86_hardware_subarch
* semantics.
* @hyper: x86 hypervisor specific runtime callbacks
*/
struct x86_platform_ops {
unsigned long (*calibrate_cpu)(void);
unsigned long (*calibrate_tsc)(void);
void (*get_wallclock)(struct timespec *ts);
int (*set_wallclock)(const struct timespec *ts);
void (*iommu_shutdown)(void);
bool (*is_untracked_pat_range)(u64 start, u64 end);
void (*nmi_init)(void);
unsigned char (*get_nmi_reason)(void);
void (*save_sched_clock_state)(void);
void (*restore_sched_clock_state)(void);
void (*apic_post_init)(void);
struct x86_legacy_features legacy;
void (*set_legacy_features)(void);
struct x86_hyper_runtime hyper;
};
struct pci_dev;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev);
};
struct x86_io_apic_ops {
unsigned int (*read) (unsigned int apic, unsigned int reg);
void (*disable)(void);
};
extern struct x86_init_ops x86_init;
extern struct x86_cpuinit_ops x86_cpuinit;
extern struct x86_platform_ops x86_platform;
extern struct x86_msi_ops x86_msi;
extern struct x86_io_apic_ops x86_io_apic_ops;
extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
#endif