diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps new file mode 100644 index 000000000000..8757dcf41c08 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps @@ -0,0 +1,18 @@ +What: /sys/bus/event_source/devices/<dev>/caps +Date: May 2022 +KernelVersion: 5.19 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: + Attribute group to describe the capabilities exposed + for a particular pmu. Each attribute of this group can + expose information specific to a PMU, say pmu_name, so that + userspace can understand some of the features which the + platform specific PMU supports. + + One example of a capability available on supported platforms + like Intel is pmu_name, which exposes the underlying CPU name known + to the PMU driver. + + Example output in powerpc: + grep . /sys/bus/event_source/devices/cpu/caps/* + /sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9dc68a12a5bf..f251f5cce63b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3553,9 +3553,6 @@ noautogroup Disable scheduler automatic task group creation. - nobats [PPC] Do not use BATs for mapping kernel lowmem - on "Classic" PPC cores. - nocache [ARM] nodsp [SH] Disable hardware DSP at boot time. @@ -3725,9 +3722,6 @@ nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 
- noltlbs [PPC] Do not use large page/tlb entries for kernel - lowmem mapping on PPC40x and PPC8xx - nomca [IA-64] Disable machine check abort handling nomce [X86-32] Disable Machine Check Exception diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 8ab042beeb76..ee6572b1edad 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -592,6 +592,18 @@ to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst). +nmi_wd_lpm_factor (PPC only) +============================ + +Factor to apply to the NMI watchdog timeout (only when ``nmi_watchdog`` is +set to 1). This factor represents the percentage added to +``watchdog_thresh`` when calculating the NMI watchdog timeout during an +LPM. The soft lockup timeout is not impacted. + +A value of 0 means no change. The default value is 200 meaning the NMI +watchdog is set to 30s (based on ``watchdog_thresh`` equal to 10). + + numa_balancing ============== diff --git a/Documentation/powerpc/elf_hwcaps.rst b/Documentation/powerpc/elf_hwcaps.rst new file mode 100644 index 000000000000..3366e5b18e67 --- /dev/null +++ b/Documentation/powerpc/elf_hwcaps.rst @@ -0,0 +1,231 @@ +.. _elf_hwcaps_powerpc: + +================== +POWERPC ELF HWCAPs +================== + +This document describes the usage and semantics of the powerpc ELF HWCAPs. + + +1. Introduction +--------------- + +Some hardware or software features are only available on some CPU +implementations, and/or with certain kernel configurations, but have no other +discovery mechanism available to userspace code. The kernel exposes the +presence of these features to userspace through a set of flags called HWCAPs, +exposed in the auxiliary vector. 
+ +Userspace software can test for features by acquiring the AT_HWCAP or +AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant +flags are set, e.g.:: + + bool floating_point_is_present(void) + { + unsigned long HWCAPs = getauxval(AT_HWCAP); + if (HWCAPs & PPC_FEATURE_HAS_FPU) + return true; + + return false; + } + +Where software relies on a feature described by a HWCAP, it should check the +relevant HWCAP flag to verify that the feature is present before attempting to +make use of the feature. + +HWCAP is the preferred method to test for the presence of a feature rather +than probing through other means, which may not be reliable or may cause +unpredictable behaviour. + +Software that targets a particular platform does not necessarily have to +test for required or implied features. For example if the program requires +FPU, VMX, VSX, it is not necessary to test those HWCAPs, and it may be +impossible to do so if the compiler generates code requiring those features. + +2. Facilities +------------- + +The Power ISA uses the term "facility" to describe a class of instructions, +registers, interrupts, etc. The presence or absence of a facility indicates +whether this class is available to be used, but the specifics depend on the +ISA version. For example, if the VSX facility is available, the VSX +instructions that can be used differ between the v3.0B and v3.1B ISA +versions. + +3. Categories +------------- + +The Power ISA before v3.0 uses the term "category" to describe certain +classes of instructions and operating modes which may be optional or +mutually exclusive, the exact meaning of the HWCAP flag may depend on +context, e.g., the presence of the BOOKE feature implies that the server +category is not implemented. + +4. HWCAP allocation +------------------- + +HWCAPs are allocated as described in Power Architecture 64-Bit ELF V2 ABI +Specification (which will be reflected in the kernel's uapi headers). + +5. 
The HWCAPs exposed in AT_HWCAP +--------------------------------- + +PPC_FEATURE_32 + 32-bit CPU + +PPC_FEATURE_64 + 64-bit CPU (userspace may be running in 32-bit mode). + +PPC_FEATURE_601_INSTR + The processor is PowerPC 601. + Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + +PPC_FEATURE_HAS_ALTIVEC + Vector (aka Altivec, VMX) facility is available. + +PPC_FEATURE_HAS_FPU + Floating point facility is available. + +PPC_FEATURE_HAS_MMU + Memory management unit is present and enabled. + +PPC_FEATURE_HAS_4xxMAC + The processor is 40x or 44x family. + +PPC_FEATURE_UNIFIED_CACHE + The processor has a unified L1 cache for instructions and data, as + found in NXP e200. + Unused in the kernel since 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") + +PPC_FEATURE_HAS_SPE + Signal Processing Engine facility is available. + +PPC_FEATURE_HAS_EFP_SINGLE + Embedded Floating Point single precision operations are available. + +PPC_FEATURE_HAS_EFP_DOUBLE + Embedded Floating Point double precision operations are available. + +PPC_FEATURE_NO_TB + The timebase facility (mftb instruction) is not available. + This is a 601 specific HWCAP, so if it is known that the processor + running is not a 601, via other HWCAPs or other means, it is not + required to test this bit before using the timebase. + Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + +PPC_FEATURE_POWER4 + The processor is POWER4 or PPC970/FX/MP. + POWER4 support dropped from the kernel since 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") + +PPC_FEATURE_POWER5 + The processor is POWER5. + +PPC_FEATURE_POWER5_PLUS + The processor is POWER5+. + +PPC_FEATURE_CELL + The processor is Cell. + +PPC_FEATURE_BOOKE + The processor implements the embedded category ("BookE") architecture. + +PPC_FEATURE_SMT + The processor implements SMT. 
+ +PPC_FEATURE_ICACHE_SNOOP + The processor icache is coherent with the dcache, and instruction storage + can be made consistent with data storage for the purpose of executing + instructions with the sequence (as described in, e.g., POWER9 Processor + User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)):: + + sync + icbi (to any address) + isync + +PPC_FEATURE_ARCH_2_05 + The processor supports the v2.05 userlevel architecture. Processors + supporting later architectures DO NOT set this feature. + +PPC_FEATURE_PA6T + The processor is PA6T. + +PPC_FEATURE_HAS_DFP + DFP facility is available. + +PPC_FEATURE_POWER6_EXT + The processor is POWER6. + +PPC_FEATURE_ARCH_2_06 + The processor supports the v2.06 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE_HAS_VSX + VSX facility is available. + +PPC_FEATURE_PSERIES_PERFMON_COMPAT + The processor supports architected PMU events in the range 0xE0-0xFF. + +PPC_FEATURE_TRUE_LE + The processor supports true little-endian mode. + +PPC_FEATURE_PPC_LE + The processor supports "PowerPC Little-Endian", that uses address + munging to make storage access appear to be little-endian, but the + data is stored in a different format that is unsuitable to be + accessed by other agents not running in this mode. + +6. The HWCAPs exposed in AT_HWCAP2 +---------------------------------- + +PPC_FEATURE2_ARCH_2_07 + The processor supports the v2.07 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_HTM + Transactional Memory feature is available. + +PPC_FEATURE2_DSCR + DSCR facility is available. + +PPC_FEATURE2_EBB + EBB facility is available. + +PPC_FEATURE2_ISEL + isel instruction is available. This is superseded by ARCH_2_07 and + later. + +PPC_FEATURE2_TAR + TAR facility is available. + +PPC_FEATURE2_VEC_CRYPTO + v2.07 crypto instructions are available. 
+ +PPC_FEATURE2_HTM_NOSC + System calls fail if called in a transactional state, see + Documentation/powerpc/syscall64-abi.rst + +PPC_FEATURE2_ARCH_3_00 + The processor supports the v3.0B / v3.0C userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_HAS_IEEE128 + IEEE 128-bit binary floating point is supported with VSX + quad-precision instructions and data types. + +PPC_FEATURE2_DARN + darn instruction is available. + +PPC_FEATURE2_SCV + The scv 0 instruction may be used for system calls, see + Documentation/powerpc/syscall64-abi.rst. + +PPC_FEATURE2_HTM_NO_SUSPEND + A limited Transactional Memory facility that does not support suspend is + available, see Documentation/powerpc/transactional_memory.rst. + +PPC_FEATURE2_ARCH_3_1 + The processor supports the v3.1 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_MMA + MMA facility is available. diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index 0f7d3c495693..85e80e30160b 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -17,6 +17,7 @@ powerpc dawr-power9 dscr eeh-pci-error-recovery + elf_hwcaps elfnote firmware-assisted-dump hvcs diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst index 223c99361a30..29153eed6689 100644 --- a/Documentation/watchdog/watchdog-parameters.rst +++ b/Documentation/watchdog/watchdog-parameters.rst @@ -425,6 +425,18 @@ pnx833x_wdt: ------------------------------------------------- +pseries-wdt: + action: + Action taken when watchdog expires: 0 (power off), 1 (restart), + 2 (dump and restart). (default=1) + timeout: + Initial watchdog timeout in seconds. (default=60) + nowayout: + Watchdog cannot be stopped once started. 
+ (default=kernel config parameter) + +------------------------------------------------- + rc32434_wdt: timeout: Watchdog timeout value, in seconds (default=20) diff --git a/MAINTAINERS b/MAINTAINERS index c8375b652528..50c0d220cb73 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11628,8 +11628,8 @@ F: drivers/macintosh/ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Michael Ellerman -R: Benjamin Herrenschmidt -R: Paul Mackerras +R: Nicholas Piggin +R: Christophe Leroy L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index af612d4c4bcc..4c466acdc70d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -11,7 +11,7 @@ config 64BIT config LIVEPATCH_64 def_bool PPC64 - depends on LIVEPATCH + depends on LIVEPATCH config MMU bool @@ -192,8 +192,10 @@ config PPC select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN if PPC_RADIX_MMU + select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT @@ -253,6 +255,7 @@ config PPC select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN && MODULES select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_MERGE_VMAS @@ -376,6 +379,17 @@ config PPC_DCR depends on PPC_DCR_NATIVE || PPC_DCR_MMIO default y +config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + depends on PPC32 + depends on !PPC_PMAC && !PPC_CHRP + bool "Assign PCI bus numbers from zero individually for each PCI domain" + help + By default on PPC32, PCI bus numbers were unique across all PCI domains, + so a system could have only 256 PCI buses regardless of the number of + PCI domains. 
When this option is enabled, PCI bus numbers are + PCI domain dependent and each PCI controller in its own domain can have + 256 PCI buses, as on other Linux architectures. + config PPC_OF_PLATFORM_PCI bool depends on PCI @@ -452,7 +466,7 @@ choice default MATH_EMULATION_FULL depends on MATH_EMULATION -config MATH_EMULATION_FULL +config MATH_EMULATION_FULL bool "Emulate all the floating point instructions" help Select this option will enable the kernel to support to emulate @@ -554,7 +568,6 @@ config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE select HAVE_IMA_KEXEC if IMA - select BUILD_BIN2C select KEXEC_ELF depends on PPC64 depends on CRYPTO=y diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 9f363c143d86..ae727d4218b9 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -305,7 +305,6 @@ config PPC_EARLY_DEBUG_OPAL def_bool y depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI - config PPC_EARLY_DEBUG_HVSI_VTERMNO hex "vterm number to use with early debug HVSI" depends on PPC_EARLY_DEBUG_LPAR_HVSI @@ -375,4 +374,5 @@ config KASAN_SHADOW_OFFSET hex depends on KASAN default 0xe0000000 if PPC32 - default 0xa80e000000000000 if PPC64 + default 0xa80e000000000000 if PPC_BOOK3S_64 + default 0xa8001c0000000000 if PPC_BOOK3E_64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a0cd70712061..02742facf895 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -15,23 +15,6 @@ HAS_BIARCH := $(call cc-option-yn, -m32) # Set default 32 bits cross compilers for vdso and boot wrapper CROSS32_COMPILE ?= -ifeq ($(HAS_BIARCH),y) -ifeq ($(CROSS32_COMPILE),) -ifdef CONFIG_PPC32 -# These options will be overridden by any -mcpu option that the CPU -# or platform code sets later on the command line, but they are needed -# to set a sane 32-bit cpu target for the 64-bit cross compiler which -# may default to the wrong ISA. 
-KBUILD_CFLAGS += -mcpu=powerpc -KBUILD_AFLAGS += -mcpu=powerpc -endif -endif -endif - -ifdef CONFIG_PPC_BOOK3S_32 -KBUILD_CFLAGS += -mcpu=powerpc -endif - # If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use # ppc64_defconfig because we have nothing better to go on. uname := $(shell uname -m) @@ -183,24 +166,11 @@ endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) +AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) -# Altivec option not allowed with e500mc64 in GCC. -ifdef CONFIG_ALTIVEC -E5500_CPU := -mcpu=powerpc64 -else -E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) -endif -CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) +CFLAGS-$(CONFIG_E5500_CPU) += $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) -ifdef CONFIG_PPC32 -ifdef CONFIG_PPC_E500MC -CFLAGS-y += $(call cc-option,-mcpu=e500mc,-mcpu=powerpc) -else -CFLAGS-$(CONFIG_E500) += $(call cc-option,-mcpu=8540 -msoft-float,-mcpu=powerpc) -endif -endif - asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr) diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi index 7a590c92fe56..81b9ab2119be 100644 --- a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi @@ -48,6 +48,7 @@ bus-range = <0 255>; clock-frequency = <33333333>; interrupts = <26 2 0 0>; + law_trgt_if = <2>; pcie@0 { reg = <0 0 0 0 0>; @@ -76,6 +77,7 @@ bus-range = <0 255>; clock-frequency = <33333333>; interrupts = <25 2 0 0>; + law_trgt_if = <1>; pcie@0 { reg = <0 0 0 0 0>; @@ -105,6 +107,7 @@ bus-range = <0 255>; clock-frequency = <33333333>; interrupts = <24 2 0 0>; + law_trgt_if = <0>; pcie@0 { reg = <0 0 0 0 0>; diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts new file 
mode 100644 index 000000000000..12e08271e61f --- /dev/null +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Turris 1.x Device Tree Source + * + * Copyright 2013 - 2022 CZ.NIC z.s.p.o. (http://www.nic.cz/) + * + * Pinout, Schematics and Altium hardware design files are open source + * and available at: https://docs.turris.cz/hw/turris-1x/turris-1x/ + */ + +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/interrupt-controller/irq.h> +#include <dt-bindings/leds/common.h> +/include/ "fsl/p2020si-pre.dtsi" + +/ { + model = "Turris 1.x"; + compatible = "cznic,turris1x", "fsl,P2020RDB-PC"; /* fsl,P2020RDB-PC is required for booting Linux */ + + aliases { + ethernet0 = &enet0; + ethernet1 = &enet1; + ethernet2 = &enet2; + serial0 = &serial0; + serial1 = &serial1; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + spi0 = &spi0; + }; + + memory { + device_type = "memory"; + }; + + soc: soc@ffe00000 { + ranges = <0x0 0x0 0xffe00000 0x00100000>; + + i2c@3000 { + /* PCA9557PW GPIO controller for boot config */ + gpio-controller@18 { + compatible = "nxp,pca9557"; + label = "bootcfg"; + reg = <0x18>; + #gpio-cells = <2>; + gpio-controller; + polarity = <0x00>; + }; + + /* STM32F030R8T6 MCU for power control */ + power-control@2a { + /* + * Turris Power Control firmware runs on STM32F0 MCU. 
+ * This firmware is open source and available at: + * https://gitlab.nic.cz/turris/hw/turris_power_control + */ + reg = <0x2a>; + }; + + /* DDR3 SPD/EEPROM PSWP instruction */ + eeprom@32 { + reg = <0x32>; + }; + + /* SA56004ED temperature control */ + temperature-sensor@4c { + compatible = "nxp,sa56004"; + reg = <0x4c>; + interrupt-parent = <&gpio>; + interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */ + <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */ + }; + + /* DDR3 SPD/EEPROM */ + eeprom@52 { + compatible = "atmel,spd"; + reg = <0x52>; + }; + + /* MCP79402-I/ST Protected EEPROM */ + eeprom@57 { + reg = <0x57>; + }; + + /* ATSHA204-TH-DA-T crypto module */ + crypto@64 { + compatible = "atmel,atsha204"; + reg = <0x64>; + }; + + /* IDT6V49205BNLGI clock generator */ + clock-generator@69 { + compatible = "idt,6v49205b"; + reg = <0x69>; + }; + + /* MCP79402-I/ST RTC */ + rtc@6f { + compatible = "microchip,mcp7940x"; + reg = <0x6f>; + interrupt-parent = <&gpio>; + interrupts = <14 0>; /* GPIO14 - MFP pin */ + }; + }; + + /* SPI on connector P1 */ + spi0: spi@7000 { + }; + + gpio: gpio-controller@fc00 { + #interrupt-cells = <2>; + interrupt-controller; + }; + + /* Connected to SMSC USB2412-DZK 2-Port USB 2.0 Hub Controller */ + usb@22000 { + phy_type = "ulpi"; + dr_mode = "host"; + }; + + enet0: ethernet@24000 { + /* Connected to port 6 of QCA8337N-AL3C switch */ + phy-connection-type = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + mdio@24520 { + /* KSZ9031RNXCA ethernet phy for WAN port */ + phy: ethernet-phy@7 { + interrupts = <3 1 0 0>; + reg = <0x7>; + }; + + /* QCA8337N-AL3C switch with integrated ethernet PHYs for LAN ports */ + switch@10 { + compatible = "qca,qca8337"; + interrupts = <2 1 0 0>; + reg = <0x10>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu1"; + ethernet = <&enet1>; + phy-mode = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + 
port@1 { + reg = <1>; + label = "lan5"; + }; + + port@2 { + reg = <2>; + label = "lan4"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "lan2"; + }; + + port@5 { + reg = <5>; + label = "lan1"; + }; + + port@6 { + reg = <6>; + label = "cpu0"; + ethernet = <&enet0>; + phy-mode = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; + + ptp_clock@24e00 { + fsl,tclk-period = <5>; + fsl,tmr-prsc = <200>; + fsl,tmr-add = <0xcccccccd>; + fsl,tmr-fiper1 = <0x3b9ac9fb>; + fsl,tmr-fiper2 = <0x0001869b>; + fsl,max-adj = <249999999>; + }; + + enet1: ethernet@25000 { + /* Connected to port 0 of QCA8337N-AL3C switch */ + phy-connection-type = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + mdio@25520 { + status = "disabled"; + }; + + enet2: ethernet@26000 { + /* Connected to KSZ9031RNXCA ethernet phy (WAN port) */ + label = "wan"; + phy-handle = <&phy>; + phy-connection-type = "rgmii-id"; + }; + + mdio@26520 { + status = "disabled"; + }; + + sdhc@2e000 { + bus-width = <4>; + cd-gpios = <&gpio 8 GPIO_ACTIVE_LOW>; + }; + }; + + lbc: localbus@ffe05000 { + reg = <0 0xffe05000 0 0x1000>; + + ranges = <0x0 0x0 0x0 0xef000000 0x01000000>, /* NOR */ + <0x1 0x0 0x0 0xff800000 0x00040000>, /* NAND */ + <0x3 0x0 0x0 0xffa00000 0x00020000>; /* CPLD */ + + /* S29GL128P90TFIR10 NOR */ + nor@0,0 { + compatible = "cfi-flash"; + reg = <0x0 0x0 0x01000000>; + bank-width = <2>; + device-width = <1>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + /* 128 kB for Device Tree Blob */ + reg = <0x00000000 0x00020000>; + label = "dtb"; + }; + + partition@20000 { + /* 1.7 MB for Rescue Linux Kernel Image */ + reg = <0x00020000 0x001a0000>; + label = "rescue-kernel"; + }; + + partition@1c0000 { + /* 1.5 MB for Rescue JFFS2 Root File System */ + reg = <0x001c0000 0x00180000>; + label = "rescue-rootfs"; + }; + + partition@340000 { + /* 11 
MB for TAR.XZ Backup with content of NAND Root File System */ + reg = <0x00340000 0x00b00000>; + label = "backup-rootfs"; + }; + + partition@e40000 { + /* 768 kB for Certificates JFFS2 File System */ + reg = <0x00e40000 0x000c0000>; + label = "certificates"; + }; + + /* free unused space 0x00f00000-0x00f20000 */ + + partition@f20000 { + /* 128 kB for U-Boot Environment Variables */ + reg = <0x00f20000 0x00020000>; + label = "u-boot-env"; + }; + + partition@f40000 { + /* 768 kB for U-Boot Bootloader Image */ + reg = <0x00f40000 0x000c0000>; + label = "u-boot"; + }; + }; + }; + + /* MT29F2G08ABAEAWP:E NAND */ + nand@1,0 { + compatible = "fsl,p2020-fcm-nand", "fsl,elbc-fcm-nand"; + reg = <0x1 0x0 0x00040000>; + nand-ecc-mode = "soft"; + nand-ecc-algo = "bch"; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + /* 256 MB for UBI with one volume: UBIFS Root File System */ + reg = <0x00000000 0x10000000>; + label = "rootfs"; + }; + }; + }; + + /* LCMXO1200C-3FTN256C FPGA */ + cpld@3,0 { + /* + * Turris CPLD firmware which runs on this Lattice FPGA, + * is extended version of P1021RDB-PC CPLD v4.1 firmware. + * It is backward compatible with its original version + * and the only extension is support for Turris LEDs. + * Turris CPLD firmware is open source and available at: + * https://gitlab.nic.cz/turris/hw/turris_cpld/-/blob/master/CZ_NIC_Router_CPLD.v + */ + compatible = "cznic,turris1x-cpld", "fsl,p1021rdb-pc-cpld", "simple-bus", "syscon"; + reg = <0x3 0x0 0x30>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x3 0x0 0x00020000>; + + /* MAX6370KA+T watchdog */ + watchdog@2 { + /* + * CPLD firmware maps SET0, SET1 and SET2 + * input logic of MAX6370KA+T chip to CPLD + * memory space at byte offset 0x2. WDI + * input logic is outside of the CPLD and + * connected via external GPIO. 
+ */ + compatible = "maxim,max6370"; + reg = <0x02 0x01>; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; + }; + + reboot@d { + compatible = "syscon-reboot"; + reg = <0x0d 0x01>; + offset = <0x0d>; + mask = <0x01>; + value = <0x01>; + }; + + led-controller@13 { + /* + * LEDs are controlled by CPLD firmware. + * All five LAN LEDs share common RGB settings + * and so it is not possible to set different + * colors on different LAN ports. + */ + compatible = "cznic,turris1x-leds"; + reg = <0x13 0x1d>; + #address-cells = <1>; + #size-cells = <0>; + + multi-led@0 { + reg = <0x0>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_WAN; + }; + + multi-led@1 { + reg = <0x1>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_LAN; + function-enumerator = <5>; + }; + + multi-led@2 { + reg = <0x2>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_LAN; + function-enumerator = <4>; + }; + + multi-led@3 { + reg = <0x3>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_LAN; + function-enumerator = <3>; + }; + + multi-led@4 { + reg = <0x4>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_LAN; + function-enumerator = <2>; + }; + + multi-led@5 { + reg = <0x5>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_LAN; + function-enumerator = <1>; + }; + + multi-led@6 { + reg = <0x6>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_WLAN; + }; + + multi-led@7 { + reg = <0x7>; + color = <LED_COLOR_ID_RGB>; + function = LED_FUNCTION_POWER; + }; + }; + }; + }; + + pci2: pcie@ffe08000 { + /* + * PCIe bus for on-board TUSB7340RKM USB 3.0 xHCI controller. + * This xHCI controller is available only on Turris 1.1 boards. + * Turris 1.0 boards have nothing connected to this PCIe bus, + * so system would see only PCIe Root Port of this PCIe Root + * Complex. TUSB7340RKM xHCI controller has four SuperSpeed + * channels. Channel 0 is connected to the front USB 3.0 port, + * channel 1 (but only USB 2.0 subset) to USB 2.0 pins on mPCIe + * slot 1 (CN5), channels 2 and 3 to connector P600. + * + * P2020 PCIe Root Port uses 1MB of PCIe MEM and xHCI controller + * uses 64kB + 8kB of PCIe MEM. 
No PCIe IO is used or required. + * So allocate 2MB of PCIe MEM for this PCIe bus. + */ + reg = <0 0xffe08000 0 0x1000>; + ranges = <0x02000000 0x0 0xc0000000 0 0xc0000000 0x0 0x00200000>, /* MEM */ + <0x01000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>; /* IO */ + + pcie@0 { + ranges; + }; + }; + + pci1: pcie@ffe09000 { + /* PCIe bus on mPCIe slot 2 (CN6) for expansion mPCIe card */ + reg = <0 0xffe09000 0 0x1000>; + ranges = <0x02000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000>, /* MEM */ + <0x01000000 0x0 0x00000000 0 0xffc10000 0x0 0x00010000>; /* IO */ + + pcie@0 { + ranges; + }; + }; + + pci0: pcie@ffe0a000 { + /* + * PCIe bus on mPCIe slot 1 (CN5) for expansion mPCIe card. + * Turris 1.1 boards have in this mPCIe slot additional USB 2.0 + * pins via channel 1 of TUSB7340RKM xHCI controller and also + * additional SIM card slot, both for USB-based WWAN cards. + */ + reg = <0 0xffe0a000 0 0x1000>; + ranges = <0x02000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000>, /* MEM */ + <0x01000000 0x0 0x00000000 0 0xffc00000 0x0 0x00010000>; /* IO */ + + pcie@0 { + ranges; + }; + }; +}; + +/include/ "fsl/p2020si-post.dtsi" diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 4bc549c6edc5..fde4824f235e 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -118,7 +118,7 @@ CONFIG_CRAMFS=y CONFIG_NLS_DEFAULT="n" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_XMON=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 717827219921..7283b7d4a1a5 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -73,7 +73,7 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NLS_DEFAULT="n" -CONFIG_DEBUG_INFO=y 
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_XMON=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index 8da316e61a08..3fdfbb29b854 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -110,7 +110,7 @@ CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3 CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_CBC=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index c11e777b2f3d..0f6380e1e612 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -56,7 +56,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PPC_EARLY_DEBUG=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 47252c2d7669..20891c413149 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -88,7 +88,7 @@ CONFIG_NLS_UTF8=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 63368e677506..7db479dcbc0c 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -58,6 +58,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not 
set diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index 72762da94846..6186ead1e105 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -84,7 +84,7 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index a3c8ca74032c..e6735b945327 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -85,7 +85,7 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 5326bc739279..7f35d5bc1229 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -45,7 +45,7 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 00d69965f898..8df6d3a293e3 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -59,7 +59,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/ep88xc_defconfig 
b/arch/powerpc/configs/ep88xc_defconfig index f5c3e72da719..a98ef6a4abef 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -48,6 +48,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index df37efed0aec..f14c6dbd7346 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -24,7 +24,7 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index dcc8dccf54f3..498d35db7833 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -73,7 +73,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 83d801307178..c0fe5e76604a 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -122,6 +122,6 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index 00a4d2bf43b2..4145ef5689ca 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -67,7 
+67,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index c74dc76b1d0d..700115d85d6f 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -71,7 +71,7 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO=y CONFIG_CRYPTO_DEV_TALITOS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_VM_PGTABLE=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index b622ecd73286..91967824272e 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1065,7 +1065,7 @@ CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_HEADERS_INSTALL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index 9d8a76857c6f..9d63e2e65211 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -68,7 +68,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 7c95fab4b920..2d9ac233da68 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -153,7 +153,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_INFO=y 
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 77857d513022..083c2e57520a 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -55,6 +55,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 0e365c5b2396..564859e6a807 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -17,12 +17,7 @@ static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, s } #ifdef CONFIG_PPC_POWERNV -int powernv_hwrng_present(void); -int powernv_get_random_long(unsigned long *v); -int powernv_get_random_real_mode(unsigned long *v); -#else -static inline int powernv_hwrng_present(void) { return 0; } -static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; } +int pnv_get_random_long(unsigned long *v); #endif #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d995c65d18ab..81631e64dbeb 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -2,8 +2,9 @@ #ifndef _ASM_POWERPC_ASM_PROTOTYPES_H #define _ASM_POWERPC_ASM_PROTOTYPES_H /* - * This file is for prototypes of C functions that are only called - * from asm, and any associated variables. + * This file is for C prototypes of asm symbols that are EXPORTed. + * It allows the modversions logic to see their prototype and + * generate proper CRCs for them. * * Copyright 2016, Daniel Axtens, IBM Corporation. 
*/ @@ -34,12 +35,6 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode, uint64_t msr); -/* prom_init (OpenFirmware) */ -unsigned long __init prom_init(unsigned long r3, unsigned long r4, - unsigned long pp, - unsigned long r6, unsigned long r7, - unsigned long kbase); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f0e687236484..ef2d8b15eaab 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -42,6 +42,8 @@ /* The sub-arch has lwsync */ #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC +#elif defined(CONFIG_BOOKE) +# define SMPWMB mbar #else # define SMPWMB eieio #endif diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index b37a28f62cf6..aa1c67c8bfc8 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H #define _ASM_POWERPC_BOOK3S_64_HUGETLB_H + +#include + /* * For radix we want generic code to handle hugetlb. 
But then if we want * both hash and radix to be enabled together we need to workaround the diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d2e80f178b6d..206f920fe5b9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -138,9 +138,29 @@ static inline void flush_all_mm(struct mm_struct *mm) static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, unsigned long address) { - /* See ptep_set_access_flags comment */ - if (atomic_read(&vma->vm_mm->context.copros) > 0) - flush_tlb_page(vma, address); + /* + * Book3S 64 does not require spurious fault flushes because the PTE + * must be re-fetched in case of an access permission problem. So the + * only reason for a spurious fault should be concurrent modification + * to the PTE, in which case the PTE will eventually be re-fetched by + * the MMU when it attempts the access again. + * + * See: Power ISA Version 3.1B, 6.10.1.2 Modifying a Translation Table + * Entry, Setting a Reference or Change Bit or Upgrading Access + * Authority (PTE Subject to Atomic Hardware Updates): + * + * "If the only change being made to a valid PTE that is subject to + * atomic hardware updates is to set the Reference or Change bit to + * 1 or to upgrade access authority, a simpler sequence suffices + * because the translation hardware will refetch the PTE if an + * access is attempted for which the only problems were reference + * and/or change bits needing to be set or insufficient access + * authority." + * + * The nest MMU in POWER9 does not perform this PTE re-fetch, but + * it avoids the spurious fault problem by flushing the TLB before + * upgrading PTE permissions, see radix__ptep_set_access_flags. 
+ */ } extern bool tlbie_capable; diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 549eb6dd146f..ae8c3e13cfce 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -70,9 +70,6 @@ struct cpu_spec { /* Used to restore cpu setup on secondary processors and at resume */ cpu_restore_t cpu_restore; - /* Used by oprofile userspace to select the right counters */ - char *oprofile_cpu_type; - /* Name of processor class, for the ELF AT_PLATFORM entry */ char *platform; diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 504f7fe6711a..6d2b27997492 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -19,6 +19,7 @@ #include #include #include +#include typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 8dddd34b8ecf..398e0b5e485f 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -55,6 +55,7 @@ #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) #define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000) +#define FW_FEATURE_WATCHDOG ASM_CONST(0x0000080000000000) #ifndef __ASSEMBLY__ @@ -76,7 +77,7 @@ enum { FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY | - FW_FEATURE_ENERGY_SCALE_INFO, + FW_FEATURE_ENERGY_SCALE_INFO | FW_FEATURE_WATCHDOG, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index d92a20a85395..8abae463f6c1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -79,6 +79,7 @@ 
#define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDED -46 +#define H_P1 -54 #define H_P2 -55 #define H_P3 -56 #define H_P4 -57 @@ -87,6 +88,7 @@ #define H_P7 -60 #define H_P8 -61 #define H_P9 -62 +#define H_NOOP -63 #define H_TOO_BIG -64 #define H_UNSUPPORTED -67 #define H_OVERLAP -68 @@ -97,6 +99,8 @@ #define H_OP_MODE -73 #define H_COP_HW -74 #define H_STATE -75 +#define H_IN_USE -77 +#define H_ABORTED -78 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 @@ -321,10 +325,19 @@ #define H_SCM_UNBIND_ALL 0x3FC #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 +#define H_PKS_GET_CONFIG 0x41C +#define H_PKS_SET_PASSWORD 0x420 +#define H_PKS_GEN_PASSWORD 0x424 +#define H_PKS_WRITE_OBJECT 0x42C +#define H_PKS_GEN_KEY 0x430 +#define H_PKS_READ_OBJECT 0x434 +#define H_PKS_REMOVE_OBJECT 0x438 +#define H_PKS_CONFIRM_OBJECT_FLUSHED 0x43C #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C #define H_GET_ENERGY_SCALE_INFO 0x450 -#define MAX_HCALL_OPCODE H_GET_ENERGY_SCALE_INFO +#define H_WATCHDOG 0x45C +#define MAX_HCALL_OPCODE H_WATCHDOG /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -350,6 +363,14 @@ /* Platform specific hcalls, used by KVM */ #define H_RTAS 0xf000 +/* + * Platform specific hcalls, used by QEMU/SLOF. These are ignored by + * KVM and only kept here so we can identify them during tracing. 
+ */ +#define H_LOGICAL_MEMOP 0xF001 +#define H_CAS 0XF002 +#define H_UPDATE_DT 0XF003 + /* "Platform specific hcalls", provided by PHYP */ #define H_GET_24X7_CATALOG_PAGE 0xF078 #define H_GET_24X7_DATA 0xF07C diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 674e5aaafcbd..26ede09c521d 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -113,14 +113,7 @@ static inline void __hard_RI_enable(void) static inline notrace unsigned long irq_soft_mask_return(void) { - unsigned long flags; - - asm volatile( - "lbz %0,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, irq_soft_mask))); - - return flags; + return READ_ONCE(local_paca->irq_soft_mask); } /* @@ -130,7 +123,6 @@ static inline notrace unsigned long irq_soft_mask_return(void) */ static inline notrace void irq_soft_mask_set(unsigned long mask) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG /* * The irq mask must always include the STD bit if any are set. * @@ -145,49 +137,27 @@ static inline notrace void irq_soft_mask_set(unsigned long mask) * unmasks to be replayed, among other things. For now, take * the simple approach. 
*/ - WARN_ON(mask && !(mask & IRQS_DISABLED)); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON(mask && !(mask & IRQS_DISABLED)); - asm volatile( - "stb %0,%1(13)" - : - : "r" (mask), - "i" (offsetof(struct paca_struct, irq_soft_mask)) - : "memory"); + WRITE_ONCE(local_paca->irq_soft_mask, mask); + barrier(); } static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { - unsigned long flags; + unsigned long flags = irq_soft_mask_return(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(mask && !(mask & IRQS_DISABLED)); -#endif - - asm volatile( - "lbz %0,%1(13); stb %2,%1(13)" - : "=&r" (flags) - : "i" (offsetof(struct paca_struct, irq_soft_mask)), - "r" (mask) - : "memory"); + irq_soft_mask_set(mask); return flags; } static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags, tmp; + unsigned long flags = irq_soft_mask_return(); - asm volatile( - "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" - : "=&r" (flags), "=r" (tmp) - : "i" (offsetof(struct paca_struct, irq_soft_mask)), - "r" (mask) - : "memory"); - -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); -#endif + irq_soft_mask_set(flags | mask); return flags; } @@ -312,9 +282,7 @@ static inline bool pmi_irq_pending(void) flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (!arch_irqs_disabled_flags(flags)) { \ - asm ("stdx %%r1, 0, %1 ;" \ - : "=m" (local_paca->saved_r1) \ - : "b" (&local_paca->saved_r1)); \ + WRITE_ONCE(local_paca->saved_r1, current_stack_pointer);\ trace_hardirqs_off(); \ } \ } while(0) @@ -353,11 +321,13 @@ bool power_pmu_wants_prompt_pmi(void); */ static inline bool should_hard_irq_enable(void) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); - WARN_ON(mfmsr() & MSR_EE); -#endif -#ifdef CONFIG_PERF_EVENTS + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + 
WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(mfmsr() & MSR_EE); + } + + if (!IS_ENABLED(CONFIG_PERF_EVENTS)) + return false; /* * If the PMU is not running, there is not much reason to enable * MSR[EE] in irq handlers because any interrupts would just be @@ -372,9 +342,6 @@ static inline bool should_hard_irq_enable(void) return false; return true; -#else - return false; -#endif } /* @@ -382,11 +349,11 @@ static inline bool should_hard_irq_enable(void) */ static inline void do_hard_irq_enable(void) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); - WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); - WARN_ON(mfmsr() & MSR_EE); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); + WARN_ON(mfmsr() & MSR_EE); + } /* * This allows PMI interrupts (and watchdog soft-NMIs) through. * There is no other reason to enable this way. 
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b49aae9f6f27..684d3f453282 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -139,25 +139,6 @@ static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x) *(u64 *)ptr = ppc_inst_as_ulong(x); } -#define PPC_INST_STR_LEN sizeof("00000000 00000000") - -static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) -{ - if (ppc_inst_prefixed(x)) - sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); - else - sprintf(str, "%08x", ppc_inst_val(x)); - - return str; -} - -#define ppc_inst_as_str(x) \ -({ \ - char __str[PPC_INST_STR_LEN]; \ - __ppc_inst_as_str(__str, x); \ - __str; \ -}) - static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) { unsigned int val, suffix; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b14f54d789d2..8069dbc4b8d1 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 73fcd5cdb662..fc112a91d0c2 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -33,7 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev; #include #include #include -#include #define SIO_CONFIG_RA 0x398 #define SIO_CONFIG_RD 0x399 diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 13f0409dd617..5c1516a5ba8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -54,7 +54,6 @@ extern void *softirq_ctx[NR_CPUS]; void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); -extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 
a6be4025cba2..92a968202ba7 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32) #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET @@ -39,6 +39,17 @@ * c00e000000000000 << 3 + a80e000000000000 = c00fc00000000000 */ #define KASAN_SHADOW_END 0xc00fc00000000000UL + +#else + +/* + * The shadow ends before the highest accessible address + * because we don't need a shadow for the shadow. + * But it doesn't hurt to have a shadow for the shadow, + * and keeping the shadow end aligned eases things. + */ +#define KASAN_SHADOW_END 0xc000200000000000UL + +#endif #ifdef CONFIG_KASAN diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index d6f4edfe4737..f8d122d16af4 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -83,6 +83,7 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); +extern void crash_kexec_prepare(void); extern void crash_kexec_secondary(struct pt_regs *regs); int __init overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index bab364152b29..c8e4b4fd4e33 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -29,7 +29,7 @@ struct pt_regs; struct kprobe; -typedef ppc_opcode_t kprobe_opcode_t; +typedef u32 kprobe_opcode_t; extern kprobe_opcode_t optinsn_slot; diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 91c9f937edcd..bbf5e2c5fe09 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -280,9
+280,6 @@ extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); long kvmppc_read_intr(void); -void kvmppc_bad_interrupt(struct pt_regs *regs); -void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip); -void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip); void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2909a88acd16..c2b003550dc9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -523,7 +523,11 @@ struct kvm_vcpu_arch { struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; #endif - struct pt_regs regs; + /* + * This is passed along to the HV via H_ENTER_NESTED. Align to + * prevent it crossing a real 4K page. + */ + struct pt_regs regs __aligned(512); struct thread_fp_state fp; @@ -830,11 +834,21 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */ u64 cur_tb_start; /* when it started */ +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING + struct kvmhv_tb_accumulator vcpu_entry; + struct kvmhv_tb_accumulator vcpu_exit; + struct kvmhv_tb_accumulator in_guest; + struct kvmhv_tb_accumulator hcall; + struct kvmhv_tb_accumulator pg_fault; + struct kvmhv_tb_accumulator guest_entry; + struct kvmhv_tb_accumulator guest_exit; +#else struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ +#endif #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 6c1002043367..8cb83600c434 100644 --- 
a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -8,8 +8,6 @@ #include #include -#include - struct pt_regs; struct pci_bus; struct device_node; diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 1b024e64c8ec..17a77d47ed6d 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -12,6 +12,7 @@ #include #include #include +#include static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 5f41565a1e5d..860d0290ca4d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -96,15 +96,6 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) -/* Enable use of TLB reservation. Processor should support tlbsrx. - * instruction and MAS0[WQ]. - */ -#define MMU_FTR_USE_TLBRSRV ASM_CONST(0x00800000) - -/* Use paired MAS registers (MAS7||MAS3, etc.) - */ -#define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) - /* Doesn't support the B bit (1T segment) in SLBIE */ #define MMU_FTR_NO_SLBIE_B ASM_CONST(0x02000000) @@ -180,9 +171,6 @@ enum { #ifdef CONFIG_PPC_83xx MMU_FTR_NEED_DTLB_SW_LRU | #endif -#ifdef CONFIG_PPC_BOOK3E_64 - MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | -#endif #ifdef CONFIG_PPC_BOOK3S_64 MMU_FTR_KERNEL_RO | #ifdef CONFIG_PPC_64S_HASH_MMU diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h index ce1e0aabaa64..5ea16a71c2f0 100644 --- a/arch/powerpc/include/asm/mpc52xx.h +++ b/arch/powerpc/include/asm/mpc52xx.h @@ -15,7 +15,6 @@ #ifndef __ASSEMBLY__ #include -#include #include #endif /* __ASSEMBLY__ */ @@ -268,13 +267,14 @@ struct mpc52xx_intr { #ifndef __ASSEMBLY__ +struct device_node; + /* mpc52xx_common.c */ extern void mpc5200_setup_xlb_arbiter(void); extern void mpc52xx_declare_of_platform_devices(void); extern int mpc5200_psc_ac97_gpio_reset(int psc_number); extern void 
mpc52xx_map_common_devices(void); extern int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv); -extern unsigned int mpc52xx_get_xtal_freq(struct device_node *node); extern void __noreturn mpc52xx_restart(char *cmd); /* mpc52xx_gpt.c */ diff --git a/arch/powerpc/include/asm/mpc5xxx.h b/arch/powerpc/include/asm/mpc5xxx.h index 2f60f5c5461b..44db26380435 100644 --- a/arch/powerpc/include/asm/mpc5xxx.h +++ b/arch/powerpc/include/asm/mpc5xxx.h @@ -11,7 +11,14 @@ #ifndef __ASM_POWERPC_MPC5xxx_H__ #define __ASM_POWERPC_MPC5xxx_H__ -extern unsigned long mpc5xxx_get_bus_frequency(struct device_node *node); +#include + +unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode); + +static inline unsigned long mpc5xxx_get_bus_frequency(struct device *dev) +{ + return mpc5xxx_fwnode_get_bus_frequency(dev_fwnode(dev)); +} #endif /* __ASM_POWERPC_MPC5xxx_H__ */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index ea0e487f87b1..c3c7adef74de 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -5,8 +5,10 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); +void watchdog_nmi_set_timeout_pct(u64 pct); #else static inline void arch_touch_nmi_watchdog(void) {} +static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} #endif #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 668aee6017e7..e50b211becb3 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -15,7 +15,10 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + p4d_set(p4d, (unsigned long)pud); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 
{ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 57083f95e82b..599921cc257e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -25,7 +25,7 @@ /* * Define the address range of the kernel non-linear virtual area */ -#define KERN_VIRT_START ASM_CONST(0x8000000000000000) +#define KERN_VIRT_START ASM_CONST(0xc000100000000000) #define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) /* @@ -38,15 +38,16 @@ #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* - * The second half of the kernel virtual space is used for IO mappings, + * The third quarter of the kernel virtual space is used for IO mappings, * it's itself carved into the PIO region (ISA and PHB IO space) and * the ioremap space * * ISA_IO_BASE = KERN_IO_START, 64K reserved area * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces - * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE + * IOREMAP_BASE = ISA_IO_BASE + 2G to KERN_IO_START + KERN_IO_SIZE */ #define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) +#define KERN_IO_SIZE (KERN_VIRT_SIZE >> 2) #define FULL_IO_SIZE 0x80000000ul #define ISA_IO_BASE (KERN_IO_START) #define ISA_IO_END (KERN_IO_START + 0x10000ul) @@ -54,21 +55,9 @@ #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE) +#define IOREMAP_END (KERN_IO_START + KERN_IO_SIZE - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M - -/* - * Region IDs - */ -#define REGION_SHIFT 60UL -#define REGION_MASK (0xfUL << REGION_SHIFT) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) -#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define USER_REGION_ID (0UL) - /* * Defines the address of the vmemap area, in its own region on * after the vmalloc space on 
Book3E @@ -83,8 +72,6 @@ */ #include -#define _PAGE_SAO 0 - #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) /* diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index ac75f4ab0dba..b499da6c1a99 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -193,7 +193,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ stw%X0 %2,%0\n\ - eieio\n\ + mbar\n\ stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index c85f901227c9..e18c95f4e1d4 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -170,11 +170,15 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus) return bus->sysdata; } +#ifdef CONFIG_PPC_PMAC extern int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn); +#endif #ifndef CONFIG_PPC64 +#ifdef CONFIG_PPC_CHRP extern void pci_create_OF_bus_map(void); +#endif #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index f9da506751bb..289f1ec85bc5 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -14,7 +14,6 @@ #include #include -#include #include /* Return values for pci_controller_ops.probe_mode function */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 83e0f701ebc6..8239c0af5eb2 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -43,11 +43,10 @@ static inline long extended_cede_processor(unsigned long latency_hint) set_cede_latency_hint(latency_hint); rc = cede_processor(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + /* Ensure that 
H_CEDE returns with IRQs on */ - if (WARN_ON(!(mfmsr() & MSR_EE))) + if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & MSR_EE))) __hard_irq_enable(); -#endif set_cede_latency_hint(old_latency_hint); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 89beabf5325c..7b81b37a191e 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -290,7 +290,6 @@ #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e -#define PPC_INST_SETB 0x7c000100 #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a #define PPC_INST_TRECHKPT 0x7c0007dd @@ -581,6 +580,9 @@ #define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset)) #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) +#define PPC_RAW_TW(t0, a, b) (0x7f000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) +#define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 6f66e358aa37..e77a2ed7d938 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -9,9 +9,9 @@ */ #include #include +#include -typedef u32 ppc_opcode_t; -#define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ +#define BREAKPOINT_INSTRUCTION PPC_RAW_TRAP() /* trap */ /* Trap definitions per ISA */ #define IS_TW(instr) (((instr) & 0xfc0007fe) == 0x7c000008) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 5c80152e8f18..2e82820fbd64 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -12,15 +12,10 @@ * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp. 
*/ #include -#include -#include +#include -/* These includes should be removed once implicit includes are cleaned up. */ -#include -#include -#include -#include -#include +struct device_node; +struct property; #define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ #define OF_DT_END_NODE 0x2 /* End node */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 8fa37ef5da4d..d8c28902cf59 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -12,7 +12,6 @@ extern unsigned long long memory_limit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; -extern void note_scsi_host(struct device_node *, void *); /* Used in very early kernel initialization. */ extern unsigned long reloc_offset(void); @@ -85,6 +84,11 @@ void __init machine_init(u64 dt_ptr); void __init early_setup(unsigned long dt_ptr); void early_setup_secondary(void); +/* prom_init (OpenFirmware) */ +unsigned long __init prom_init(unsigned long r3, unsigned long r4, + unsigned long pp, unsigned long r6, + unsigned long r7, unsigned long kbase); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index 1d67bc8d7bc6..7130176d8cb8 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -14,7 +14,10 @@ extern void do_lwsync_fixups(unsigned long value, void *fixup_start, static inline void eieio(void) { - __asm__ __volatile__ ("eieio" : : : "memory"); + if (IS_ENABLED(CONFIG_BOOKE)) + __asm__ __volatile__ ("mbar" : : : "memory"); + else + __asm__ __volatile__ ("eieio" : : : "memory"); } static inline void isync(void) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 105f200b1e31..3ddc65c63a49 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -2,7 +2,6 @@ #ifndef _ARCH_POWERPC_UACCESS_H #define 
_ARCH_POWERPC_UACCESS_H -#include #include #include #include diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index a7ae1860115a..4fea116d3d37 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -12,7 +12,7 @@ #include #include -typedef ppc_opcode_t uprobe_opcode_t; +typedef u32 uprobe_opcode_t; #define MAX_UINSN_BYTES 8 #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index f3f4710d4ff5..46c31fb8748d 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -7,7 +7,7 @@ #include #include -#include +#include #ifdef __BIG_ENDIAN__ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c8cf924bf9c0..06d2d1f78f71 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET +# Remove stack protector to avoid triggering unneeded stack canary +# checks due to randomize_kstack_offset. 
+CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong +CFLAGS_syscall.o += -fno-stack-protector +#endif + obj-y := cputable.o syscalls.o \ irq.o align.o signal_$(BITS).o pmc.o vdso.o \ process.o systbl.o idle.o \ @@ -62,9 +69,9 @@ obj-y := cputable.o syscalls.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ - kdebugfs.o stacktrace.o + kdebugfs.o stacktrace.o syscall.o obj-y += ptrace/ -obj-$(CONFIG_PPC64) += setup_64.o \ +obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index eec536aef83a..8c10f536e478 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -379,7 +379,7 @@ int main(void) OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 8f69bb07e500..2769889219bf 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -73,7 +73,7 @@ static inline void rmci_maybe_off(void) * the display during identify_machine() and MMU_Init() * * The display is mapped to virtual address 0xD0000000, rather - * than 1:1, because some some CHRP machines put the frame buffer + * than 1:1, because some CHRP machines put the frame buffer * in the region starting at 0xC0000000 (PAGE_OFFSET). * This mapping is temporary and will disappear as soon as the * setup done by MMU_Init() is applied. 
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index a5dbfccd2047..d8e42ef750f1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -149,7 +149,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* PPC970FX */ @@ -166,7 +165,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ @@ -183,7 +181,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970MP", .platform = "ppc970", }, { /* PPC970MP */ @@ -200,7 +197,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970MP, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970MP", .platform = "ppc970", }, { /* PPC970GX */ @@ -216,7 +212,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 8, .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* Power5 GR */ @@ -230,7 +225,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power5", .platform = "power5", }, { /* Power5++ */ @@ -243,7 +237,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, - .oprofile_cpu_type = "ppc64/power5++", .platform = "power5+", }, { /* Power5 GS */ @@ -257,7 +250,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - 
.oprofile_cpu_type = "ppc64/power5+", .platform = "power5+", }, { /* POWER6 in P5+ mode; 2.04-compliant processor */ @@ -269,7 +261,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power5+", }, { /* Power6 */ @@ -284,7 +275,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power6", .platform = "power6x", }, { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ @@ -296,7 +286,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power6", }, { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ @@ -309,7 +298,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -325,7 +313,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER8, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -341,7 +328,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER9, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .platform = "power9", @@ -356,7 +342,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER10, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = 
"ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .platform = "power10", @@ -373,7 +358,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power7", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -391,7 +375,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power7", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -409,7 +392,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -427,7 +409,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -445,7 +426,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -463,7 +443,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -481,7 +460,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, 
.num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -499,7 +477,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -517,7 +494,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -535,7 +511,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power10", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .machine_check_early = __machine_check_early_realmode_p10, @@ -554,7 +529,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 4, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/cell-be", .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ @@ -570,7 +544,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_PA6T, .cpu_setup = __setup_cpu_pa6t, .cpu_restore = __restore_cpu_pa6t, - .oprofile_cpu_type = "ppc64/pa6t", .platform = "pa6t", }, { /* default match */ @@ -734,7 +707,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 745/755 */ .pvr_mask = 0xfffff000, @@ -765,7 +737,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = 
"ppc/750", }, { /* 750FX rev 2.0 must disable HID0[DPM] */ .pvr_mask = 0xffffffff, @@ -781,7 +752,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750FX (All revs except 2.0) */ .pvr_mask = 0xffff0000, @@ -797,7 +767,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750fx, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750GX */ .pvr_mask = 0xffff0000, @@ -813,7 +782,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750fx, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 740/750 (L2CR bit need fixup for 740) */ .pvr_mask = 0xffff0000, @@ -891,7 +859,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -908,7 +875,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -925,7 +891,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -942,7 +907,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -959,7 +923,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", 
.machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -976,7 +939,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -993,7 +955,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1010,7 +971,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1026,7 +986,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1043,7 +1002,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1060,7 +1018,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1172,7 +1129,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_603, .machine_check = machine_check_83xx, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e300", .platform = "ppc603", }, { /* e300c4 (e300c1, plus one IU) */ @@ -1188,7 +1144,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_603, .machine_check = machine_check_83xx, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e300", .platform = "ppc603", }, #endif @@ 
-1884,7 +1839,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", .cpu_setup = __setup_cpu_e500v1, .machine_check = machine_check_e500, .platform = "ppc8540", @@ -1903,7 +1857,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", .cpu_setup = __setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", @@ -1922,7 +1875,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500mc", .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", @@ -1943,7 +1895,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500mc", .cpu_setup = __setup_cpu_e5500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e5500, @@ -1965,7 +1916,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, - .oprofile_cpu_type = "ppc/e6500", .cpu_setup = __setup_cpu_e6500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e6500, @@ -2033,23 +1983,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t->pmc_type = old.pmc_type; /* - * If we have passed through this logic once before and - * have pulled the default case because the real PVR was - * not found inside cpu_specs[], then we are possibly - * running in compatibility mode. In that case, let the - * oprofiler know which set of compatibility counters to - * pull from by making sure the oprofile_cpu_type string - * is set to that of compatibility mode. If the - * oprofile_cpu_type already has a value, then we are - * possibly overriding a real PVR with a logical one, - * and, in that case, keep the current value for - * oprofile_cpu_type. 
Furthermore, let's ensure that the + * Let's ensure that the * fix for the PMAO bug is enabled on compatibility mode. */ - if (old.oprofile_cpu_type != NULL) { - t->oprofile_cpu_type = old.oprofile_cpu_type; - t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; - } + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 30d4eca88d17..909a05cd2809 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -11,6 +11,7 @@ #include #include #include +#include bool dawr_force_enable; EXPORT_SYMBOL_GPL(dawr_force_enable); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 2ad365c21afa..fc800a9fb2c4 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -102,7 +102,6 @@ static struct cpu_spec __initdata base_cpu_spec = { .dcache_bsize = 32, /* cache info init. */ .num_pmcs = 0, .pmc_type = PPC_PMC_DEFAULT, - .oprofile_cpu_type = NULL, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, .machine_check_early = NULL, @@ -387,7 +386,6 @@ static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power8"; return 1; } @@ -423,7 +421,6 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power9"; return 1; } @@ -449,7 +446,6 @@ static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; return 1; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 260273e56431..f279295179bd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ 
-750,7 +750,7 @@ static void eeh_pe_cleanup(struct eeh_pe *pe) * @pdev: pci_dev to check * * This function may return a false positive if we can't determine the slot's - * presence state. This might happen for for PCIe slots if the PE containing + * presence state. This might happen for PCIe slots if the PE containing * the upstream bridge is also frozen, or the bridge is part of the same PE * as the device. * diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b66dd6f775a4..3d0dc133a9ae 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2779,7 +2779,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: - * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a decrementer interrupt, we bump the dec to max and return. * - If it was a doorbell we return immediately since doorbells are edge * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index d3eea633d11a..cf2c08902c05 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -965,6 +965,9 @@ start_here_multiplatform: * and SLB setup before we turn on relocation. 
*/ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif /* Restore parameters passed from prom_init/kexec */ mr r3,r31 LOAD_REG_ADDR(r12, DOTSYM(early_setup)) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 6c739beb938c..519b60695167 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -418,14 +418,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 784ea3289c84..0e75cb03244a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -24,8 +24,6 @@ unsigned long global_dbcr0[NR_CPUS]; #endif -typedef long (*syscall_fn)(long, long, long, long, long, long); - #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool exit_must_hard_disable(void) @@ -73,165 +71,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -/* Has to run notrace because it is entered not completely "reconciled" */ -notrace long system_call_exception(long r3, long r4, long r5, - long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs) -{ - syscall_fn f; - - kuap_lock(); - - regs->orig_gpr3 = r3; - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); 
/* finish reconciling */ - - CT_WARN_ON(ct_state() == CONTEXT_KERNEL); - user_exit_irqoff(); - - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(arch_irq_disabled_regs(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. - */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! - */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. 
- */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, &current_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3].
- */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - r3 = regs->gpr[3]; - r4 = regs->gpr[4]; - r5 = regs->gpr[5]; - r6 = regs->gpr[6]; - r7 = regs->gpr[7]; - r8 = regs->gpr[8]; - - } else if (unlikely(r0 >= NR_syscalls)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - return -ENOSYS; - } - - /* May be faster to do array_index_nospec? */ - barrier_nospec(); - - if (unlikely(is_compat_task())) { - f = (void *)compat_sys_call_table[r0]; - - r3 &= 0x00000000ffffffffULL; - r4 &= 0x00000000ffffffffULL; - r5 &= 0x00000000ffffffffULL; - r6 &= 0x00000000ffffffffULL; - r7 &= 0x00000000ffffffffULL; - r8 &= 0x00000000ffffffffULL; - - } else { - f = (void *)sys_call_table[r0]; - } - - return f(r3, r4, r5, r6, r7, r8); -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..caebe1431596 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -775,6 +775,11 @@ bool iommu_table_in_use(struct iommu_table *tbl) /* ignore reserved bit0 */ if (tbl->it_offset == 0) start = 1; + + /* Simple case with no reserved MMIO32 region */ + if (!tbl->it_reserved_start && !tbl->it_reserved_end) + return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size; + end = tbl->it_reserved_start - tbl->it_offset; if (find_next_bit(tbl->it_map, end, start) != end) return true; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0822a274a549..0f17268c1f0b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -65,13 +65,8 @@ #include #include #include +#include -#ifdef CONFIG_PPC64 -#include -#include -#include -#include -#endif #define CREATE_TRACE_POINTS #include #include @@ -88,411 +83,6 @@ u32 tau_interrupts(unsigned long cpu); #endif #endif /* CONFIG_PPC32 
*/ -#ifdef CONFIG_PPC64 - -int distribute_irqs = 1; - -static inline notrace unsigned long get_irq_happened(void) -{ - unsigned long happened; - - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - - return happened; -} - -void replay_soft_interrupts(void) -{ - struct pt_regs regs; - - /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * We use local_paca rather than get_paca() to avoid all the - * debug_smp_processor_id() business in this low level function. - */ - - ppc_save_regs(&regs); - regs.softe = IRQS_ENABLED; - regs.msr |= MSR_EE; - -again: - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - - /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. - */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); - } - - /* - * Check if an hypervisor Maintenance interrupt happened. - * This is a higher priority interrupt than the others, so - * replay it first.
- */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { - local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = INTERRUPT_HMI; - handle_hmi_exception(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = INTERRUPT_DECREMENTER; - timer_interrupt(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = INTERRUPT_EXTERNAL; - do_IRQ(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - regs.trap = INTERRUPT_DOORBELL; - doorbell_exception(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = INTERRUPT_PERFMON; - performance_monitor_exception(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - goto again; - } -} - -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) -{ - unsigned long kuap_state = get_kuap(); - - /* - * Check if anything calls local_irq_enable/restore() when KUAP is - * disabled (user access enabled). We handle that case here by saving - * and re-locking AMR but we shouldn't get here in the first place, - * hence the warning.
- */ - kuap_assert_locked(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(AMR_KUAP_BLOCKED); - - replay_soft_interrupts(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(kuap_state); -} -#else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() -#endif - -notrace void arch_local_irq_restore(unsigned long mask) -{ - unsigned char irq_happened; - - /* Write the new soft-enabled value if it is a disable */ - if (mask) { - irq_soft_mask_set(mask); - return; - } - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); - - /* - * After the stb, interrupts are unmasked and there are no interrupts - * pending replay. The restart sequence makes this atomic with - * respect to soft-masked interrupts. If this was just a simple code - * sequence, a soft-masked interrupt could become pending right after - * the comparison and before the stb. - * - * This allows interrupts to be unmasked without hard disabling, and - * also without new hard interrupts coming in ahead of pending ones. 
- */ - asm_volatile_goto( -"1: \n" -" lbz 9,%0(13) \n" -" cmpwi 9,0 \n" -" bne %l[happened] \n" -" stb 9,%1(13) \n" -"2: \n" - RESTART_TABLE(1b, 2b, 1b) - : : "i" (offsetof(struct paca_struct, irq_happened)), - "i" (offsetof(struct paca_struct, irq_soft_mask)) - : "cr0", "r9" - : happened); - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); - - return; - -happened: - irq_happened = get_irq_happened(); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!irq_happened); - - if (irq_happened == PACA_IRQ_HARD_DIS) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - irq_soft_mask_set(IRQS_ENABLED); - local_paca->irq_happened = 0; - __hard_irq_enable(); - return; - } - - /* Have interrupts to replay, need to hard disable first */ - if (!(irq_happened & PACA_IRQ_HARD_DIS)) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (!(mfmsr() & MSR_EE)) { - /* - * An interrupt could have come in and cleared - * MSR[EE] and set IRQ_HARD_DIS, so check - * IRQ_HARD_DIS again and warn if it is still - * clear. - */ - irq_happened = get_irq_happened(); - WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); - } - } - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - } else { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (WARN_ON_ONCE(mfmsr() & MSR_EE)) - __hard_irq_disable(); - } - } - - /* - * Disable preempt here, so that the below preempt_enable will - * perform resched if required (a replayed interrupt may set - * need_resched). 
- */ - preempt_disable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - - replay_soft_interrupts_irqrestore(); - local_paca->irq_happened = 0; - - trace_hardirqs_on(); - irq_soft_mask_set(IRQS_ENABLED); - __hard_irq_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(arch_local_irq_restore); - -/* - * This is a helper to use when about to go into idle low-power - * when the latter has the side effect of re-enabling interrupts - * (such as calling H_CEDE under pHyp). - * - * You call this function with interrupts soft-disabled (this is - * already the case when ppc_md.power_save is called). The function - * will return whether to enter power save or just return. - * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard - * disabled and marked as such, so the local_irq_enable() call - * in arch_cpu_idle() will properly re-enable everything. - */ -bool prep_irq_for_idle(void) -{ - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - /* - * Mark interrupts as soft-enabled and clear the - * PACA_IRQ_HARD_DIS from the pending mask since we - * are about to hard enable as well as a side effect - * of entering the low power state. - */ - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - irq_soft_mask_set(IRQS_ENABLED); - - /* Tell the caller to enter the low power state */ - return true; -} - -#ifdef CONFIG_PPC_BOOK3S -/* - * This is for idle sequences that return with IRQs off, but the - * idle state itself wakes on interrupt. 
Tell the irq tracer that - * IRQs are enabled for the duration of idle so it does not get long - * off times. Must be paired with fini_irq_for_idle_irqsoff. - */ -bool prep_irq_for_idle_irqsoff(void) -{ - WARN_ON(!irqs_disabled()); - - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - return true; -} - -/* - * Take the SRR1 wakeup reason, index into this table to find the - * appropriate irq_happened bit. - * - * Sytem reset exceptions taken in idle state also come through here, - * but they are NMI interrupts so do not need to wait for IRQs to be - * restored, and should be taken as early as practical. These are marked - * with 0xff in the table. The Power ISA specifies 0100b as the system - * reset interrupt reason. - */ -#define IRQ_SYSTEM_RESET 0xff - -static const u8 srr1_to_lazyirq[0x10] = { - 0, 0, 0, - PACA_IRQ_DBELL, - IRQ_SYSTEM_RESET, - PACA_IRQ_DBELL, - PACA_IRQ_DEC, - 0, - PACA_IRQ_EE, - PACA_IRQ_EE, - PACA_IRQ_HMI, - 0, 0, 0, 0, 0 }; - -void replay_system_reset(void) -{ - struct pt_regs regs; - - ppc_save_regs(&regs); - regs.trap = 0x100; - get_paca()->in_nmi = 1; - system_reset_exception(&regs); - get_paca()->in_nmi = 0; -} -EXPORT_SYMBOL_GPL(replay_system_reset); - -void irq_set_pending_from_srr1(unsigned long srr1) -{ - unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; - u8 reason = srr1_to_lazyirq[idx]; - - /* - * Take the system reset now, which is immediately after registers - * are restored from idle. It's an NMI, so interrupts need not be - * re-enabled before it is taken. 
- */ - if (unlikely(reason == IRQ_SYSTEM_RESET)) { - replay_system_reset(); - return; - } - - if (reason == PACA_IRQ_DBELL) { - /* - * When doorbell triggers a system reset wakeup, the message - * is not cleared, so if the doorbell interrupt is replayed - * and the IPI handled, the doorbell interrupt would still - * fire when EE is enabled. - * - * To avoid taking the superfluous doorbell interrupt, - * execute a msgclr here before the interrupt is replayed. - */ - ppc_msgclr(PPC_DBELL_MSGTYPE); - } - - /* - * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, - * so this can be called unconditionally with the SRR1 wake - * reason as returned by the idle code, which uses 0 to mean no - * interrupt. - * - * If a future CPU was to designate this as an interrupt reason, - * then a new index for no interrupt must be assigned. - */ - local_paca->irq_happened |= reason; -} -#endif /* CONFIG_PPC_BOOK3S */ - -/* - * Force a replay of the external interrupt handler on this CPU. - */ -void force_external_irq_replay(void) -{ - /* - * This must only be called with interrupts soft-disabled, - * the replay will happen when re-enabling. - */ - WARN_ON(!arch_irqs_disabled()); - - /* - * Interrupts must always be hard disabled before irq_happened is - * modified (to prevent lost update in case of interrupt between - * load and store). 
- */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* Indicate in the PACA that we have an interrupt to replay */ - local_paca->irq_happened |= PACA_IRQ_EE; -} - -#endif /* CONFIG_PPC64 */ - int arch_show_interrupts(struct seq_file *p, int prec) { int j; @@ -595,17 +185,15 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -static inline void check_stack_overflow(void) +static inline void check_stack_overflow(unsigned long sp) { - long sp; - if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) return; - sp = current_stack_pointer & (THREAD_SIZE - 1); + sp &= THREAD_SIZE - 1; - /* check for stack overflow: is there less than 2KB free? */ - if (unlikely(sp < 2048)) { + /* check for stack overflow: is there less than 1/4th free? */ + if (unlikely(sp < THREAD_SIZE / 4)) { pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } @@ -632,36 +220,16 @@ static __always_inline void call_do_softirq(const void *sp) } #endif -static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) -{ - register unsigned long r3 asm("r3") = (unsigned long)regs; - - /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ - asm volatile ( - PPC_STLU " %%r1, %[offset](%[sp]) ;" - "mr %%r1, %[sp] ;" - "bl %[callee] ;" - PPC_LL " %%r1, 0(%%r1) ;" - : // Outputs - "+r" (r3) - : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), - [callee] "i" (__do_irq) - : // Clobbers - "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", - "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "r12" - ); -} - DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq); -void __do_irq(struct pt_regs *regs) +static void __do_irq(struct pt_regs *regs, unsigned long oldsp) { unsigned int irq; trace_irq_entry(regs); + check_stack_overflow(oldsp); + /* * Query the platform PIC for the interrupt & ack it. 
* @@ -682,6 +250,29 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r4, %%r1 ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -692,16 +283,11 @@ void __do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; - check_stack_overflow(); - - /* Already there ? */ - if (unlikely(cursp == irqsp || cursp == sirqsp)) { - __do_irq(regs); - set_irq_regs(old_regs); - return; - } - /* Switch stack and call */ - call_do_irq(regs, irqsp); + /* Already there ? 
If not switch stack and call */ + if (unlikely(cursp == irqsp || cursp == sirqsp)) + __do_irq(regs, current_stack_pointer); + else + call_do_irq(regs, irqsp); set_irq_regs(old_regs); } @@ -798,13 +384,3 @@ int irq_choose_cpu(const struct cpumask *mask) return hard_smp_processor_id(); } #endif - -#ifdef CONFIG_PPC64 -static int __init setup_noirqdistrib(char *str) -{ - distribute_irqs = 0; - return 1; -} - -__setup("noirqdistrib", setup_noirqdistrib); -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c new file mode 100644 index 000000000000..01645e03e9f0 --- /dev/null +++ b/arch/powerpc/kernel/irq_64.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. 
+ */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int distribute_irqs = 1; + +void replay_soft_interrupts(void) +{ + struct pt_regs regs; + + /* + * Be careful here, calling these interrupt handlers can cause + * softirqs to be raised, which they may run when calling irq_exit, + * which will cause local_irq_enable() to be run, which can then + * recurse into this function. Don't keep any state across + * interrupt handler calls which may change underneath us. + * + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. + */ + + ppc_save_regs(&regs); + regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; + +again: + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); + } + + /* + * Check if an hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first. 
+ */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { + local_paca->irq_happened &= ~PACA_IRQ_HMI; + regs.trap = INTERRUPT_HMI; + handle_hmi_exception(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & PACA_IRQ_DEC) { + local_paca->irq_happened &= ~PACA_IRQ_DEC; + regs.trap = INTERRUPT_DECREMENTER; + timer_interrupt(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & PACA_IRQ_EE) { + local_paca->irq_happened &= ~PACA_IRQ_EE; + regs.trap = INTERRUPT_EXTERNAL; + do_IRQ(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + regs.trap = INTERRUPT_DOORBELL; + doorbell_exception(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + /* Book3E does not support soft-masking PMI interrupts */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + regs.trap = INTERRUPT_PERFMON; + performance_monitor_exception(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + trace_hardirqs_on(); + trace_hardirqs_off(); + goto again; + } +} + +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. 
+ */ + kuap_assert_locked(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + +notrace void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(in_nmi() || in_hardirq()); + + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. 
+ */ + asm_volatile_goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + return; + +happened: + irq_happened = READ_ONCE(local_paca->irq_happened); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = READ_ONCE(local_paca->irq_happened); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). 
+ */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + replay_soft_interrupts_irqrestore(); + local_paca->irq_happened = 0; + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + __hard_irq_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have notified lockdep of interrupts + * being re-enabled and generally sanitized the lazy irq state, + * and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in arch_cpu_idle() will properly re-enable everything. + */ +bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ENABLED); + + /* Tell the caller to enter the low power state */ + return true; +} + +#ifdef CONFIG_PPC_BOOK3S +/* + * This is for idle sequences that return with IRQs off, but the + * idle state itself wakes on interrupt. 
Tell the irq tracer that + * IRQs are enabled for the duration of idle so it does not get long + * off times. Must be paired with fini_irq_for_idle_irqsoff. + */ +bool prep_irq_for_idle_irqsoff(void) +{ + WARN_ON(!irqs_disabled()); + + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + return true; +} + +/* + * Take the SRR1 wakeup reason, index into this table to find the + * appropriate irq_happened bit. + * + * Sytem reset exceptions taken in idle state also come through here, + * but they are NMI interrupts so do not need to wait for IRQs to be + * restored, and should be taken as early as practical. These are marked + * with 0xff in the table. The Power ISA specifies 0100b as the system + * reset interrupt reason. + */ +#define IRQ_SYSTEM_RESET 0xff + +static const u8 srr1_to_lazyirq[0x10] = { + 0, 0, 0, + PACA_IRQ_DBELL, + IRQ_SYSTEM_RESET, + PACA_IRQ_DBELL, + PACA_IRQ_DEC, + 0, + PACA_IRQ_EE, + PACA_IRQ_EE, + PACA_IRQ_HMI, + 0, 0, 0, 0, 0 }; + +void replay_system_reset(void) +{ + struct pt_regs regs; + + ppc_save_regs(&regs); + regs.trap = 0x100; + get_paca()->in_nmi = 1; + system_reset_exception(&regs); + get_paca()->in_nmi = 0; +} +EXPORT_SYMBOL_GPL(replay_system_reset); + +void irq_set_pending_from_srr1(unsigned long srr1) +{ + unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; + u8 reason = srr1_to_lazyirq[idx]; + + /* + * Take the system reset now, which is immediately after registers + * are restored from idle. It's an NMI, so interrupts need not be + * re-enabled before it is taken. 
+ */ + if (unlikely(reason == IRQ_SYSTEM_RESET)) { + replay_system_reset(); + return; + } + + if (reason == PACA_IRQ_DBELL) { + /* + * When doorbell triggers a system reset wakeup, the message + * is not cleared, so if the doorbell interrupt is replayed + * and the IPI handled, the doorbell interrupt would still + * fire when EE is enabled. + * + * To avoid taking the superfluous doorbell interrupt, + * execute a msgclr here before the interrupt is replayed. + */ + ppc_msgclr(PPC_DBELL_MSGTYPE); + } + + /* + * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, + * so this can be called unconditionally with the SRR1 wake + * reason as returned by the idle code, which uses 0 to mean no + * interrupt. + * + * If a future CPU was to designate this as an interrupt reason, + * then a new index for no interrupt must be assigned. + */ + local_paca->irq_happened |= reason; +} +#endif /* CONFIG_PPC_BOOK3S */ + +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + +static int __init setup_noirqdistrib(char *str) +{ + distribute_irqs = 0; + return 1; +} + +__setup("noirqdistrib", setup_noirqdistrib); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 1c97c0f177ae..912d4f8a13be 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -269,7 +269,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... 
but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); + printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn)); BUG(); } else { /* diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 18173199b79d..6c5d30fba766 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -756,7 +756,7 @@ void __init mce_init(void) mce_info = memblock_alloc_try_nid(sizeof(*mce_info), __alignof__(*mce_info), MEMBLOCK_LOW_LIMIT, - limit, cpu_to_node(i)); + limit, early_cpu_to_node(i)); if (!mce_info) goto err; paca_ptrs[i]->mce_info = mce_info; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 068410cd54a3..bdd3332200c5 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "../../../drivers/pci/pci.h" @@ -74,16 +75,32 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; - u32 prop_32; u64 prop; /* * Try fixed PHB numbering first, by checking archs and reading - * the respective device-tree properties. Firstly, try powernv by - * reading "ibm,opal-phbid", only present in OPAL environment. + * the respective device-tree properties. Firstly, try reading + * standard "linux,pci-domain", then try reading "ibm,opal-phbid" + * (only present in powernv OPAL environment), then try device-tree + * alias and as the last try to use lower bits of "reg" property. 
*/ - ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + ret = of_get_pci_domain_nr(dn); + if (ret >= 0) { + prop = ret; + ret = 0; + } + if (ret) + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) { + ret = of_alias_get_id(dn, "pci"); + if (ret >= 0) { + prop = ret; + ret = 0; + } + } + if (ret) { + u32 prop_32; ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); prop = prop_32; } @@ -95,10 +112,7 @@ static int get_phb_number(struct device_node *dn) if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) return phb_id; - /* - * If not pseries nor powernv, or if fixed PHB numbering tried to add - * the same PHB number twice, then fallback to dynamic PHB numbering. - */ + /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); @@ -1087,7 +1101,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) */ pci_read_bridge_bases(bus); - /* Now fixup the bus bus */ + /* Now fixup the bus */ pcibios_setup_bus_self(bus); } EXPORT_SYMBOL(pcibios_fixup_bus); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 5a174936c9a0..433965bf37b4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -36,18 +36,13 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void __init pcibios_make_OF_bus_map(void); - static void fixup_cpc710_pci64(struct pci_dev* dev); -static u8* pci_to_OF_bus_map; /* By default, we don't re-assign bus numbers. We do this only on * some pmacs */ static int pci_assign_all_buses; -static int pci_bus_count; - /* This will remain NULL for now, until isa-bridge.c is made common * to both 32-bit and 64-bit. 
*/ @@ -67,6 +62,11 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) + +static u8* pci_to_OF_bus_map; +static int pci_bus_count; + /* * Functions below are used on OpenFirmware machines. */ @@ -108,7 +108,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void __init +static void __init pcibios_make_OF_bus_map(void) { int i; @@ -154,6 +154,7 @@ pcibios_make_OF_bus_map(void) } +#ifdef CONFIG_PPC_PMAC /* * Returns the PCI device matching a given OF node */ @@ -193,7 +194,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) return -ENODEV; } EXPORT_SYMBOL(pci_device_from_OF_node); +#endif +#ifdef CONFIG_PPC_CHRP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -218,6 +221,9 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } +#endif + +#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) */ void pcibios_setup_phb_io_space(struct pci_controller *hose) { @@ -233,7 +239,9 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) static int __init pcibios_init(void) { struct pci_controller *hose, *tmp; +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT int next_busno = 0; +#endif printk(KERN_INFO "PCI: Probing PCI hardware\n"); @@ -242,14 +250,20 @@ static int __init pcibios_init(void) /* Scan all of the recorded PCI controllers. 
*/ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses) hose->first_busno = next_busno; +#endif hose->last_busno = 0xff; pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; +#endif } + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -258,6 +272,7 @@ static int __init pcibios_init(void) */ if (pci_assign_all_buses) pcibios_make_OF_bus_map(); +#endif /* Call common code to handle resource allocation */ pcibios_resource_survey(); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 19b03ddf5631..0c7cfb9fab04 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -286,6 +286,7 @@ int pcibus_to_node(struct pci_bus *bus) EXPORT_SYMBOL(pcibus_to_node); #endif +#ifdef CONFIG_PPC_PMAC int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) { if (!PCI_DN(np)) @@ -294,3 +295,4 @@ int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) *devfn = PCI_DN(np)->devfn; return 0; } +#endif diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 938ab8838ab5..7a35fc25a304 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -259,7 +259,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) if (edev) { /* * We allocate pci_dn's for the totalvfs count, - * but only only the vfs that were activated + * but only the vfs that were activated * have a configured PE. 
*/ if (edev->pe) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index feae8509b59c..a730b951b64b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,6 +54,7 @@ #include #include #include +#include #include @@ -751,6 +752,13 @@ void __init early_init_devtree(void *params) early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); + /* + * As generic code authors expect to be able to use static keys + * in early_param() handlers, we initialize the static keys just + * before parsing early params (it's fine to call jump_label_init() + * more than once). + */ + jump_label_init(); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 13d6cb188835..a6669c40c1db 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c index 1da4303128ef..7df08004c47d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-vsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -71,7 +71,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, } /* - * Currently to set and and get all the vsx state, you need to call + * Currently to set and get all the vsx state, you need to call * the fp and VMX calls as well. This only get/sets the lower 32 * 128bit VSX registers. */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5761f08dae95..2b2d0b0fbb30 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -113,7 +113,6 @@ void __init setup_tlb_core_data(void) * Should we panic instead? 
*/ WARN_ONCE(smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && book3e_htw_mode != PPC_HTW_E6500, "%s: unsupported MMU configuration\n", __func__); } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 472596a109e2..86bb5bb4c143 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -377,9 +377,12 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* - * Force reload of FP/VEC. - * This has to be done before copying stuff into tsk->thread.fpr/vr - * for the reasons explained in the previous comment. + * Force reload of FP/VEC/VSX so userspace sees any changes. + * Clear these bits from the user process' MSR before copying into the + * thread struct. If we are rescheduled or preempted and another task + * uses FP/VEC/VSX, and this process has the MSR bits set, then the + * context switch code will save the current CPU state into the + * thread_struct - possibly overwriting the data we are updating here. */ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bcefab484ea6..6b850c157a62 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,6 @@ #endif #include #include -#include #include #include #include @@ -619,20 +619,6 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif -#ifdef CONFIG_NMI_IPI -static void crash_stop_this_cpu(struct pt_regs *regs) -#else -static void crash_stop_this_cpu(void *dummy) -#endif -{ - /* - * Just busy wait here and avoid marking CPU as offline to ensure - * register data is captured appropriately. 
- */ - while (1) - cpu_relax(); -} - void crash_smp_send_stop(void) { static bool stopped = false; @@ -651,11 +637,14 @@ void crash_smp_send_stop(void) stopped = true; -#ifdef CONFIG_NMI_IPI - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); -#else - smp_call_function(crash_stop_this_cpu, NULL, 0); -#endif /* CONFIG_NMI_IPI */ +#ifdef CONFIG_KEXEC_CORE + if (kexec_crash_image) { + crash_kexec_prepare(); + return; + } +#endif + + smp_send_stop(); } #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c new file mode 100644 index 000000000000..81ace9e8b72b --- /dev/null +++ b/arch/powerpc/kernel/syscall.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +typedef long (*syscall_fn)(long, long, long, long, long, long); + +/* Has to run notrace because it is entered not completely "reconciled" */ +notrace long system_call_exception(long r3, long r4, long r5, + long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs) +{ + long ret; + syscall_fn f; + + kuap_lock(); + + add_random_kstack_offset(); + regs->orig_gpr3 = r3; + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + trace_hardirqs_off(); /* finish reconciling */ + + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + user_exit_irqoff(); + + BUG_ON(regs_is_unrecoverable(regs)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(arch_irq_disabled_regs(regs)); + +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. 
+ */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_assert_locked(); + + booke_restore_dbcr0(); + + account_cpu_user_entry(); + + account_stolen_time(); + + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + set_bits(_TIF_RESTOREALL, &current_thread_info()->flags); + + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction.
+ */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected. + */ + return -ENOSYS; + } +#endif // CONFIG_PPC_TRANSACTIONAL_MEM + + local_irq_enable(); + + if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + /* + * We use the return value of do_syscall_trace_enter() as the + * syscall number. If the syscall was rejected for any reason + * do_syscall_trace_enter() returns an invalid syscall number + * and the test against NR_syscalls will fail and the return + * value to be used is in regs->gpr[3]. + */ + r0 = do_syscall_trace_enter(regs); + if (unlikely(r0 >= NR_syscalls)) + return regs->gpr[3]; + r3 = regs->gpr[3]; + r4 = regs->gpr[4]; + r5 = regs->gpr[5]; + r6 = regs->gpr[6]; + r7 = regs->gpr[7]; + r8 = regs->gpr[8]; + + } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + return -ENOSYS; + } + + /* May be faster to do array_index_nospec? 
*/ + barrier_nospec(); + + if (unlikely(is_compat_task())) { + f = (void *)compat_sys_call_table[r0]; + + r3 &= 0x00000000ffffffffULL; + r4 &= 0x00000000ffffffffULL; + r5 &= 0x00000000ffffffffULL; + r6 &= 0x00000000ffffffffULL; + r7 &= 0x00000000ffffffffULL; + r8 &= 0x00000000ffffffffULL; + + } else { + f = (void *)sys_call_table[r0]; + } + + ret = f(r3, r4, r5, r6, r7, r8); + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: the powerpc architecture + * may have two kinds of stack alignment (16-bytes and 8-bytes). + * + * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3]. + */ + choose_random_kstack_offset(mftb()); + + return ret; +} diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2a893e06e4f1..cb158c32b50b 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -69,8 +69,8 @@ ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) /* Make sure it is what we expect it to be */ if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%s) != old (%s)", - (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); + pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, + ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); return -EINVAL; } @@ -125,9 +125,9 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } @@ -159,8 +159,8 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) 
&& !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %s around bl _mcount\n", - ppc_inst_as_str(op)); + pr_err("Unexpected instruction %08lx around bl _mcount\n", + ppc_inst_as_ulong(op)); return -EINVAL; } } else if (IS_ENABLED(CONFIG_PPC64)) { @@ -174,7 +174,8 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, + ppc_inst_as_ulong(op)); return -EINVAL; } } @@ -310,9 +311,9 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } @@ -416,8 +417,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %s %s\n", - ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); + pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, + ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); return -EINVAL; } @@ -486,7 +487,8 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); + pr_err("Unexpected call sequence at %p: %08lx\n", + ip, ppc_inst_as_ulong(op)); return -EINVAL; } @@ -562,9 +564,9 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + 
pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3aaa50e5c72f..dadfcef5d6db 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1676,7 +1676,7 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 static void tm_unavailable(struct pt_regs *regs) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index d4e43ab2d5df..0085ae464dac 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -91,6 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 3: crclr cr0*4+so sync + icbi 0,r1 isync li r3,0 blr diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 7d28b9553654..dbcc4a793f0b 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -91,6 +91,10 @@ static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +#ifdef CONFIG_PPC_PSERIES +static u64 wd_timeout_pct; +#endif + /* * Try to take the exclusive watchdog action / NMI IPI / printing lock. * wd_smp_lock must be held. If this fails, we should return and wait @@ -353,7 +357,7 @@ static void watchdog_timer_interrupt(int cpu) if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { /* * Something has called printk from NMI context. It might be - * stuck, so this this triggers a flush that will get that + * stuck, so this triggers a flush that will get that * printk output to the console. * * See wd_lockup_ipi. 
@@ -527,7 +531,13 @@ static int stop_watchdog_on_cpu(unsigned int cpu) static void watchdog_calc_timeouts(void) { - wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq; + u64 threshold = watchdog_thresh; + +#ifdef CONFIG_PPC_PSERIES + threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100; +#endif + + wd_panic_timeout_tb = threshold * ppc_tb_freq; /* Have the SMP detector trigger a bit later */ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2; @@ -570,3 +580,12 @@ int __init watchdog_nmi_probe(void) } return 0; } + +#ifdef CONFIG_PPC_PSERIES +void watchdog_nmi_set_timeout_pct(u64 pct) +{ + pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); + WRITE_ONCE(wd_timeout_pct, pct); + lockup_detector_reconfigure(); +} +#endif diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 7ab4980fe13a..cf84bfe9e27e 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include void machine_kexec_mask_interrupts(void) { unsigned int i; diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 80f54723cf6d..252724ed666a 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -40,6 +40,14 @@ #define REAL_MODE_TIMEOUT 10000 static int time_to_dump; + +/* + * In case of system reset, secondary CPUs enter crash_kexec_secondary without + * having to send an IPI explicitly. So, indicate if the crash is via + * system reset to avoid sending another IPI. + */ +static int is_via_system_reset; + /* * crash_wake_offline should be set to 1 by platforms that intend to wake * up offline cpus prior to jumping to a kdump kernel.
Currently powernv @@ -101,7 +109,7 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -static void crash_kexec_prepare_cpus(int cpu) +static void crash_kexec_prepare_cpus(void) { unsigned int msecs; volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ @@ -113,7 +121,15 @@ static void crash_kexec_prepare_cpus(int cpu) if (crash_wake_offline) ncpus = num_present_cpus() - 1; - crash_send_ipi(crash_ipi_callback); + /* + * If we came in via system reset, secondaries enter via crash_kexec_secondary(). + * So, wait a while for the secondary CPUs to enter for that case. + * Else, send IPI to all other CPUs. + */ + if (is_via_system_reset) + mdelay(PRIMARY_TIMEOUT); + else + crash_send_ipi(crash_ipi_callback); smp_wmb(); again: @@ -202,7 +218,7 @@ void crash_kexec_secondary(struct pt_regs *regs) #else /* ! CONFIG_SMP */ -static void crash_kexec_prepare_cpus(int cpu) +static void crash_kexec_prepare_cpus(void) { /* * move the secondaries to us so that we can copy @@ -248,6 +264,32 @@ noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu) static inline void crash_kexec_wait_realmode(int cpu) {} #endif /* CONFIG_SMP && CONFIG_PPC64 */ +void crash_kexec_prepare(void) +{ + /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ + printk_deferred_enter(); + + /* + * This function is only called after the system + * has panicked or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + hard_irq_disable(); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + + crash_kexec_prepare_cpus(); +} + /* * Register a function to be called on shutdown. 
Only use this if you * can't reset your device in the second kernel. @@ -311,35 +353,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ - printk_deferred_enter(); - - /* - * This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means stopping other cpus in - * an SMP system. - * The kernel is broken so disable interrupts. - */ - hard_irq_disable(); - - /* - * Make a note of crashing cpu. Will be used in machine_kexec - * such that another IPI will not be sent. - */ - crashing_cpu = smp_processor_id(); - - /* - * If we came in via system reset, wait a while for the secondary - * CPUs to enter. - */ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) - mdelay(PRIMARY_TIMEOUT); + is_via_system_reset = 1; - crash_kexec_prepare_cpus(crashing_cpu); + crash_smp_send_stop(); crash_save_cpu(regs, crashing_cpu); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index b4981b651d9a..683462e4556b 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1038,6 +1039,48 @@ out: return ret; } +static int copy_property(void *fdt, int node_offset, const struct device_node *dn, + const char *propname) +{ + const void *prop, *fdtprop; + int len = 0, fdtlen = 0, ret = 0; /* 0: property absent in both trees, nothing to sync */ + + prop = of_get_property(dn, propname, &len); + fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen); + + if (fdtprop && !prop) + ret = fdt_delprop(fdt, node_offset, propname); + else if (prop) + ret = fdt_setprop(fdt, node_offset, propname, prop, len); + + return ret; +} + +static int update_pci_dma_nodes(void *fdt, const char *dmapropname) +{ + struct device_node *dn; + int
pci_offset, root_offset, ret = 0; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + root_offset = fdt_path_offset(fdt, "/"); + for_each_node_with_property(dn, dmapropname) { + pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn)); + if (pci_offset < 0) + continue; + + ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window"); + if (ret < 0) + break; + ret = copy_property(fdt, pci_offset, dn, dmapropname); + if (ret < 0) + break; + } + + return ret; +} + /** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. @@ -1099,6 +1142,18 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, if (ret < 0) goto out; +#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" +#define DMA64_PROPNAME "linux,dma64-ddr-window-info" + ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME); + if (ret < 0) + goto out; + + ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME); + if (ret < 0) + goto out; +#undef DMA64_PROPNAME +#undef DIRECT64_PROPNAME + /* Update memory reserve map */ ret = get_reserved_memory_ranges(&rmem); if (ret) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ddd88179110a..dcb398d5e009 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -128,8 +128,25 @@ config KVM_BOOK3S_64_PR and system calls on the host. config KVM_BOOK3S_HV_EXIT_TIMING - bool "Detailed timing for hypervisor real-mode code" + bool + +config KVM_BOOK3S_HV_P9_TIMING + bool "Detailed timing for the P9 entry point" + select KVM_BOOK3S_HV_EXIT_TIMING depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS + help + Calculate time taken for each vcpu during vcpu entry and + exit, time spent inside the guest and time spent handling + hypercalls and page faults. The total, minimum and maximum + times in nanoseconds together with the number of executions + are reported in debugfs in kvm/vm#/vcpu#/timings. + + If unsure, say N. 
+ +config KVM_BOOK3S_HV_P8_TIMING + bool "Detailed timing for hypervisor real-mode code (for POWER8)" + select KVM_BOOK3S_HV_EXIT_TIMING + depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS && !KVM_BOOK3S_HV_P9_TIMING help Calculate time taken for each vcpu in the real-mode guest entry, exit, and interrupt handling code, plus time spent in the guest @@ -149,7 +166,7 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND Old nested HV capable Linux guests have a bug where they don't reflect the PMU in-use status of their L2 guest to the L0 host while the L2 PMU registers are live. This can result in loss - of L2 PMU register state, causing perf to not work correctly in + of L2 PMU register state, causing perf to not work correctly in L2 guests. Selecting this option for the L0 host implements a workaround for diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 0cd23ce07d68..5319d889b184 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -86,6 +86,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + book3s_hv_p9_perf.o \ $(kvm-book3s_64-builtin-tm-objs-y) \ $(kvm-book3s_64-builtin-xics-objs-y) endif diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 42851c32ff3b..9d4b3feda3b6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Supported radix tree geometry. 
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d6589c4fe889..40864373ef87 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -307,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return ret; ret = -ENOMEM; - stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL); + stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN); if (!stt) goto fail_acct; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 631062cde6b4..57d0835e56fd 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2660,11 +2660,21 @@ static struct debugfs_timings_element { const char *name; size_t offset; } timings[] = { +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING + {"vcpu_entry", offsetof(struct kvm_vcpu, arch.vcpu_entry)}, + {"guest_entry", offsetof(struct kvm_vcpu, arch.guest_entry)}, + {"in_guest", offsetof(struct kvm_vcpu, arch.in_guest)}, + {"guest_exit", offsetof(struct kvm_vcpu, arch.guest_exit)}, + {"vcpu_exit", offsetof(struct kvm_vcpu, arch.vcpu_exit)}, + {"hypercall", offsetof(struct kvm_vcpu, arch.hcall)}, + {"page_fault", offsetof(struct kvm_vcpu, arch.pg_fault)}, +#else {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, +#endif }; #define N_TIMINGS (ARRAY_SIZE(timings)) @@ -2783,8 +2793,9 @@ static const struct file_operations debugfs_timings_ops = { /* Create a debugfs directory for the vcpu */ static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, - &debugfs_timings_ops); + if (cpu_has_feature(CPU_FTR_ARCH_300) == IS_ENABLED(CONFIG_KVM_BOOK3S_HV_P9_TIMING)) + debugfs_create_file("timings", 0444, 
debugfs_dentry, vcpu, + &debugfs_timings_ops); return 0; } @@ -4005,8 +4016,10 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); switch_pmu_to_guest(vcpu, &host_os_sprs); + accumulate_time(vcpu, &vcpu->arch.in_guest); trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), __pa(&vcpu->arch.regs)); + accumulate_time(vcpu, &vcpu->arch.guest_exit); kvmhv_restore_hv_return_state(vcpu, &hvregs); switch_pmu_to_host(vcpu, &host_os_sprs); vcpu->arch.shregs.msr = vcpu->arch.regs.msr; @@ -4694,6 +4707,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) struct kvm *kvm; unsigned long msr; + start_timing(vcpu, &vcpu->arch.vcpu_entry); + if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; @@ -4759,6 +4774,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; do { + accumulate_time(vcpu, &vcpu->arch.guest_entry); if (cpu_has_feature(CPU_FTR_ARCH_300)) r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); @@ -4766,6 +4782,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) r = kvmppc_run_vcpu(vcpu); if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { + accumulate_time(vcpu, &vcpu->arch.hcall); + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) { /* * These should have been caught reflected @@ -4781,6 +4799,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { + accumulate_time(vcpu, &vcpu->arch.pg_fault); srcu_idx = srcu_read_lock(&kvm->srcu); r = kvmppc_book3s_hv_page_fault(vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); @@ -4792,12 +4811,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) r = kvmppc_xics_rm_complete(vcpu, 0); } } while (is_kvmppc_resume_guest(r)); + accumulate_time(vcpu, &vcpu->arch.vcpu_exit); vcpu->arch.state = 
KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); srr_regs_clobbered(); + end_timing(vcpu); + return r; } @@ -5643,7 +5665,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); - /* invalidate the entry (what do do on error from the above ?) */ + /* invalidate the entry (what to do on error from the above ?) */ pimap->mapped[i].r_hwirq = 0; /* diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h index 6b7f07d9026b..2f2e59d7d433 100644 --- a/arch/powerpc/kvm/book3s_hv.h +++ b/arch/powerpc/kvm/book3s_hv.h @@ -40,3 +40,13 @@ void switch_pmu_to_guest(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs); void switch_pmu_to_host(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs); + +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING +void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next); +#define start_timing(vcpu, next) accumulate_time(vcpu, next) +#define end_timing(vcpu) accumulate_time(vcpu, NULL) +#else +#define accumulate_time(vcpu, next) do {} while (0) +#define start_timing(vcpu, next) do {} while (0) +#define end_timing(vcpu) do {} while (0) +#endif diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 88a8f6473c4e..da85f046377a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); int kvmppc_hwrng_present(void) { - return powernv_hwrng_present(); + return ppc_md.get_random_seed != NULL; } EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { - if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4])) + if (ppc_md.get_random_seed && + ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4])) return H_SUCCESS; return H_HARDWARE; @@ 
-489,24 +490,6 @@ static long kvmppc_read_one_intr(bool *again) return kvmppc_check_passthru(xisr, xirr, again); } -void kvmppc_bad_interrupt(struct pt_regs *regs) -{ - /* - * 100 could happen at any time, 200 can happen due to invalid real - * address access for example (or any time due to a hardware problem). - */ - if (TRAP(regs) == 0x100) { - get_paca()->in_nmi++; - system_reset_exception(regs); - get_paca()->in_nmi--; - } else if (TRAP(regs) == 0x200) { - machine_check_exception(regs); - } else { - die("Bad interrupt in KVM entry/exit code", regs, SIGABRT); - } - panic("Bad KVM trap"); -} - static void kvmppc_end_cede(struct kvm_vcpu *vcpu) { vcpu->arch.ceded = 0; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 0644732d1a25..be8249cc6107 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -20,6 +20,7 @@ #include #include #include +#include static struct patb_entry *pseries_partition_tb; diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 112a09b33328..34f1db212824 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -3,231 +3,10 @@ #include #include #include -#include -#include #include #include "book3s_hv.h" -static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) -{ - if (!(mmcr0 & MMCR0_FC)) - goto do_freeze; - if (mmcra & MMCRA_SAMPLE_ENABLE) - goto do_freeze; - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (!(mmcr0 & MMCR0_PMCCEXT)) - goto do_freeze; - if (!(mmcra & MMCRA_BHRB_DISABLE)) - goto do_freeze; - } - return; - -do_freeze: - mmcr0 = MMCR0_FC; - mmcra = 0; - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - mmcr0 |= MMCR0_PMCCEXT; - mmcra = MMCRA_BHRB_DISABLE; - } - - mtspr(SPRN_MMCR0, mmcr0); - mtspr(SPRN_MMCRA, mmcra); - isync(); -} - -void switch_pmu_to_guest(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - struct lppaca *lp; - int load_pmu = 1; - - lp 
= vcpu->arch.vpa.pinned_addr; - if (lp) - load_pmu = lp->pmcregs_in_use; - - /* Save host */ - if (ppc_get_pmu_inuse()) { - /* - * It might be better to put PMU handling (at least for the - * host) in the perf subsystem because it knows more about what - * is being used. - */ - - /* POWER9, POWER10 do not implement HPMC or SPMC */ - - host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); - host_os_sprs->mmcra = mfspr(SPRN_MMCRA); - - freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); - - host_os_sprs->pmc1 = mfspr(SPRN_PMC1); - host_os_sprs->pmc2 = mfspr(SPRN_PMC2); - host_os_sprs->pmc3 = mfspr(SPRN_PMC3); - host_os_sprs->pmc4 = mfspr(SPRN_PMC4); - host_os_sprs->pmc5 = mfspr(SPRN_PMC5); - host_os_sprs->pmc6 = mfspr(SPRN_PMC6); - host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); - host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); - host_os_sprs->sdar = mfspr(SPRN_SDAR); - host_os_sprs->siar = mfspr(SPRN_SIAR); - host_os_sprs->sier1 = mfspr(SPRN_SIER); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); - host_os_sprs->sier2 = mfspr(SPRN_SIER2); - host_os_sprs->sier3 = mfspr(SPRN_SIER3); - } - } - -#ifdef CONFIG_PPC_PSERIES - /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = load_pmu; - barrier(); - } -#endif - - /* - * Load guest. If the VPA said the PMCs are not in use but the guest - * tried to access them anyway, HFSCR[PM] will be set by the HFAC - * fault so we can make forward progress. 
- */ - if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { - mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); - mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); - mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); - mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); - mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); - mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); - mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); - mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); - mtspr(SPRN_SDAR, vcpu->arch.sdar); - mtspr(SPRN_SIAR, vcpu->arch.siar); - mtspr(SPRN_SIER, vcpu->arch.sier[0]); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); - mtspr(SPRN_SIER2, vcpu->arch.sier[1]); - mtspr(SPRN_SIER3, vcpu->arch.sier[2]); - } - - /* Set MMCRA then MMCR0 last */ - mtspr(SPRN_MMCRA, vcpu->arch.mmcra); - mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); - /* No isync necessary because we're starting counters */ - - if (!vcpu->arch.nested && - (vcpu->arch.hfscr_permitted & HFSCR_PM)) - vcpu->arch.hfscr |= HFSCR_PM; - } -} -EXPORT_SYMBOL_GPL(switch_pmu_to_guest); - -void switch_pmu_to_host(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - struct lppaca *lp; - int save_pmu = 1; - - lp = vcpu->arch.vpa.pinned_addr; - if (lp) - save_pmu = lp->pmcregs_in_use; - if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { - /* - * Save pmu if this guest is capable of running nested guests. - * This is option is for old L1s that do not set their - * lppaca->pmcregs_in_use properly when entering their L2. 
- */ - save_pmu |= nesting_enabled(vcpu->kvm); - } - - if (save_pmu) { - vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); - vcpu->arch.mmcra = mfspr(SPRN_MMCRA); - - freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); - - vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); - vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); - vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); - vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); - vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); - vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); - vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); - vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); - vcpu->arch.sdar = mfspr(SPRN_SDAR); - vcpu->arch.siar = mfspr(SPRN_SIAR); - vcpu->arch.sier[0] = mfspr(SPRN_SIER); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); - vcpu->arch.sier[1] = mfspr(SPRN_SIER2); - vcpu->arch.sier[2] = mfspr(SPRN_SIER3); - } - - } else if (vcpu->arch.hfscr & HFSCR_PM) { - /* - * The guest accessed PMC SPRs without specifying they should - * be preserved, or it cleared pmcregs_in_use after the last - * access. Just ensure they are frozen. - */ - freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); - - /* - * Demand-fault PMU register access in the guest. - * - * This is used to grab the guest's VPA pmcregs_in_use value - * and reflect it into the host's VPA in the case of a nested - * hypervisor. - * - * It also avoids having to zero-out SPRs after each guest - * exit to avoid side-channels when. - * - * This is cleared here when we exit the guest, so later HFSCR - * interrupt handling can add it back to run the guest with - * PM enabled next time. 
- */ - if (!vcpu->arch.nested) - vcpu->arch.hfscr &= ~HFSCR_PM; - } /* otherwise the PMU should still be frozen */ - -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); - barrier(); - } -#endif - - if (ppc_get_pmu_inuse()) { - mtspr(SPRN_PMC1, host_os_sprs->pmc1); - mtspr(SPRN_PMC2, host_os_sprs->pmc2); - mtspr(SPRN_PMC3, host_os_sprs->pmc3); - mtspr(SPRN_PMC4, host_os_sprs->pmc4); - mtspr(SPRN_PMC5, host_os_sprs->pmc5); - mtspr(SPRN_PMC6, host_os_sprs->pmc6); - mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); - mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); - mtspr(SPRN_SDAR, host_os_sprs->sdar); - mtspr(SPRN_SIAR, host_os_sprs->siar); - mtspr(SPRN_SIER, host_os_sprs->sier1); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); - mtspr(SPRN_SIER2, host_os_sprs->sier2); - mtspr(SPRN_SIER3, host_os_sprs->sier3); - } - - /* Set MMCRA then MMCR0 last */ - mtspr(SPRN_MMCRA, host_os_sprs->mmcra); - mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); - isync(); - } -} -EXPORT_SYMBOL_GPL(switch_pmu_to_host); - static void load_spr_state(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs) { @@ -437,17 +216,8 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING -static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) -{ - struct kvmppc_vcore *vc = vcpu->arch.vcore; - u64 tb = mftb() - vc->tb_offset_applied; - - vcpu->arch.cur_activity = next; - vcpu->arch.cur_tb_start = tb; -} - -static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING +void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; struct kvmhv_tb_accumulator *curr; @@ -477,14 +247,7 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator smp_wmb(); 
curr->seqcount = seq + 2; } - -#define start_timing(vcpu, next) __start_timing(vcpu, next) -#define end_timing(vcpu) __start_timing(vcpu, NULL) -#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next) -#else -#define start_timing(vcpu, next) do {} while (0) -#define end_timing(vcpu) do {} while (0) -#define accumulate_time(vcpu, next) do {} while (0) +EXPORT_SYMBOL_GPL(accumulate_time); #endif static inline u64 mfslbv(unsigned int idx) @@ -795,8 +558,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV); WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME)); - start_timing(vcpu, &vcpu->arch.rm_entry); - vcpu->arch.ceded = 0; /* Save MSR for restore, with EE clear. */ @@ -957,13 +718,13 @@ tm_return_to_guest: mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); - accumulate_time(vcpu, &vcpu->arch.guest_time); - switch_pmu_to_guest(vcpu, &host_os_sprs); - kvmppc_p9_enter_guest(vcpu); - switch_pmu_to_host(vcpu, &host_os_sprs); + accumulate_time(vcpu, &vcpu->arch.in_guest); - accumulate_time(vcpu, &vcpu->arch.rm_intr); + kvmppc_p9_enter_guest(vcpu); + + accumulate_time(vcpu, &vcpu->arch.guest_exit); + switch_pmu_to_host(vcpu, &host_os_sprs); /* XXX: Could get these from r11/12 and paca exsave instead */ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0); @@ -1058,8 +819,6 @@ tm_return_to_guest: #endif } - accumulate_time(vcpu, &vcpu->arch.rm_exit); - /* Advance host PURR/SPURR by the amount used by guest */ purr = mfspr(SPRN_PURR); spurr = mfspr(SPRN_SPURR); @@ -1166,8 +925,6 @@ tm_return_to_guest: asm volatile(PPC_CP_ABORT); out: - end_timing(vcpu); - return trap; } EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9); diff --git a/arch/powerpc/kvm/book3s_hv_p9_perf.c b/arch/powerpc/kvm/book3s_hv_p9_perf.c new file mode 100644 index 000000000000..44d24cca3df1 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_p9_perf.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only + 
+#include +#include + +#include "book3s_hv.h" + +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) +{ + if (!(mmcr0 & MMCR0_FC)) + goto do_freeze; + if (mmcra & MMCRA_SAMPLE_ENABLE) + goto do_freeze; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (!(mmcr0 & MMCR0_PMCCEXT)) + goto do_freeze; + if (!(mmcra & MMCRA_BHRB_DISABLE)) + goto do_freeze; + } + return; + +do_freeze: + mmcr0 = MMCR0_FC; + mmcra = 0; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mmcr0 |= MMCR0_PMCCEXT; + mmcra = MMCRA_BHRB_DISABLE; + } + + mtspr(SPRN_MMCR0, mmcr0); + mtspr(SPRN_MMCRA, mmcra); + isync(); +} + +void switch_pmu_to_guest(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int load_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + load_pmu = lp->pmcregs_in_use; + + /* Save host */ + if (ppc_get_pmu_inuse()) { + /* POWER9, POWER10 do not implement HPMC or SPMC */ + + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); + host_os_sprs->mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); + + host_os_sprs->pmc1 = mfspr(SPRN_PMC1); + host_os_sprs->pmc2 = mfspr(SPRN_PMC2); + host_os_sprs->pmc3 = mfspr(SPRN_PMC3); + host_os_sprs->pmc4 = mfspr(SPRN_PMC4); + host_os_sprs->pmc5 = mfspr(SPRN_PMC5); + host_os_sprs->pmc6 = mfspr(SPRN_PMC6); + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); + host_os_sprs->sdar = mfspr(SPRN_SDAR); + host_os_sprs->siar = mfspr(SPRN_SIAR); + host_os_sprs->sier1 = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); + host_os_sprs->sier2 = mfspr(SPRN_SIER2); + host_os_sprs->sier3 = mfspr(SPRN_SIER3); + } + } + +#ifdef CONFIG_PPC_PSERIES + /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = load_pmu; + barrier(); + } +#endif + + /* + * Load guest. 
If the VPA said the PMCs are not in use but the guest + * tried to access them anyway, HFSCR[PM] will be set by the HFAC + * fault so we can make forward progress. + */ + if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); + mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); + mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); + mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); + mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); + mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); + mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); + mtspr(SPRN_SDAR, vcpu->arch.sdar); + mtspr(SPRN_SIAR, vcpu->arch.siar); + mtspr(SPRN_SIER, vcpu->arch.sier[0]); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); + mtspr(SPRN_SIER2, vcpu->arch.sier[1]); + mtspr(SPRN_SIER3, vcpu->arch.sier[2]); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, vcpu->arch.mmcra); + mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); + /* No isync necessary because we're starting counters */ + + if (!vcpu->arch.nested && + (vcpu->arch.hfscr_permitted & HFSCR_PM)) + vcpu->arch.hfscr |= HFSCR_PM; + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_guest); + +void switch_pmu_to_host(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int save_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + save_pmu = lp->pmcregs_in_use; + if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { + /* + * Save pmu if this guest is capable of running nested guests. + * This option is for old L1s that do not set their + * lppaca->pmcregs_in_use properly when entering their L2.
+ */ + save_pmu |= nesting_enabled(vcpu->kvm); + } + + if (save_pmu) { + vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); + vcpu->arch.mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); + + vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); + vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); + vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); + vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); + vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); + vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); + vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); + vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); + vcpu->arch.sdar = mfspr(SPRN_SDAR); + vcpu->arch.siar = mfspr(SPRN_SIAR); + vcpu->arch.sier[0] = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); + vcpu->arch.sier[1] = mfspr(SPRN_SIER2); + vcpu->arch.sier[2] = mfspr(SPRN_SIER3); + } + + } else if (vcpu->arch.hfscr & HFSCR_PM) { + /* + * The guest accessed PMC SPRs without specifying they should + * be preserved, or it cleared pmcregs_in_use after the last + * access. Just ensure they are frozen. + */ + freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); + + /* + * Demand-fault PMU register access in the guest. + * + * This is used to grab the guest's VPA pmcregs_in_use value + * and reflect it into the host's VPA in the case of a nested + * hypervisor. + * + * It also avoids having to zero-out SPRs after each guest + * exit to avoid side-channels. + * + * This is cleared here when we exit the guest, so later HFSCR + * interrupt handling can add it back to run the guest with + * PM enabled next time.
+ */ + if (!vcpu->arch.nested) + vcpu->arch.hfscr &= ~HFSCR_PM; + } /* otherwise the PMU should still be frozen */ + +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif + + if (ppc_get_pmu_inuse()) { + mtspr(SPRN_PMC1, host_os_sprs->pmc1); + mtspr(SPRN_PMC2, host_os_sprs->pmc2); + mtspr(SPRN_PMC3, host_os_sprs->pmc3); + mtspr(SPRN_PMC4, host_os_sprs->pmc4); + mtspr(SPRN_PMC5, host_os_sprs->pmc5); + mtspr(SPRN_PMC6, host_os_sprs->pmc6); + mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); + mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); + mtspr(SPRN_SDAR, host_os_sprs->sdar); + mtspr(SPRN_SIAR, host_os_sprs->siar); + mtspr(SPRN_SIER, host_os_sprs->sier1); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); + mtspr(SPRN_SIER2, host_os_sprs->sier2); + mtspr(SPRN_SIER3, host_os_sprs->sier3); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, host_os_sprs->mmcra); + mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); + isync(); + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_host); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0fc0e68d20d0..7ded202bf995 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -237,14 +237,14 @@ kvm_novcpu_wakeup: cmpdi r4, 0 beq kvmppc_primary_no_guest -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMENTRY bl kvmhv_start_timing #endif b kvmppc_got_guest kvm_novcpu_exit: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 beq 13f @@ -523,7 +523,7 @@ kvmppc_hv_entry: li r6, KVM_GUEST_MODE_HOST_HV stb r6, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Store initial timestamp */ cmpdi r4, 0 beq 1f @@ -894,7 +894,7 @@ fast_guest_return: li r9, KVM_GUEST_MODE_GUEST_HV 
stb r9, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Accumulate timing */ addi r3, r4, VCPU_TB_GUEST bl kvmhv_accumulate_time @@ -945,7 +945,7 @@ secondary_too_late: cmpdi r4, 0 beq 11f stw r12, VCPU_TRAP(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -959,7 +959,7 @@ hdec_soon: li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 12: stw r12, VCPU_TRAP(r4) mr r9, r4 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -1056,7 +1056,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) li r0, MSR_RI mtmsrd r0, 1 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMINTR mr r4, r9 bl kvmhv_accumulate_time @@ -1135,7 +1135,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMEXIT mr r4, r9 bl kvmhv_accumulate_time @@ -1495,7 +1495,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_LPCR,r8 isync -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 @@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r4, HSTATE_KVM_VCPU(r13) std r3, VCPU_DEC_EXPIRES(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) addi r3, r4, VCPU_TB_CEDE bl kvmhv_accumulate_time @@ -2221,7 +2221,7 @@ kvm_end_cede: /* get vcpu pointer */ ld r4, HSTATE_KVM_VCPU(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMINTR bl kvmhv_accumulate_time #endif @@ -2961,7 +2961,7 @@ kvmppc_fix_pmao: isync blr -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef 
CONFIG_KVM_BOOK3S_HV_P8_TIMING /* * Start timing an activity * r3 = pointer to time accumulation struct, r4 = vcpu diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 8e4c79e2fcd8..08fb0843faf5 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -143,6 +143,7 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, } extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu); +extern unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu); extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, unsigned long mfrr); extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 191992fcb2c2..fb1490761c87 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -33,6 +33,7 @@ #include #endif #include +#include #include "timing.h" #include "irq.h" diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 32e2cb5811cc..8d57c8428531 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -94,6 +94,7 @@ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_GET_CPU_CHARACTERISTICS, "H_GET_CPU_CHARACTERISTICS"}, \ {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ {H_POLL_PENDING, "H_POLL_PENDING"}, \ @@ -125,7 +126,25 @@ {H_COP, "H_COP"}, \ {H_GET_MPP_X, "H_GET_MPP_X"}, \ {H_SET_MODE, "H_SET_MODE"}, \ - {H_RTAS, "H_RTAS"} + {H_REGISTER_PROC_TBL, "H_REGISTER_PROC_TBL"}, \ + {H_QUERY_VAS_CAPABILITIES, "H_QUERY_VAS_CAPABILITIES"}, \ + {H_INT_GET_SOURCE_INFO, "H_INT_GET_SOURCE_INFO"}, \ + {H_INT_SET_SOURCE_CONFIG, "H_INT_SET_SOURCE_CONFIG"}, \ + {H_INT_GET_QUEUE_INFO, "H_INT_GET_QUEUE_INFO"}, \ + {H_INT_SET_QUEUE_CONFIG, "H_INT_SET_QUEUE_CONFIG"}, \ + {H_INT_ESB, "H_INT_ESB"}, \ + {H_INT_RESET, "H_INT_RESET"}, \ + 
{H_RPT_INVALIDATE, "H_RPT_INVALIDATE"}, \ + {H_RTAS, "H_RTAS"}, \ + {H_LOGICAL_MEMOP, "H_LOGICAL_MEMOP"}, \ + {H_CAS, "H_CAS"}, \ + {H_UPDATE_DT, "H_UPDATE_DT"}, \ + {H_GET_PERF_COUNTER_INFO, "H_GET_PERF_COUNTER_INFO"}, \ + {H_SET_PARTITION_TABLE, "H_SET_PARTITION_TABLE"}, \ + {H_ENTER_NESTED, "H_ENTER_NESTED"}, \ + {H_TLB_INVALIDATE, "H_TLB_INVALIDATE"}, \ + {H_COPY_TOFROM_GUEST, "H_COPY_TOFROM_GUEST"} + #define kvm_trace_symbol_kvmret \ {RESUME_GUEST, "RESUME_GUEST"}, \ diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 4f141daafcff..23c7805fb7b3 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -53,9 +53,6 @@ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ PPC_RAW_ADDI(t, a, i)) -#define TEST_SETB(t, bfa) ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2)) - - static void __init init_pt_regs(struct pt_regs *regs) { static unsigned long msr; @@ -935,21 +932,21 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "BFA = 1, CR = GT", - .instr = TEST_SETB(20, 1), + .instr = ppc_inst(PPC_RAW_SETB(20, 1)), .regs = { .ccr = 0x4000000, } }, { .descr = "BFA = 4, CR = LT", - .instr = TEST_SETB(20, 4), + .instr = ppc_inst(PPC_RAW_SETB(20, 4)), .regs = { .ccr = 0x8000, } }, { .descr = "BFA = 5, CR = EQ", - .instr = TEST_SETB(20, 5), + .instr = ppc_inst(PPC_RAW_SETB(20, 5)), .regs = { .ccr = 0x200, } @@ -1616,11 +1613,11 @@ static int __init emulate_compute_instr(struct pt_regs *regs, if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { if (negative) return -EFAULT; - pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } if (analysed == 1 && negative) - pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); if 
(!negative) emulate_update_regs(regs, &op); return 0; @@ -1637,7 +1634,7 @@ static int __init execute_compute_instr(struct pt_regs *regs, /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 49a737fbbd18..a96b73006dfb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -159,9 +159,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; + unsigned long size; + size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET); + setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X); - if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence()) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; @@ -245,10 +248,9 @@ void mmu_mark_rodata_ro(void) } /* - * Set up one of the I/D BAT (block address translation) register pairs. + * Set up one of the D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. 
- * On 603+, only set IBAT when _PAGE_EXEC is set */ void __init setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot) @@ -284,10 +286,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* G bit must be zero in IBATs */ flags &= ~_PAGE_EXEC; } - if (flags & _PAGE_EXEC) - bat[0] = bat[1]; - else - bat[0].batu = bat[0].batl = 0; bat_addrs[index].start = virt; bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 2e0cad5817ba..ae008b9df0e6 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -13,6 +13,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fc92613dc2bf..363a9447d63d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -408,7 +408,7 @@ repeat: ssize); if (ret == -1) { /* - * Try to to keep bolted entries in primary. + * Try to keep bolted entries in primary. * Remove non bolted entries and try insert again */ ret = mmu_hash_ops.hpte_remove(hpteg); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 753e62ba67af..1d2675ab6711 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index d2fb776febb4..5e3195568525 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -48,11 +48,13 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; /* - * To avoid NMMU hang while relaxing access we need to flush the tlb before - * we set the new value. 
+ * POWER9 NMMU must flush the TLB after clearing the PTE before + * installing a PTE with more relaxed access permissions, see + * radix__ptep_set_access_flags. */ - if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && - (atomic_read(&mm->context.copros) > 0)) + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + atomic_read(&mm->context.copros) > 0) radix__flush_hugetlb_page(vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index db2f3d193448..698274109c91 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1018,16 +1018,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, unsigned long change = pte_val(entry) ^ pte_val(*ptep); /* - * To avoid NMMU hang while relaxing access, we need mark - * the pte invalid in between. + * On POWER9, the NMMU is not able to relax PTE access permissions + * for a translation with a TLB. The PTE must be invalidated, TLB + * flushed before the new PTE is installed. + * + * This only needs to be done for radix, because hash translation does + * flush when updating the linux pte (and we don't support NMMU + * accelerators on HPT on POWER9 anyway XXX: do we?). + * + * POWER10 (and P9P) NMMU does behave as per ISA. 
 */ - if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) { + if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) && + atomic_read(&mm->context.copros) > 0) { unsigned long old_pte, new_pte; old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID); - /* - * new value of pte - */ new_pte = old_pte | set; radix__flush_tlb_page_psize(mm, address, psize); __radix_pte_update(ptep, _PAGE_INVALID, new_pte); @@ -1035,9 +1040,12 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, __radix_pte_update(ptep, 0, set); /* * Book3S does not require a TLB flush when relaxing access - * restrictions when the address space is not attached to a - * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architecture. + * restrictions (modulo the POWER9 nest + * MMU issue above) because the MMU will reload the PTE after + * taking an access fault, as defined by the architecture. See + * "Setting a Reference or Change Bit or Upgrading Access + * Authority (PTE Subject to Atomic Hardware Updates)" in + * Power ISA Version 3.1B. */ } /* See ptesync comment in radix__set_pte_at */ @@ -1050,11 +1058,12 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; /* - * To avoid NMMU hang while relaxing access we need to flush the tlb before - * we set the new value. We need to do this only for radix, because hash - * translation does flush when updating the linux pte. + * POWER9 NMMU must flush the TLB after clearing the PTE before + * installing a PTE with more relaxed access permissions, see + * radix__ptep_set_access_flags.
*/ - if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && (atomic_read(&mm->context.copros) > 0)) radix__flush_tlb_page(vma, addr); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index dda51fef2d2e..4e29b619578c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -755,10 +755,18 @@ EXPORT_SYMBOL(radix__local_flush_tlb_page); static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* - * P9 nest MMU has issues with the page walk cache - * caching PTEs and not flushing them properly when - * RIC = 0 for a PID/LPID invalidate + * The P9 nest MMU has issues with the page walk cache caching PTEs + * and not flushing them when RIC = 0 for a PID/LPID invalidate. + * + * This may have been fixed in shipping firmware (by disabling PWC + * or preventing it from caching PTEs), but until that is confirmed, + * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes + * to RIC=2. + * + * POWER10 (and P9P) does not have this problem. */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; if (atomic_read(&mm->context.copros) > 0) return true; return false; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b282af39fcf6..bc84a594ca62 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,6 +24,7 @@ #include #include #include +#include bool hugetlb_disabled = false; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 693a3a7a9463..62d9af6606cd 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -69,43 +69,9 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -/* - * this tells the system to map all of ram with the segregs - * (i.e. page tables) instead of the bats. 
- * -- Cort - */ -int __map_without_bats; -int __map_without_ltlbs; - /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * Check for command-line options that affect what MMU_init will do. - */ -static void __init MMU_setup(void) -{ - /* Check for nobats option (used in mapin_ram). */ - if (strstr(boot_command_line, "nobats")) { - __map_without_bats = 1; - } - - if (strstr(boot_command_line, "noltlbs")) { - __map_without_ltlbs = 1; - } - if (IS_ENABLED(CONFIG_PPC_8xx)) - return; - - if (IS_ENABLED(CONFIG_KFENCE)) - __map_without_ltlbs = 1; - - if (debug_pagealloc_enabled()) - __map_without_ltlbs = 1; - - if (strict_kernel_rwx_enabled()) - __map_without_ltlbs = 1; -} - /* * MMU_init sets up the basic memory mappings for the kernel, * including both RAM and possibly some I/O regions, @@ -116,9 +82,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* parse args from command line */ - MMU_setup(); - /* * Reserve gigantic pages for hugetlb. This MUST occur before * lowmem_end_addr is initialized below. 
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 4999aadb1867..699eeffd9f55 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PPC32) += init_32.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o obj-$(CONFIG_PPC_BOOK3S_64) += init_book3s_64.o +obj-$(CONFIG_PPC_BOOK3E_64) += init_book3e_64.o diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index f3e4d069e0ba..a70828a6d935 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -25,7 +25,7 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) int i; for (i = 0; i < PTRS_PER_PTE; i++, ptep++) - __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 1); } int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c new file mode 100644 index 000000000000..11519e88dc6b --- /dev/null +++ b/arch/powerpc/mm/kasan/init_book3e_64.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KASAN for 64-bit Book3e powerpc + * + * Copyright 2022, Christophe Leroy, CS GROUP France + */ + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include + +#include + +static inline bool kasan_pud_table(p4d_t p4d) +{ + return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud)); +} + +static inline bool kasan_pmd_table(pud_t pud) +{ + return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd)); +} + +static inline bool kasan_pte_table(pmd_t pmd) +{ + return pmd_page(pmd) == virt_to_page(lm_alias(kasan_early_shadow_pte)); +} + +static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = 
pgd_offset_k(ea); + p4dp = p4d_offset(pgdp, ea); + if (kasan_pud_table(*p4dp)) { + pudp = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pudp); + } + pudp = pud_offset(p4dp, ea); + if (kasan_pmd_table(*pudp)) { + pmdp = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (kasan_pte_table(*pmdp)) { + ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + + __set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot), 0); + + return 0; +} + +static void __init kasan_init_phys_region(void *start, void *end) +{ + unsigned long k_start, k_end, k_cur; + void *va; + + if (start >= end) + return; + + k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE); + k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE); + + va = memblock_alloc(k_end - k_start, PAGE_SIZE); + for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE) + kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL); +} + +void __init kasan_early_init(void) +{ + int i; + unsigned long addr; + pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START); + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL); + + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i], + kasan_early_shadow_pte); + + for (i = 0; i < PTRS_PER_PUD; i++) + pud_populate(&init_mm, &kasan_early_shadow_pud[i], + 
kasan_early_shadow_pmd); + + for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE) + p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud); +} + +void __init kasan_init(void) +{ + phys_addr_t start, end; + u64 i; + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO); + + for_each_mem_range(i, &start, &end) + kasan_init_phys_region((void *)start, (void *)end); + + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + + /* Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KASAN init done\n"); +} + +void __init kasan_late_init(void) { } diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c index 0da5566d6b84..9300d641cf9a 100644 --- a/arch/powerpc/mm/kasan/init_book3s_64.c +++ b/arch/powerpc/mm/kasan/init_book3s_64.c @@ -99,4 +99,6 @@ void __init kasan_init(void) pr_info("KASAN init done\n"); } +void __init kasan_early_init(void) { } + void __init kasan_late_init(void) { } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a97128a48817..7b0d286bf9ba 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 63c4b1a4d435..229c72e49198 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -92,7 +92,6 @@ extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); -extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; diff --git a/arch/powerpc/mm/nohash/40x.c 
b/arch/powerpc/mm/nohash/40x.c index b32e465a3d52..3684d6e570fb 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -43,7 +43,6 @@ #include -extern int __map_without_ltlbs; /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. */ @@ -94,7 +93,13 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) p = 0; s = total_lowmem; - if (__map_without_ltlbs) + if (IS_ENABLED(CONFIG_KFENCE)) + return 0; + + if (debug_pagealloc_enabled()) + return 0; + + if (strict_kernel_rwx_enabled()) return 0; while (s >= LARGE_PAGE_SIZE_16M) { diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 27f9186ae374..dbbfe897455d 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -14,8 +14,6 @@ #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) -extern int __map_without_ltlbs; - static unsigned long block_mapped_ram; /* @@ -28,8 +26,6 @@ phys_addr_t v_block_mapped(unsigned long va) if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) return p + va - VIRT_IMMR_BASE; - if (__map_without_ltlbs) - return 0; if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) return __pa(va); return 0; @@ -45,8 +41,6 @@ unsigned long p_block_mapped(phys_addr_t pa) if (pa >= p && pa < p + IMMR_SIZE) return VIRT_IMMR_BASE + pa - p; - if (__map_without_ltlbs) - return 0; if (pa < block_mapped_ram) return (unsigned long)__va(pa); return 0; @@ -153,9 +147,6 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) mmu_mapin_immr(); - if (__map_without_ltlbs) - return 0; - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); if (debug_pagealloc_enabled_or_kfence()) { top = boundary; @@ -179,8 +170,8 @@ void mmu_mark_initmem_nx(void) unsigned long boundary = strict_kernel_rwx_enabled() ? 
sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + if (!debug_pagealloc_enabled_or_kfence()) + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); mmu_pin_tlb(block_mapped_ram, false); } diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index 307ca919d393..c7d4b317a823 100644 --- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -103,21 +103,11 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) int found = 0; mtspr(SPRN_MAS6, pid << 16); - if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { - asm volatile( - "li %0,0\n" - "tlbsx. 0,%1\n" - "bne 1f\n" - "li %0,1\n" - "1:\n" - : "=&r"(found) : "r"(ea)); - } else { - asm volatile( - "tlbsx 0,%1\n" - "mfspr %0,0x271\n" - "srwi %0,%0,31\n" - : "=&r"(found) : "r"(ea)); - } + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); return found; } @@ -169,13 +159,9 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) mtspr(SPRN_MAS1, mas1); mtspr(SPRN_MAS2, mas2); - if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { - mtspr(SPRN_MAS7_MAS3, mas7_3); - } else { - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); - mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); - } + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); asm volatile ("tlbwe"); diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index dd39074de9af..d62b613a0d5d 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -186,7 +186,7 @@ _GLOBAL(_tlbivax_bcast) isync PPC_TLBIVAX(0, R3) isync - eieio + mbar tlbsync BEGIN_FTR_SECTION b 1f @@ -355,7 +355,7 @@ 
_GLOBAL(_tlbivax_bcast) rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND 1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ PPC_TLBIVAX(0,R3) - eieio + mbar tlbsync sync wrtee r10 diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 8b97c4acfebf..68ffbfdba894 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -152,16 +152,7 @@ tlb_miss_common_bolted: clrrdi r15,r15,3 beq tlb_miss_fault_bolted /* No PGDIR, bail */ -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) ldx r14,r14,r15 /* grab pgd entry */ - beq tlb_miss_done_bolted /* tlb exists already, bail */ -MMU_FTR_SECTION_ELSE - ldx r14,r14,r15 /* grab pgd entry */ -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 @@ -222,10 +213,11 @@ itlb_miss_kernel_bolted: tlb_miss_kernel_bolted: mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ + srdi r15,r16,44 /* get kernel region */ + andi. r15,r15,1 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ tlb_miss_common_bolted + bne+ tlb_miss_common_bolted tlb_miss_fault_bolted: /* We need to check if it was an instruction miss */ @@ -507,7 +499,9 @@ tlb_miss_huge_e6500: tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) - cmpldi cr1,r15,8 /* Check for vmalloc region */ + srdi r15,r16,44 /* get kernel region */ + xoris r15,r15,0xc /* Check for vmalloc region */ + cmplwi cr1,r15,1 beq+ cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: @@ -541,16 +535,18 @@ itlb_miss_fault_e6500: */ mfspr r14,SPRN_ESR mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ + srdi r15,r16,44 /* get region */ + xoris r15,r15,0xc + cmpldi cr0,r15,0 /* linear mapping ? 
*/ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r15,1 /* vmalloc mapping ? */ /* The page tables are mapped virtually linear. At this point, though, * we don't know whether we are trying to fault in a first level * virtual address or a virtual page table address. We can get that * from bit 0x1 of the region ID which we have set for a page table */ - andi. r10,r15,0x1 + andis. r10,r15,0x1 bne- virt_page_table_tlb_miss std r14,EX_TLB_ESR(r12); /* save ESR */ @@ -562,7 +558,7 @@ itlb_miss_fault_e6500: /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r15,0 /* Check for user region */ + srdi. r15,r16,60 /* Check for user region */ /* We pre-test some combination of permissions to avoid double * faults: @@ -583,13 +579,12 @@ itlb_miss_fault_e6500: */ rlwimi r11,r14,32-19,27,27 rlwimi r11,r14,32-16,19,19 - beq normal_tlb_miss + beq normal_tlb_miss_user /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss + beq+ cr1,normal_tlb_miss /* We got a crappy address, just fault with whatever DEAR and ESR * are here @@ -615,27 +610,28 @@ itlb_miss_fault_e6500: * * Faulting address is SRR0 which is already in r16 */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ + srdi r15,r16,44 /* get region */ + xoris r15,r15,0xc + cmpldi cr0,r15,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r15,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h - cmpldi cr0,r15,0 /* Check for user region */ + srdi. 
r15,r16,60 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ - beq normal_tlb_miss + beq normal_tlb_miss_user li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h /* XXX replace the RMW cycles with immediate loads + writes */ mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss + beq+ cr1,normal_tlb_miss /* We got a crappy address, just fault */ TLB_MISS_EPILOG_ERROR @@ -653,6 +649,12 @@ itlb_miss_fault_e6500: * r11 = PTE permission mask * r10 = crap (free to use) */ +normal_tlb_miss_user: +#ifdef CONFIG_PPC_KUAP + mfspr r14,SPRN_MAS1 + rlwinm. r14,r14,0,0x3fff0000 + beq- normal_tlb_miss_access_fault /* KUAP fault */ +#endif normal_tlb_miss: /* So we first construct the page table address. We do that by * shifting the bottom of the address (not the region ID) by @@ -662,32 +664,19 @@ normal_tlb_miss: * NOTE: For 64K pages, we do things slightly differently in * order to handle the weird page table format used by linux */ - ori r10,r15,0x1 + srdi r15,r16,44 + oris r10,r15,0x1 rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 - sldi r15,r10,60 - clrrdi r14,r14,3 + sldi r15,r10,44 + clrrdi r14,r14,19 or r10,r15,r14 -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) ld r14,0(r10) - beq normal_tlb_miss_done -MMU_FTR_SECTION_ELSE - ld r14,0(r10) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) finish_normal_tlb_miss: /* Check if required permissions are met */ andc. r15,r11,r14 bne- normal_tlb_miss_access_fault -#ifdef CONFIG_PPC_KUAP - mfspr r11,SPRN_MAS1 - rlwinm. 
r10,r11,0,0x3fff0000 - beq- normal_tlb_miss_access_fault /* KUAP fault */ -#endif /* Now we build the MAS: * @@ -709,9 +698,7 @@ finish_normal_tlb_miss: rldicl r10,r14,64-8,64-8 cmpldi cr0,r10,BOOK3E_PAGESZ_4K beq- 1f -#ifndef CONFIG_PPC_KUAP mfspr r11,SPRN_MAS1 -#endif rlwimi r11,r14,31,21,24 rlwinm r11,r11,0,21,19 mtspr SPRN_MAS1,r11 @@ -728,13 +715,9 @@ finish_normal_tlb_miss: li r11,MAS3_SW|MAS3_UW andc r15,r15,r11 1: -BEGIN_MMU_FTR_SECTION srdi r16,r15,32 mtspr SPRN_MAS3,r15 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r15 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -786,6 +769,7 @@ normal_tlb_miss_access_fault: */ virt_page_table_tlb_miss: /* Are we hitting a kernel page table ? */ + srdi r15,r16,60 andi. r10,r15,0x8 /* The cool thing now is that r10 contains 0 for user and 8 for kernel, @@ -810,18 +794,12 @@ virt_page_table_tlb_miss: #else 1: #endif -BEGIN_MMU_FTR_SECTION - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - */ - PPC_TLBSRX_DOT(0,R16) - beq virt_page_table_tlb_miss_done -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Now, we need to walk the page tables. First check if we are in * range. */ - rldicl. 
r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + rldicl r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + cmpldi r10,0x80 bne- virt_page_table_tlb_miss_fault /* Get the PGD pointer */ @@ -867,41 +845,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) clrldi r11,r15,4 /* remove region ID from RPN */ ori r10,r11,1 /* Or-in SR */ -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -BEGIN_MMU_FTR_SECTION -virt_page_table_tlb_miss_done: - - /* We have overridden MAS2:EPN but currently our primary TLB miss - * handler will always restore it so that should not be an issue, - * if we ever optimize the primary handler to not write MAS2 on - * some cases, we'll have to restore MAS2:EPN here based on the - * original fault's DEAR. If we do that we have to modify the - * ITLB miss handler to also store SRR0 in the exception frame - * as DEAR. - * - * However, one nasty thing we did is we cleared the reservation - * (well, potentially we did). We do a trick here thus if we - * are not a level 0 exception (we interrupted the TLB miss) we - * offset the return address by -4 in order to replay the tlbsrx - * instruction there - */ - subf r10,r13,r12 - cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE - bne- 1f - ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) - addi r10,r11,-4 - std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) -1: -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ TLB_MISS_EPILOG_SUCCESS rfi @@ -969,23 +918,24 @@ virt_page_table_tlb_miss_whacko_fault: */ mfspr r14,SPRN_ESR mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ + srdi r11,r16,44 /* get region */ + xoris r11,r11,0xc + cmpldi cr0,r11,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r11,1 /* vmalloc mapping ? 
*/ /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r11,0 /* Check for user region */ + srdi. r11,r16,60 /* Check for user region */ ld r15,PACAPGD(r13) /* Load user pgdir */ beq htw_tlb_miss /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ htw_tlb_miss + beq+ cr1,htw_tlb_miss /* We got a crappy address, just fault with whatever DEAR and ESR * are here @@ -1011,19 +961,20 @@ virt_page_table_tlb_miss_whacko_fault: * * Faulting address is SRR0 which is already in r16 */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ + srdi r11,r16,44 /* get region */ + xoris r11,r11,0xc + cmpldi cr0,r11,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r11,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r11,0 /* Check for user region */ + srdi. 
r11,r16,60 /* Check for user region */ ld r15,PACAPGD(r13) /* Load user pgdir */ beq htw_tlb_miss /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ @@ -1116,13 +1067,9 @@ htw_tlb_miss: */ ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -1203,13 +1150,9 @@ tlb_load_linear: clrldi r10,r10,4 /* clear region bits */ ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a56ade39dc68..3ac73f9fb5d5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,9 +135,9 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) { - mmu_mark_initmem_nx(); - } else { + mmu_mark_initmem_nx(); + + if (!v_block_mapped((unsigned long)_sinittext)) { set_memory_nx((unsigned long)_sinittext, numpages); set_memory_rw((unsigned long)_sinittext, numpages); } diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index 03607ab90c66..f884760ca5cf 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = { .clear = " ", }, { .mask = _PAGE_RW, - .val = _PAGE_RW, - .set = "rw", - .clear = "r ", + .val = 0, + .set = "r ", + .clear = "rw", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, diff --git 
a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index e46ed1e8c6ca..43f1c76d48ce 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -294,8 +294,10 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 dst_reg_h = dst_reg - 1; u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; + u32 ax_reg = bpf_to_ppc(BPF_REG_AX); u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); + u32 save_reg, ret_reg; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -798,25 +800,71 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: - if (imm != BPF_ADD) { - pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", - code, i); - return -ENOTSUPP; - } - - /* *(u32 *)(dst + off) += src */ + save_reg = _R0; + ret_reg = src_reg; bpf_set_seen_register(ctx, tmp_reg); + bpf_set_seen_register(ctx, ax_reg); + /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); + tmp_idx = ctx->idx * 4; /* load value from memory into r0 */ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); - /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); - /* store result back */ - EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); + + /* Save old value in BPF_REG_AX */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(ax_reg, _R0)); + + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + EMIT(PPC_RAW_AND(_R0, _R0, src_reg)); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + EMIT(PPC_RAW_OR(_R0, _R0, src_reg)); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + EMIT(PPC_RAW_XOR(_R0, _R0, src_reg)); + break; + case BPF_CMPXCHG: + /* + * Return old value in BPF_REG_0 for BPF_CMPXCHG & + * in src_reg for other cases. 
+ */ + ret_reg = bpf_to_ppc(BPF_REG_0); + + /* Compare with old value in BPF_REG_0 */ + EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0)); + /* Don't set if different from old value */ + PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4); + fallthrough; + case BPF_XCHG: + save_reg = src_reg; + break; + default: + pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -EOPNOTSUPP; + } + + /* store new value */ + EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg)); /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + PPC_BCC_SHORT(COND_NE, tmp_idx); + + /* For the BPF_FETCH variant, get old data into src_reg */ + if (imm & BPF_FETCH) { + EMIT(PPC_RAW_MR(ret_reg, ax_reg)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */ + } break; case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 594c54931e20..29ee306d6302 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -360,6 +360,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 size = BPF_SIZE(code); u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); + u32 save_reg, ret_reg; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -777,42 +778,83 @@ emit_clear: * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: - if (imm != BPF_ADD) { - pr_err_ratelimited( - "eBPF filter atomic op code %02x (@%d) unsupported\n", - code, i); - return -ENOTSUPP; - } + case BPF_STX | BPF_ATOMIC | BPF_DW: + save_reg = tmp2_reg; + ret_reg = src_reg; - /* *(u32 *)(dst + off) += src */ - - /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); + /* Get offset into TMP_REG_1 */ + EMIT(PPC_RAW_LI(tmp1_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - 
EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0)); - /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); - /* store result back */ - EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg)); - /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_NE, tmp_idx); - break; - case BPF_STX | BPF_ATOMIC | BPF_DW: - if (imm != BPF_ADD) { + if (size == BPF_DW) + EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + else + EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + + /* Save old value in _R0 */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(_R0, tmp2_reg)); + + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_CMPXCHG: + /* + * Return old value in BPF_REG_0 for BPF_CMPXCHG & + * in src_reg for other cases. 
+ */ + ret_reg = bpf_to_ppc(BPF_REG_0); + + /* Compare with old value in BPF_R0 */ + if (size == BPF_DW) + EMIT(PPC_RAW_CMPD(bpf_to_ppc(BPF_REG_0), tmp2_reg)); + else + EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), tmp2_reg)); + /* Don't set if different from old value */ + PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4); + fallthrough; + case BPF_XCHG: + save_reg = src_reg; + break; + default: pr_err_ratelimited( "eBPF filter atomic op code %02x (@%d) unsupported\n", code, i); - return -ENOTSUPP; + return -EOPNOTSUPP; } - /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); - tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0)); - EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); - EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg)); + /* store new value */ + if (size == BPF_DW) + EMIT(PPC_RAW_STDCX(save_reg, tmp1_reg, dst_reg)); + else + EMIT(PPC_RAW_STWCX(save_reg, tmp1_reg, dst_reg)); + /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); + + if (imm & BPF_FETCH) { + EMIT(PPC_RAW_MR(ret_reg, _R0)); + /* + * Skip unnecessary zero-extension for 32-bit cmpxchg. + * For context, see commit 39491867ace5. + */ + if (size != BPF_DW && imm == BPF_CMPXCHG && + insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + } break; /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 140502a7fdf8..13919eb96931 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1349,27 +1349,22 @@ static void power_pmu_disable(struct pmu *pmu) * a PMI happens during interrupt replay and perf counter * values are cleared by PMU callbacks before replay. * - * If any PMC corresponding to the active PMU events are - * overflown, disable the interrupt by clearing the paca - * bit for PMI since we are disabling the PMU now. - * Otherwise provide a warning if there is PMI pending, but - * no counter is found overflown. 
+ * Disable the interrupt by clearing the paca bit for PMI + * since we are disabling the PMU now. Otherwise provide a + * warning if there is PMI pending, but no counter is found + * overflown. + * + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). */ - if (any_pmc_overflown(cpuhw)) { - /* - * Since power_pmu_disable runs under local_irq_save, it - * could happen that code hits a PMC overflow without PMI - * pending in paca. Hence only clear PMI pending if it was - * set. - * - * If a PMI is pending, then MSR[EE] must be disabled (because - * the masked PMI handler disabling EE). So it is safe to - * call clear_pmi_irq_pending(). - */ - if (pmi_irq_pending()) - clear_pmi_irq_pending(); - } else - WARN_ON(pmi_irq_pending()); + if (pmi_irq_pending()) + clear_pmi_irq_pending(); val = mmcra = cpuhw->mmcr.mmcra; @@ -2488,6 +2483,33 @@ static int power_pmu_prepare_cpu(unsigned int cpu) return 0; } +static ssize_t pmu_name_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + if (ppmu) + return sysfs_emit(buf, "%s", ppmu->name); + + return 0; +} + +static DEVICE_ATTR_RO(pmu_name); + +static struct attribute *pmu_caps_attrs[] = { + &dev_attr_pmu_name.attr, + NULL +}; + +static const struct attribute_group pmu_caps_group = { + .name = "caps", + .attrs = pmu_caps_attrs, +}; + +static const struct attribute_group *pmu_caps_groups[] = { + &pmu_caps_group, + NULL, +}; + int __init register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -2498,6 +2520,10 @@ int __init register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + + if (ppmu->flags & PPMU_ARCH_207S) + power_pmu.attr_update = pmu_caps_groups; + 
power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c index a59c33bed32a..e3e1a68eb1d5 100644 --- a/arch/powerpc/perf/e500-pmu.c +++ b/arch/powerpc/perf/e500-pmu.c @@ -118,12 +118,13 @@ static struct fsl_emb_pmu e500_pmu = { static int init_e500_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type) - return -ENODEV; + unsigned int pvr = mfspr(SPRN_PVR); - if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + /* ec500mc */ + if (PVR_VER(pvr) == PVR_VER_E500MC || PVR_VER(pvr) == PVR_VER_E5500) num_events = 256; - else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + /* e500 */ + else if (PVR_VER(pvr) != PVR_VER_E500V1 && PVR_VER(pvr) != PVR_VER_E500V2) return -ENODEV; return register_fsl_emb_pmu(&e500_pmu); diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c index 44ad65da82ed..bd779a2338f8 100644 --- a/arch/powerpc/perf/e6500-pmu.c +++ b/arch/powerpc/perf/e6500-pmu.c @@ -107,8 +107,9 @@ static struct fsl_emb_pmu e6500_pmu = { static int init_e6500_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e6500")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_VER_E6500) return -ENODEV; return register_fsl_emb_pmu(&e6500_pmu); diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index f3db88aee4dd..b5c414876ed5 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -151,9 +151,19 @@ static const struct attribute_group generic_compat_pmu_format_group = { .attrs = generic_compat_pmu_format_attr, }; +static struct attribute *generic_compat_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group generic_compat_pmu_caps_group = { + .name = "caps", + .attrs = generic_compat_pmu_caps_attrs, +}; + static const struct attribute_group *generic_compat_pmu_attr_groups[] = { 
&generic_compat_pmu_format_group, &generic_compat_pmu_events_group, + &generic_compat_pmu_caps_group, NULL, }; @@ -292,7 +302,7 @@ static int generic_compute_mmcr(u64 event[], int n_ev, } static struct power_pmu generic_compat_pmu = { - .name = "GENERIC_COMPAT", + .name = "ISAv3", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, .test_adder = ISA207_TEST_ADDER, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index cf5406b31e27..33c23225fd54 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1718,16 +1718,16 @@ static int hv_24x7_init(void) { int r; unsigned long hret; + unsigned int pvr = mfspr(SPRN_PVR); struct hv_perf_caps caps; if (!firmware_has_feature(FW_FEATURE_LPAR)) { pr_debug("not a virtualized system, not enabling\n"); return -ENODEV; - } else if (!cur_cpu_spec->oprofile_cpu_type) - return -ENODEV; + } /* POWER8 only supports v1, while POWER9 only supports v2. */ - if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) + if (PVR_VER(pvr) == PVR_POWER8) interface_version = 1; else { interface_version = 2; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 42abbcfc73da..56301b2bc8ae 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -686,6 +686,9 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr2 |= MMCR2_FCS(pmc); } + if (pevents[i]->attr.exclude_idle) + mmcr2 |= MMCR2_FCWAIT(pmc); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { if (pmc <= 4) { val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index ff122603989b..f594fa6580d1 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -249,6 +249,7 @@ /* Bits in MMCR2 for PowerISA v2.07 */ #define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) #define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) +#define MMCR2_FCWAIT(pmc) (1ull << (58 - (((pmc) - 
1) * 9))) #define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9))) #define MAX_ALT 2 diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index e39b15b79a83..552d51a925d3 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -417,8 +417,9 @@ struct power_pmu mpc7450_pmu = { static int __init init_mpc7450_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_7450) return -ENODEV; return register_power_pmu(&mpc7450_pmu); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index c6d51e7093cf..9b5133e361a7 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -258,15 +258,26 @@ static const struct attribute_group power10_pmu_format_group = { .attrs = power10_pmu_format_attr, }; +static struct attribute *power10_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group power10_pmu_caps_group = { + .name = "caps", + .attrs = power10_pmu_caps_attrs, +}; + static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { &power10_pmu_format_group, &power10_pmu_events_group_dd1, + &power10_pmu_caps_group, NULL, }; static const struct attribute_group *power10_pmu_attr_groups[] = { &power10_pmu_format_group, &power10_pmu_events_group, + &power10_pmu_caps_group, NULL, }; @@ -597,12 +608,10 @@ int __init init_power10_pmu(void) unsigned int pvr; int rc; - /* Comes from cpu_specs[] */ - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) + pvr = mfspr(SPRN_PVR); + if (PVR_VER(pvr) != PVR_POWER10) return -ENODEV; - pvr = mfspr(SPRN_PVR); /* Add the ppmu flag for power10 DD1 */ if ((PVR_CFG(pvr) == 1)) power10_pmu.flags |= PPMU_P10_DD1; diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 753b4740ef64..b4708ab73145 100644 --- 
a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -679,9 +679,9 @@ static struct power_pmu power5p_pmu = { int __init init_power5p_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") - && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER5p) return -ENODEV; return register_power_pmu(&power5p_pmu); diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 1f83c4cba0aa..c6aefd0a1cc8 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -620,8 +620,9 @@ static struct power_pmu power5_pmu = { int __init init_power5_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER5) return -ENODEV; return register_power_pmu(&power5_pmu); diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index aec746f86804..5729b6e059de 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -541,8 +541,9 @@ static struct power_pmu power6_pmu = { int __init init_power6_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER6) return -ENODEV; return register_power_pmu(&power6_pmu); diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a74211410b8d..c95ccf2e28da 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -447,11 +447,12 @@ static struct power_pmu power7_pmu = { int __init init_power7_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER7 && PVR_VER(pvr) != PVR_POWER7p) return 
-ENODEV; - if (pvr_version_is(PVR_POWER7p)) + if (PVR_VER(pvr) == PVR_POWER7p) power7_pmu.flags |= PPMU_SIAR_VALID; return register_power_pmu(&power7_pmu); diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index e37b1e714d2b..ef9685065aaf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -187,9 +187,19 @@ static const struct attribute_group power8_pmu_events_group = { .attrs = power8_events_attr, }; +static struct attribute *power8_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group power8_pmu_caps_group = { + .name = "caps", + .attrs = power8_pmu_caps_attrs, +}; + static const struct attribute_group *power8_pmu_attr_groups[] = { &isa207_pmu_format_group, &power8_pmu_events_group, + &power8_pmu_caps_group, NULL, }; @@ -381,9 +391,10 @@ static struct power_pmu power8_pmu = { int __init init_power8_pmu(void) { int rc; + unsigned int pvr = mfspr(SPRN_PVR); - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) + if (PVR_VER(pvr) != PVR_POWER8E && PVR_VER(pvr) != PVR_POWER8NVL && + PVR_VER(pvr) != PVR_POWER8) return -ENODEV; rc = register_power_pmu(&power8_pmu); diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 3ad40ffb9256..cb6a7dc02dd7 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -258,9 +258,19 @@ static const struct attribute_group power9_pmu_format_group = { .attrs = power9_pmu_format_attr, }; +static struct attribute *power9_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group power9_pmu_caps_group = { + .name = "caps", + .attrs = power9_pmu_caps_attrs, +}; + static const struct attribute_group *power9_pmu_attr_groups[] = { &power9_pmu_format_group, &power9_pmu_events_group, + &power9_pmu_caps_group, NULL, }; @@ -457,9 +467,7 @@ int __init init_power9_pmu(void) int rc = 0; unsigned int pvr = mfspr(SPRN_PVR); - /* Comes from cpu_specs[] */ - if 
(!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) + if (PVR_VER(pvr) != PVR_POWER9) return -ENODEV; /* Blacklist events */ diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 09802482ba72..762676fb839e 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -491,9 +491,10 @@ static struct power_pmu ppc970_pmu = { int __init init_ppc970_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") - && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_970 && PVR_VER(pvr) != PVR_970MP && + PVR_VER(pvr) != PVR_970FX && PVR_VER(pvr) != PVR_970GX) return -ENODEV; return register_power_pmu(&ppc970_pmu); diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index 1d3bc35ee1a7..182e12855c27 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -63,7 +63,7 @@ static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask) * known as class 1, 2 and 3. For class 1 units, they are * unconditionally put to sleep when the corresponding CPM bit is * set. For class 2 and 3 units this is not case; if they can be - * put to to sleep, they will. Here we do not verify, we just + * put to sleep, they will. Here we do not verify, we just * set them and expect them to eventually go off when they can. 
*/ value = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 4348506d667d..409c0ec06265 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -203,43 +203,6 @@ int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv) } EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv); -/** - * mpc52xx_get_xtal_freq - Get SYS_XTAL_IN frequency for a device - * - * @node: device node - * - * Returns the frequency of the external oscillator clock connected - * to the SYS_XTAL_IN pin, or 0 if it cannot be determined. - */ -unsigned int mpc52xx_get_xtal_freq(struct device_node *node) -{ - u32 val; - unsigned int freq; - - if (!mpc52xx_cdm) - return 0; - - freq = mpc5xxx_get_bus_frequency(node); - if (!freq) - return 0; - - if (in_8(&mpc52xx_cdm->ipb_clk_sel) & 0x1) - freq *= 2; - - val = in_be32(&mpc52xx_cdm->rstcfg); - if (val & (1 << 5)) - freq *= 8; - else - freq *= 4; - if (val & (1 << 6)) - freq /= 12; - else - freq /= 16; - - return freq; -} -EXPORT_SYMBOL(mpc52xx_get_xtal_freq); - /** * mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 968f5b727273..e43e08d991ea 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -316,17 +317,15 @@ mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) return 0; } -static void -mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) +static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { int rc; - /* Only setup GPIO if the device tree claims the GPT is - * a GPIO controller */ - if (!of_find_property(node, "gpio-controller", NULL)) + /* Only setup GPIO if the device claims the GPT 
is a GPIO controller */ + if (!device_property_present(gpt->dev, "gpio-controller")) return; - gpt->gc.label = kasprintf(GFP_KERNEL, "%pOF", node); + gpt->gc.label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(gpt->dev)); if (!gpt->gc.label) { dev_err(gpt->dev, "out of memory\n"); return; @@ -338,7 +337,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) gpt->gc.get = mpc52xx_gpt_gpio_get; gpt->gc.set = mpc52xx_gpt_gpio_set; gpt->gc.base = -1; - gpt->gc.of_node = node; + gpt->gc.parent = gpt->dev; /* Setup external pin in GPIO mode */ clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK, @@ -351,8 +350,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) dev_dbg(gpt->dev, "%s() complete.\n", __func__); } #else /* defined(CONFIG_GPIOLIB) */ -static void -mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *p, struct device_node *np) { } +static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { } #endif /* defined(CONFIG_GPIOLIB) */ /*********************************************************************** @@ -722,14 +720,14 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) raw_spin_lock_init(&gpt->lock); gpt->dev = &ofdev->dev; - gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); + gpt->ipb_freq = mpc5xxx_get_bus_frequency(&ofdev->dev); gpt->regs = of_iomap(ofdev->dev.of_node, 0); if (!gpt->regs) return -ENOMEM; dev_set_drvdata(&ofdev->dev, gpt); - mpc52xx_gpt_gpio_setup(gpt, ofdev->dev.of_node); + mpc52xx_gpt_gpio_setup(gpt); mpc52xx_gpt_irq_setup(gpt, ofdev->dev.of_node); mutex_lock(&mpc52xx_gpt_list_mutex); @@ -755,11 +753,6 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) return 0; } -static int mpc52xx_gpt_remove(struct platform_device *ofdev) -{ - return -EBUSY; -} - static const struct of_device_id mpc52xx_gpt_match[] = { { .compatible = "fsl,mpc5200-gpt", }, @@ -772,10 +765,10 @@ static const struct of_device_id mpc52xx_gpt_match[] = { static struct 
platform_driver mpc52xx_gpt_driver = { .driver = { .name = "mpc52xx-gpt", + .suppress_bind_attrs = true, .of_match_table = mpc52xx_gpt_match, }, .probe = mpc52xx_gpt_probe, - .remove = mpc52xx_gpt_remove, }; static int __init mpc52xx_gpt_init(void) diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 3285dabcf923..2fb2a85d131f 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -121,17 +121,15 @@ void __init mpc83xx_setup_pci(void) void __init mpc83xx_setup_arch(void) { + phys_addr_t immrbase = get_immrbase(); + int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M; + unsigned long va = fix_to_virt(FIX_IMMR_BASE); + if (ppc_md.progress) ppc_md.progress("mpc83xx_setup_arch()", 0); - if (!__map_without_bats) { - phys_addr_t immrbase = get_immrbase(); - int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M; - unsigned long va = fix_to_virt(FIX_IMMR_BASE); - - setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); - update_bats(); - } + setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); + update_bats(); } int machine_check_83xx(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 6d47a5b81485..3fa8979ac8a6 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -100,7 +100,6 @@ struct pmc_type { int has_deep_sleep; }; -static struct platform_device *pmc_dev; static int has_deep_sleep, deep_sleeping; static int pmc_irq; static struct mpc83xx_pmc __iomem *pmc_regs; @@ -319,7 +318,27 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; -static const struct of_device_id pmc_match[]; +static struct pmc_type pmc_types[] = { + { + .has_deep_sleep = 1, + }, + { + .has_deep_sleep = 0, + } +}; + +static const struct of_device_id pmc_match[] = { + { + .compatible = "fsl,mpc8313-pmc", + .data = &pmc_types[0], + }, + { + .compatible = 
"fsl,mpc8349-pmc", + .data = &pmc_types[1], + }, + {} +}; + static int pmc_probe(struct platform_device *ofdev) { struct device_node *np = ofdev->dev.of_node; @@ -336,7 +355,6 @@ static int pmc_probe(struct platform_device *ofdev) has_deep_sleep = type->has_deep_sleep; immrbase = get_immrbase(); - pmc_dev = ofdev; is_pci_agent = mpc83xx_is_pci_agent(); if (is_pci_agent < 0) @@ -401,39 +419,13 @@ out: return ret; } -static int pmc_remove(struct platform_device *ofdev) -{ - return -EPERM; -}; - -static struct pmc_type pmc_types[] = { - { - .has_deep_sleep = 1, - }, - { - .has_deep_sleep = 0, - } -}; - -static const struct of_device_id pmc_match[] = { - { - .compatible = "fsl,mpc8313-pmc", - .data = &pmc_types[0], - }, - { - .compatible = "fsl,mpc8349-pmc", - .data = &pmc_types[1], - }, - {} -}; - static struct platform_driver pmc_driver = { .driver = { .name = "mpc83xx-pmc", .of_match_table = pmc_match, + .suppress_bind_attrs = true, }, .probe = pmc_probe, - .remove = pmc_remove }; builtin_platform_driver(pmc_driver); diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 2be17ffe8714..be16eba0f704 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -62,13 +62,13 @@ config MPC85xx_CDS This option enables support for the MPC85xx CDS board config MPC85xx_MDS - bool "Freescale MPC85xx MDS" + bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS" select DEFAULT_UIMAGE select PHYLIB if NETDEVICES select HAVE_RAPIDIO select SWIOTLB help - This option enables support for the MPC85xx MDS board + This option enables support for the MPC8568 MDS, MPC8569 MDS and P1021 MDS boards config MPC8536_DS bool "Freescale MPC8536 DS" @@ -78,28 +78,30 @@ config MPC8536_DS This option enables support for the MPC8536 DS board config MPC85xx_DS - bool "Freescale MPC85xx DS" + bool "Freescale MPC8544 DS / MPC8572 DS / P2020 DS" select PPC_I8259 select DEFAULT_UIMAGE select FSL_ULI1575 if PCI select SWIOTLB help - 
This option enables support for the MPC85xx DS (MPC8544 DS) board + This option enables support for the MPC8544 DS, MPC8572 DS and P2020 DS boards config MPC85xx_RDB - bool "Freescale MPC85xx RDB" + bool "Freescale P102x MBG/UTM/RDB and P2020 RDB" select PPC_I8259 select DEFAULT_UIMAGE select FSL_ULI1575 if PCI select SWIOTLB help - This option enables support for the MPC85xx RDB (P2020 RDB) board + This option enables support for the P1020 MBG PC, P1020 UTM PC, + P1020 RDB PC, P1020 RDB PD, P1020 RDB, P1021 RDB PC, P1024 RDB, + P1025 RDB, P2020 RDB and P2020 RDB PC boards config P1010_RDB - bool "Freescale P1010RDB" + bool "Freescale P1010 RDB" select DEFAULT_UIMAGE help - This option enables support for the MPC85xx RDB (P1010 RDB) board + This option enables support for the P1010 RDB board P1010RDB contains P1010Si, which provides CPU performance up to 800 MHz and 1600 DMIPS, additional functionality and faster interfaces diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9e2df4b66478..5185d942b455 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -2,7 +2,6 @@ config PPC32 bool default y if !PPC64 - select KASAN_VMALLOC if KASAN && MODULES config PPC64 bool "64-bit kernel" @@ -127,18 +126,18 @@ choice config GENERIC_CPU bool "Generic (POWER4 and above)" - depends on PPC64 && !CPU_LITTLE_ENDIAN - select PPC_64S_HASH_MMU if PPC_BOOK3S_64 - -config GENERIC_CPU - bool "Generic (POWER8 and above)" - depends on PPC64 && CPU_LITTLE_ENDIAN - select ARCH_HAS_FAST_MULTIPLIER + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU config GENERIC_CPU + bool "Generic (POWER8 and above)" + depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU + +config POWERPC_CPU bool "Generic 32 bits powerpc" - depends on PPC32 && !PPC_8xx + depends on PPC_BOOK3S_32 config CELL_CPU bool "Cell Broadband Engine" @@ -174,11 
+173,27 @@ config POWER9_CPU config E5500_CPU bool "Freescale e5500" - depends on E500 + depends on PPC64 && E500 config E6500_CPU bool "Freescale e6500" - depends on E500 + depends on PPC64 && E500 + +config 405_CPU + bool "40x family" + depends on 40x + +config 440_CPU + bool "440 (44x family)" + depends on 44x + +config 464_CPU + bool "464 (44x family)" + depends on 44x + +config 476_CPU + bool "476 (47x family)" + depends on PPC_47x config 860_CPU bool "8xx family" @@ -197,11 +212,23 @@ config G4_CPU depends on PPC_BOOK3S_32 select ALTIVEC +config E500_CPU + bool "e500 (8540)" + depends on PPC_85xx && !PPC_E500MC + +config E500MC_CPU + bool "e500mc" + depends on PPC_85xx && PPC_E500MC + +config TOOLCHAIN_DEFAULT_CPU + bool "Rely on the toolchain's implicit default CPU" + depends on PPC32 + endchoice config TARGET_CPU_BOOL bool - default !GENERIC_CPU + default !GENERIC_CPU && !TOOLCHAIN_DEFAULT_CPU config TARGET_CPU string @@ -212,10 +239,17 @@ config TARGET_CPU default "power7" if POWER7_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU + default "405" if 405_CPU + default "440" if 440_CPU + default "464" if 464_CPU + default "476" if 476_CPU default "860" if 860_CPU default "e300c2" if E300C2_CPU default "e300c3" if E300C3_CPU default "G4" if G4_CPU + default "8540" if E500_CPU + default "e500mc" if E500MC_CPU + default "powerpc" if POWERPC_CPU config PPC_BOOK3S def_bool y @@ -324,7 +358,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" - depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) + depends on PPC_BOOK3S || (PPC_E500MC && PPC64 && !E5500_CPU) select PPC_FPU help This option enables kernel support for the Altivec extensions to the diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index f3291e957a19..5b012abca773 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -223,6 +223,7 @@ static int setup_msi_msg_address(struct 
pci_dev *dev, struct msi_msg *msg) if (!prop) { dev_dbg(&dev->dev, "axon_msi: no msi-address-(32|64) properties found\n"); + of_node_put(dn); return -ENOENT; } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 34334c32b7f5..320008528edd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -660,6 +660,7 @@ spufs_init_isolated_loader(void) return; loader = of_get_property(dn, "loader", &size); + of_node_put(dn); if (!loader) return; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index afc1d6604d12..23c6799cfa5a 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -76,7 +76,7 @@ struct spu_context { struct address_space *mss; /* 'mss' area mappings. */ struct address_space *psmap; /* 'psmap' area mappings. */ struct mutex mapping_lock; - u64 object_id; /* user space pointer for oprofile */ + u64 object_id; /* user space pointer for GNU Debugger */ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; struct mutex state_mutex; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index f71735ec449f..04daa7f0a03c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -320,13 +320,6 @@ static void __init pmac_setup_arch(void) #endif /* CONFIG_ADB */ } -#ifdef CONFIG_SCSI -void note_scsi_host(struct device_node *node, void *host) -{ -} -EXPORT_SYMBOL(note_scsi_host); -#endif - static int initializing = 1; static int pmac_late_init(void) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index e1a05c5a9004..ae248a161b43 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -19,7 +19,7 @@ config PPC_POWERNV default y config OPAL_PRD - tristate 'OPAL PRD driver' + tristate "OPAL PRD driver" 
depends on PPC_POWERNV help This enables the opal-prd driver, a facility to run processor diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c8cf2728031a..9de9b2fb163d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1609,6 +1609,7 @@ found: tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; + tbl->it_index = (phb->hose->global_number << 16) | pe->pe_number; if (!iommu_init_table(tbl, phb->hose->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -1779,6 +1780,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; } + tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number; if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end)) rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); else diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 3805ad13b8f3..196aa70fe043 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -21,24 +21,15 @@ #define DARN_ERR 0xFFFFFFFFFFFFFFFFul -struct powernv_rng { +struct pnv_rng { void __iomem *regs; void __iomem *regs_real; unsigned long mask; }; -static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); +static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng); -int powernv_hwrng_present(void) -{ - struct powernv_rng *rng; - - rng = get_cpu_var(powernv_rng); - put_cpu_var(rng); - return rng != NULL; -} - -static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) +static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) { unsigned long parity; @@ -58,18 +49,7 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) return val; } -int 
powernv_get_random_real_mode(unsigned long *v) -{ - struct powernv_rng *rng; - - rng = raw_cpu_read(powernv_rng); - - *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); - - return 1; -} - -static int powernv_get_random_darn(unsigned long *v) +static int pnv_get_random_darn(unsigned long *v) { unsigned long val; @@ -93,29 +73,31 @@ static int __init initialise_darn(void) return -ENODEV; for (i = 0; i < 10; i++) { - if (powernv_get_random_darn(&val)) { - ppc_md.get_random_seed = powernv_get_random_darn; + if (pnv_get_random_darn(&val)) { + ppc_md.get_random_seed = pnv_get_random_darn; return 0; } } return -EIO; } -int powernv_get_random_long(unsigned long *v) +int pnv_get_random_long(unsigned long *v) { - struct powernv_rng *rng; - - rng = get_cpu_var(powernv_rng); - - *v = rng_whiten(rng, in_be64(rng->regs)); - - put_cpu_var(rng); + struct pnv_rng *rng; + if (mfmsr() & MSR_DR) { + rng = get_cpu_var(pnv_rng); + *v = rng_whiten(rng, in_be64(rng->regs)); + put_cpu_var(rng); + } else { + rng = raw_cpu_read(pnv_rng); + *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); + } return 1; } -EXPORT_SYMBOL_GPL(powernv_get_random_long); +EXPORT_SYMBOL_GPL(pnv_get_random_long); -static __init void rng_init_per_cpu(struct powernv_rng *rng, +static __init void rng_init_per_cpu(struct pnv_rng *rng, struct device_node *dn) { int chip_id, cpu; @@ -125,16 +107,16 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng, pr_warn("No ibm,chip-id found for %pOF.\n", dn); for_each_possible_cpu(cpu) { - if (per_cpu(powernv_rng, cpu) == NULL || + if (per_cpu(pnv_rng, cpu) == NULL || cpu_to_chip_id(cpu) == chip_id) { - per_cpu(powernv_rng, cpu) = rng; + per_cpu(pnv_rng, cpu) = rng; } } } static __init int rng_create(struct device_node *dn) { - struct powernv_rng *rng; + struct pnv_rng *rng; struct resource res; unsigned long val; @@ -160,7 +142,7 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - ppc_md.get_random_seed = powernv_get_random_long; + 
ppc_md.get_random_seed = pnv_get_random_long; return 0; } @@ -208,7 +190,7 @@ static int __init pnv_rng_late_init(void) if (ppc_md.get_random_seed == pnv_get_random_long_early) pnv_get_random_long_early(&v); - if (ppc_md.get_random_seed == powernv_get_random_long) { + if (ppc_md.get_random_seed == pnv_get_random_long) { for_each_compatible_node(dn, NULL, "ibm,power-rng") of_platform_device_create(dn, NULL, NULL); } diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index c1bfad56447d..2b47d5a86328 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -77,7 +77,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) /* * VAS can interrupt with multiple page faults. So process all * valid CRBs within fault FIFO until reaches invalid CRB. - * We use CCW[0] and pswid to validate validate CRBs: + * We use CCW[0] and pswid to validate CRBs: * * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 * OS sets this bit to 1 after reading CRB. diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index 610682caabc4..a44869e5ea70 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -165,7 +165,7 @@ config PS3_LPM If you intend to use the advanced performance monitoring and profiling support of the Cell processor with programs like - oprofile and perfmon2, then say Y or M, otherwise say N. + perfmon2, then say Y or M, otherwise say N. config PS3GELIC_UDBG bool "PS3 udbg output via UDP broadcasts on Ethernet" diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index f4a647c1f0b2..fb6499977f99 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -141,6 +141,19 @@ config IBMEBUS help Bus device driver for GX bus based adapters. 
+config PSERIES_PLPKS + depends on PPC_PSERIES + bool "Support for the Platform Key Storage" + help + PowerVM provides an isolated Platform Keystore (PKS) storage + allocation for each LPAR with individually managed access + controls to store sensitive information securely. It can be + used to store asymmetric public keys or secrets as required + by different use cases. Select this config to enable the + operating system interface to the hypervisor to access this space. + + If unsure, select N. + config PAPR_SCM depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM tristate "Support for the PAPR Storage Class Memory interface" diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 7aaff5323544..14e143b946a3 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_PAPR_SCM) += papr_scm.o obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o +obj-$(CONFIG_PSERIES_PLPKS) += plpks.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1b0c901a6f3b..8e40ccac0f44 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -71,7 +71,7 @@ static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) if (pdev->is_virtfn) { /* * FIXME: This really should be handled by choosing the right - * parent PE in in pseries_eeh_init_edev(). + * parent PE in pseries_eeh_init_edev(). 
*/ struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 09c119b2f623..080108d129ed 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -67,6 +67,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_PAPR_SCM, "hcall-scm"}, {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, + {FW_FEATURE_WATCHDOG, "hcall-watchdog"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index fba64304e859..561adac69022 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -700,6 +700,33 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { .get = tce_get_pSeriesLP }; +/* + * Find nearest ibm,dma-window (default DMA window) or direct DMA window or + * dynamic 64bit DMA window, walking up the device tree. 
+ */ +static struct device_node *pci_dma_find(struct device_node *dn, + const __be32 **dma_window) +{ + const __be32 *dw = NULL; + + for ( ; dn && PCI_DN(dn); dn = dn->parent) { + dw = of_get_property(dn, "ibm,dma-window", NULL); + if (dw) { + if (dma_window) + *dma_window = dw; + return dn; + } + dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (dw) + return dn; + dw = of_get_property(dn, DMA64_PROPNAME, NULL); + if (dw) + return dn; + } + + return NULL; +} + static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { struct iommu_table *tbl; @@ -712,20 +739,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - /* - * Find nearest ibm,dma-window (default DMA window), walking up the - * device tree - */ - for (pdn = dn; pdn != NULL; pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window != NULL) - break; - } + pdn = pci_dma_find(dn, &dma_window); - if (dma_window == NULL) { + if (dma_window == NULL) pr_debug(" no ibm,dma-window property !\n"); - return; - } ppci = PCI_DN(pdn); @@ -735,11 +752,13 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); + if (dma_window) { + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, + ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + } iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -1021,9 +1040,6 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, 
cfg_addr, BUID_HI(buid), BUID_LO(buid)); - dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n", - ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), - BUID_LO(buid), ret); switch (out_sz) { case 5: @@ -1041,6 +1057,11 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, break; } + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n", + ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), ret, query->largest_available_block, + query->page_size, query->windows_available); + return ret; } @@ -1232,7 +1253,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) bool default_win_removed = false, direct_mapping = false; bool pmem_present; struct pci_dn *pci = PCI_DN(pdn); - struct iommu_table *tbl = pci->table_group->tables[0]; + struct property *default_win = NULL; dn = of_find_node_by_type(NULL, "ibm,pmemory"); pmem_present = dn != NULL; @@ -1289,11 +1310,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for extensions presence. 
*/ if (query.windows_available == 0) { - struct property *default_win; int reset_win_ext; /* DDW + IOMMU on single window may fail if there is any allocation */ - if (iommu_table_in_use(tbl)) { + if (iommu_table_in_use(pci->table_group->tables[0])) { dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); goto out_failed; } @@ -1429,16 +1449,18 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) pci->table_group->tables[1] = newtbl; - /* Keep default DMA window struct if removed */ - if (default_win_removed) { - tbl->it_size = 0; - vfree(tbl->it_map); - tbl->it_map = NULL; - } - set_iommu_table_base(&dev->dev, newtbl); } + if (default_win_removed) { + iommu_tce_table_put(pci->table_group->tables[0]); + pci->table_group->tables[0] = NULL; + + /* default_win is valid here because default_win_removed == true */ + of_remove_property(pdn, default_win); + dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); + } + spin_lock(&dma_win_list_lock); list_add(&window->list, &dma_win_list); spin_unlock(&dma_win_list_lock); @@ -1503,13 +1525,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; - pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - + pdn = pci_dma_find(dn, &dma_window); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1540,7 +1556,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) { struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; - const __be32 *dma_window = NULL; /* only attempt to use a new window if 64-bit DMA is requested */ if (dma_mask < DMA_BIT_MASK(64)) @@ -1554,13 +1569,7 @@ static bool 
iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; - pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - + pdn = pci_dma_find(dn, NULL); if (pdn && PCI_DN(pdn)) return enable_ddw(pdev, pdn); diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index ab6cdbebb35e..096d09ed89f6 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 937f9c010b22..e6c117fb6491 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 78f3f74c7056..3d36a8955eaf 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -48,6 +48,39 @@ struct update_props_workarea { #define MIGRATION_SCOPE (1) #define PRRN_SCOPE -2 +#ifdef CONFIG_PPC_WATCHDOG +static unsigned int nmi_wd_lpm_factor = 200; + +#ifdef CONFIG_SYSCTL +static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = { + { + .procname = "nmi_wd_lpm_factor", + .data = &nmi_wd_lpm_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + }, + {} +}; +static struct ctl_table nmi_wd_lpm_factor_sysctl_root[] = { + { + .procname = "kernel", + .mode = 0555, + .child = nmi_wd_lpm_factor_ctl_table, + }, + {} +}; + +static int __init register_nmi_wd_lpm_factor_sysctl(void) +{ + 
register_sysctl_table(nmi_wd_lpm_factor_sysctl_root); + + return 0; +} +device_initcall(register_nmi_wd_lpm_factor_sysctl); +#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_PPC_WATCHDOG */ + static int mobility_rtas_call(int token, char *buf, s32 scope) { int rc; @@ -427,6 +460,43 @@ static int wait_for_vasi_session_suspending(u64 handle) return ret; } +static void wait_for_vasi_session_completed(u64 handle) +{ + unsigned long state = 0; + int ret; + + pr_info("waiting for memory transfer to complete...\n"); + + /* + * Wait for transition from H_VASI_RESUMED to H_VASI_COMPLETED. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + /* + * If the memory transfer is already complete and the migration + * has been cleaned up by the hypervisor, H_PARAMETER is return, + * which is translate in EINVAL by poll_vasi_state(). + */ + if (ret == -EINVAL || (!ret && state == H_VASI_COMPLETED)) { + pr_info("memory transfer completed.\n"); + break; + } + + if (ret) { + pr_err("H_VASI_STATE return error (%d)\n", ret); + break; + } + + if (state != H_VASI_RESUMED) { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + break; + } + + msleep(500); + } +} + static void prod_single(unsigned int target_cpu) { long hvrc; @@ -665,19 +735,36 @@ static int pseries_suspend(u64 handle) static int pseries_migrate_partition(u64 handle) { int ret; + unsigned int factor = 0; +#ifdef CONFIG_PPC_WATCHDOG + factor = nmi_wd_lpm_factor; +#endif ret = wait_for_vasi_session_suspending(handle); if (ret) return ret; vas_migration_handler(VAS_SUSPEND); + if (factor) + watchdog_nmi_set_timeout_pct(factor); + ret = pseries_suspend(handle); - if (ret == 0) + if (ret == 0) { post_mobility_fixup(); - else + /* + * Wait until the memory transfer is complete, so that the user + * space process returns from the syscall after the transfer is + * complete. This allows the user hooks to be executed at the + * right time. 
+ */ + wait_for_vasi_session_completed(handle); + } else pseries_cancel_migration(handle, ret); + if (factor) + watchdog_nmi_set_timeout_pct(0); + vas_migration_handler(VAS_RESUME); return ret; diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c index 515150417bb3..526c621b098b 100644 --- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -22,6 +22,7 @@ #include #include +#include #include "pseries.h" diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 82cae08976bc..20f6ed813bff 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -29,7 +29,7 @@ (1ul << ND_CMD_SET_CONFIG_DATA) | \ (1ul << ND_CMD_CALL)) -/* DIMM health bitmap bitmap indicators */ +/* DIMM health bitmap indicators */ /* SCM device is unable to persist memory contents */ #define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) /* SCM device failed to persist memory contents */ @@ -354,7 +354,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, { struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; - struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data; + struct papr_scm_priv *p = dev_get_drvdata(dev); int rc, size; /* Allocate request buffer enough to hold single performance stat */ diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c new file mode 100644 index 000000000000..52aaa2894606 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * POWER LPAR Platform KeyStore(PLPKS) + * Copyright (C) 2022 IBM Corporation + * Author: Nayna Jain + * + * Provides access to variables stored in Power LPAR Platform KeyStore(PLPKS). 
+ */
+
+#define pr_fmt(fmt) "plpks: " fmt
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/hvcall.h>
+
+#include "plpks.h"
+
+/* Consumer (owner) identifiers placed in struct plpks_auth. */
+#define PKS_FW_OWNER		0x1
+#define PKS_BOOTLOADER_OWNER	0x2
+#define PKS_OS_OWNER		0x3
+
+#define LABEL_VERSION		0
+#define MAX_LABEL_ATTR_SIZE	16
+#define MAX_NAME_SIZE		239
+#define MAX_DATA_SIZE		4000
+
+/*
+ * NOTE(review): these values are annotated as milliseconds but are handed
+ * to usleep_range(), which takes microseconds - confirm the intended units.
+ */
+#define PKS_FLUSH_MAX_TIMEOUT	5000 //msec
+#define PKS_FLUSH_SLEEP		10 //msec
+#define PKS_FLUSH_SLEEP_RANGE	400
+
+/* OS consumer password obtained from H_PKS_GEN_PASSWORD (NULL if none). */
+static u8 *ospassword;
+static u16 ospasswordlength;
+
+// Retrieved with H_PKS_GET_CONFIG
+static u16 maxpwsize;
+static u16 maxobjsize;
+
+/* Authentication block passed by physical address to the PKS hcalls. */
+struct plpks_auth {
+	u8 version;
+	u8 consumer;
+	__be64 rsvd0;
+	__be32 rsvd1;
+	__be16 passwordlength;
+	u8 password[];
+} __packed __aligned(16);
+
+struct label_attr {
+	u8 prefix[8];
+	u8 version;
+	u8 os;
+	u8 length;
+	u8 reserved[5];
+};
+
+/*
+ * Object label handed to the hypervisor: attributes followed by the name.
+ * @size is driver bookkeeping (number of leading bytes that are valid) and
+ * is passed separately as the label length.
+ */
+struct label {
+	struct label_attr attr;
+	u8 name[MAX_NAME_SIZE];
+	size_t size;
+};
+
+/*
+ * Translate an hcall status into a negative errno (0 for H_SUCCESS).
+ * Must only be fed raw hcall statuses: anything it does not recognise,
+ * including an already-translated errno, comes back as -EINVAL.
+ */
+static int pseries_status_to_err(int rc)
+{
+	int err;
+
+	switch (rc) {
+	case H_SUCCESS:
+		err = 0;
+		break;
+	case H_FUNCTION:
+		err = -ENXIO;
+		break;
+	case H_P1:
+	case H_P2:
+	case H_P3:
+	case H_P4:
+	case H_P5:
+	case H_P6:
+		err = -EINVAL;
+		break;
+	case H_NOT_FOUND:
+		err = -ENOENT;
+		break;
+	case H_BUSY:
+		err = -EBUSY;
+		break;
+	case H_AUTHORITY:
+		err = -EPERM;
+		break;
+	case H_NO_MEM:
+		err = -ENOMEM;
+		break;
+	case H_RESOURCE:
+		err = -EEXIST;
+		break;
+	case H_TOO_BIG:
+		err = -EFBIG;
+		break;
+	case H_STATE:
+		err = -EIO;
+		break;
+	case H_R_STATE:
+		err = -EIO;
+		break;
+	case H_IN_USE:
+		err = -EEXIST;
+		break;
+	case H_ABORTED:
+		err = -EINTR;
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+/*
+ * Ask the hypervisor to generate the OS consumer password and remember it
+ * in ospassword/ospasswordlength.
+ *
+ * H_IN_USE (a password already exists) is not treated as an error; in that
+ * case no password is recorded.  Returns 0 or a negative errno.
+ */
+static int plpks_gen_password(void)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	u8 *password, consumer = PKS_OS_OWNER;
+	int rc;
+
+	password = kzalloc(maxpwsize, GFP_KERNEL);
+	if (!password)
+		return -ENOMEM;
+
+	rc = plpar_hcall(H_PKS_GEN_PASSWORD, retbuf, consumer, 0,
+			 virt_to_phys(password), maxpwsize);
+
+	if (rc == H_SUCCESS) {
+		/*
+		 * Keep the buffer the hypervisor filled in rather than
+		 * copying it: there is no second allocation that can fail,
+		 * and the length is only ever set together with the pointer.
+		 */
+		ospassword = password;
+		ospasswordlength = maxpwsize;
+		return 0;
+	}
+
+	kfree(password);
+
+	if (rc == H_IN_USE) {
+		pr_warn("Password is already set for POWER LPAR Platform KeyStore\n");
+		return 0;
+	}
+
+	return pseries_status_to_err(rc);
+}
+
+/*
+ * Allocate and fill the authentication block for @consumer.
+ *
+ * Firmware and bootloader consumers authenticate with an empty password;
+ * any other accepted value uses the stored OS password.  Returns the block
+ * (caller kfree()s it) or an ERR_PTR().
+ */
+static struct plpks_auth *construct_auth(u8 consumer)
+{
+	struct plpks_auth *auth;
+
+	if (consumer > PKS_OS_OWNER)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Zeroed allocation: the block is handed to the hypervisor, so the
+	 * reserved fields and unused password bytes must not carry stale
+	 * heap contents.
+	 */
+	auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL);
+	if (!auth)
+		return ERR_PTR(-ENOMEM);
+
+	auth->version = 1;
+	auth->consumer = consumer;
+
+	if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER)
+		return auth;
+
+	/* ospassword is NULL when the password was already set (H_IN_USE). */
+	if (ospassword && ospasswordlength)
+		memcpy(auth->password, ospassword, ospasswordlength);
+
+	auth->passwordlength = cpu_to_be16(ospasswordlength);
+
+	return auth;
+}
+
+/*
+ * Label is combination of label attributes + name.
+ * Label attributes are used internally by kernel and not exposed to the user.
+ *
+ * @component may be NULL, in which case the prefix is left zeroed.
+ * Returns the label (caller kfree()s it) or an ERR_PTR().
+ */
+static struct label *construct_label(char *component, u8 varos, u8 *name,
+				     u16 namelen)
+{
+	struct label *label;
+	size_t slen = 0;
+
+	if (!name || namelen > MAX_NAME_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	/* Only measure the component once we know it is not NULL. */
+	if (component) {
+		slen = strlen(component);
+		if (slen > sizeof(label->attr.prefix))
+			return ERR_PTR(-EINVAL);
+	}
+
+	label = kzalloc(sizeof(*label), GFP_KERNEL);
+	if (!label)
+		return ERR_PTR(-ENOMEM);
+
+	if (component)
+		memcpy(&label->attr.prefix, component, slen);
+
+	label->attr.version = LABEL_VERSION;
+	label->attr.os = varos;
+	label->attr.length = MAX_LABEL_ATTR_SIZE;
+	memcpy(&label->name, name, namelen);
+
+	label->size = sizeof(struct label_attr) + namelen;
+
+	return label;
+}
+
+/*
+ * Query H_PKS_GET_CONFIG and cache maxpwsize and maxobjsize.
+ * Returns 0 or a negative errno.
+ */
+static int _plpks_get_config(void)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct {
+		u8 version;
+		u8 flags;
+		__be32 rsvd0;
+		__be16 maxpwsize;
+		__be16 maxobjlabelsize;
+		__be16 maxobjsize;
+		__be32 totalsize;
+		__be32 usedspace;
+		__be32 supportedpolicies;
+		__be64 rsvd1;
+	} __packed *config;
+	int rc;
+
+	/*
+	 * The buffer is passed by physical address.  Allocate it from the
+	 * heap: virt_to_phys() is not valid for an on-stack object when
+	 * kernel stacks are vmalloc'ed.
+	 */
+	config = kzalloc(sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(config),
+			 sizeof(*config));
+
+	if (rc != H_SUCCESS) {
+		rc = pseries_status_to_err(rc);
+		goto out;
+	}
+
+	maxpwsize = be16_to_cpu(config->maxpwsize);
+	maxobjsize = be16_to_cpu(config->maxobjsize);
+	rc = 0;
+
+out:
+	kfree(config);
+
+	return rc;
+}
+
+/*
+ * Poll H_PKS_CONFIRM_OBJECT_FLUSHED until the object named by @label is
+ * reported flushed (status 1), the hcall fails, or the polling budget is
+ * used up.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the flush was never confirmed, or
+ * the negative errno for the failing hcall.  The result is already an
+ * errno; callers must not translate it again.
+ */
+static int plpks_confirm_object_flushed(struct label *label,
+					struct plpks_auth *auth)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	bool timed_out = true;
+	u64 timeout = 0;
+	u8 status;
+	int rc;
+
+	do {
+		rc = plpar_hcall(H_PKS_CONFIRM_OBJECT_FLUSHED, retbuf,
+				 virt_to_phys(auth), virt_to_phys(label),
+				 label->size);
+
+		status = retbuf[0];
+		if (rc) {
+			timed_out = false;
+			/* Object already gone and flushed: not an error. */
+			if (rc == H_NOT_FOUND && status == 1)
+				rc = 0;
+			break;
+		}
+
+		if (status == 1) {
+			timed_out = false;
+			break;
+		}
+
+		usleep_range(PKS_FLUSH_SLEEP,
+			     PKS_FLUSH_SLEEP + PKS_FLUSH_SLEEP_RANGE);
+		timeout = timeout + PKS_FLUSH_SLEEP;
+	} while (timeout < PKS_FLUSH_MAX_TIMEOUT);
+
+	/* Leaving the loop without confirmation must not look like success. */
+	if (timed_out)
+		return -ETIMEDOUT;
+
+	return pseries_status_to_err(rc);
+}
+
+/*
+ * Write @var to the keystore as the OS consumer and wait for the flush to
+ * be confirmed.  Signed-update policies are rejected.
+ * Returns 0 or a negative errno.
+ */
+int plpks_write_var(struct plpks_var var)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc;
+
+	if (!var.component || !var.data || !var.datalen ||
+	    var.namelen > MAX_NAME_SIZE || var.datalen > MAX_DATA_SIZE)
+		return -EINVAL;
+
+	if (var.policy & SIGNEDUPDATE)
+		return -EINVAL;
+
+	auth = construct_auth(PKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(var.component, var.os, var.name, var.namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	rc = plpar_hcall(H_PKS_WRITE_OBJECT, retbuf, virt_to_phys(auth),
+			 virt_to_phys(label), label->size, var.policy,
+			 virt_to_phys(var.data), var.datalen);
+
+	/*
+	 * Translate the hcall status exactly once: the flush helper already
+	 * returns an errno, which must not go through the table again.
+	 */
+	if (rc == H_SUCCESS)
+		rc = plpks_confirm_object_flushed(label, auth);
+	else
+		rc = pseries_status_to_err(rc);
+
+	if (rc)
+		pr_err("Failed to write variable %s for component %s with error %d\n",
+		       var.name, var.component, rc);
+
+	kfree(label);
+out:
+	kfree(auth);
+
+	return rc;
+}
+
+/*
+ * Remove the variable @vname of @component from the keystore as the OS
+ * consumer and wait for the flush to be confirmed.
+ * Returns 0 or a negative errno.
+ */
+int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc;
+
+	if (!component || vname.namelen > MAX_NAME_SIZE)
+		return -EINVAL;
+
+	auth = construct_auth(PKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(component, varos, vname.name, vname.namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	rc = plpar_hcall(H_PKS_REMOVE_OBJECT, retbuf, virt_to_phys(auth),
+			 virt_to_phys(label), label->size);
+
+	/* See plpks_write_var(): translate the hcall status only once. */
+	if (rc == H_SUCCESS)
+		rc = plpks_confirm_object_flushed(label, auth);
+	else
+		rc = pseries_status_to_err(rc);
+
+	if (rc)
+		pr_err("Failed to remove variable %s for component %s with error %d\n",
+		       vname.name, component, rc);
+
+	kfree(label);
+out:
+	kfree(auth);
+
+	return rc;
+}
+
+/*
+ * Read the variable described by @var on behalf of @consumer.
+ *
+ * On success var->data points to a freshly allocated copy of the value
+ * (caller kfree()s it), var->datalen is its length (capped to the caller's
+ * non-zero request) and var->policy is filled in.
+ * Returns 0 or a negative errno.
+ */
+static int plpks_read_var(u8 consumer, struct plpks_var *var)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label = NULL;
+	u8 *output;
+	int rc;
+
+	if (!var->name || var->namelen > MAX_NAME_SIZE)
+		return -EINVAL;
+
+	/* Authenticate as the requested consumer, not always as the OS. */
+	auth = construct_auth(consumer);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	/*
+	 * Only OS-owned objects carry the kernel's label attributes;
+	 * firmware and bootloader objects are looked up by their bare name.
+	 */
+	if (consumer == PKS_OS_OWNER) {
+		label = construct_label(var->component, var->os, var->name,
+					var->namelen);
+		if (IS_ERR(label)) {
+			rc = PTR_ERR(label);
+			goto out_free_auth;
+		}
+	}
+
+	output = kzalloc(maxobjsize, GFP_KERNEL);
+	if (!output) {
+		rc = -ENOMEM;
+		goto out_free_label;
+	}
+
+	if (consumer == PKS_OS_OWNER)
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(label), label->size,
+				 virt_to_phys(output), maxobjsize);
+	else
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(var->name), var->namelen,
+				 virt_to_phys(output), maxobjsize);
+
+	if (rc != H_SUCCESS) {
+		pr_err("Failed to read variable %s for component %s with error %d\n",
+		       var->name, var->component, rc);
+		rc = pseries_status_to_err(rc);
+		goto out_free_output;
+	}
+
+	/* retbuf[0] is the object length, retbuf[1] its policy. */
+	if (var->datalen == 0 || var->datalen > retbuf[0])
+		var->datalen = retbuf[0];
+
+	var->data = kzalloc(var->datalen, GFP_KERNEL);
+	if (!var->data) {
+		rc = -ENOMEM;
+		goto out_free_output;
+	}
+	var->policy = retbuf[1];
+
+	memcpy(var->data, output, var->datalen);
+	rc = 0;
+
+out_free_output:
+	kfree(output);
+out_free_label:
+	kfree(label);	/* kfree(NULL) is a no-op for non-OS consumers */
+out_free_auth:
+	kfree(auth);
+
+	return rc;
+}
+
+int plpks_read_os_var(struct plpks_var *var)
+{
+	return plpks_read_var(PKS_OS_OWNER, var);
+}
+
+int plpks_read_fw_var(struct plpks_var *var)
+{
+	return plpks_read_var(PKS_FW_OWNER, var);
+}
+
+int plpks_read_bootloader_var(struct plpks_var *var)
+{
+	return plpks_read_var(PKS_BOOTLOADER_OWNER, var);
+}
+
+/* Fetch the keystore configuration, then set up the OS password. */
+static __init int pseries_plpks_init(void)
+{
+	int rc;
+
+	rc = _plpks_get_config();
+
+	if (rc) {
+		pr_err("POWER LPAR Platform KeyStore is not supported or enabled\n");
+		return rc;
+	}
+
+	rc = plpks_gen_password();
+	if (rc)
+		pr_err("Failed setting POWER LPAR Platform KeyStore Password\n");
+	else
+		pr_info("POWER LPAR Platform KeyStore initialized successfully\n");
+
+	return rc;
+}
+arch_initcall(pseries_plpks_init);
diff --git a/arch/powerpc/platforms/pseries/plpks.h
b/arch/powerpc/platforms/pseries/plpks.h new file mode 100644 index 000000000000..c6a291367bb1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 IBM Corporation + * Author: Nayna Jain + * + * Platform keystore for pseries LPAR(PLPKS). + */ + +#ifndef _PSERIES_PLPKS_H +#define _PSERIES_PLPKS_H + +#include +#include + +#define OSSECBOOTAUDIT 0x40000000 +#define OSSECBOOTENFORCE 0x20000000 +#define WORLDREADABLE 0x08000000 +#define SIGNEDUPDATE 0x01000000 + +#define PLPKS_VAR_LINUX 0x01 +#define PLPKS_VAR_COMMON 0x04 + +struct plpks_var { + char *component; + u8 *name; + u8 *data; + u32 policy; + u16 namelen; + u16 datalen; + u8 os; +}; + +struct plpks_var_name { + u8 *name; + u16 namelen; +}; + +struct plpks_var_name_list { + u32 varcount; + struct plpks_var_name varlist[]; +}; + +/** + * Writes the specified var and its data to PKS. + * Any caller of PKS driver should present a valid component type for + * their variable. + */ +int plpks_write_var(struct plpks_var var); + +/** + * Removes the specified var and its data from PKS. + */ +int plpks_remove_var(char *component, u8 varos, + struct plpks_var_name vname); + +/** + * Returns the data for the specified os variable. + */ +int plpks_read_os_var(struct plpks_var *var); + +/** + * Returns the data for the specified firmware variable. + */ +int plpks_read_fw_var(struct plpks_var *var); + +/** + * Returns the data for the specified bootloader variable. 
+ */ +int plpks_read_bootloader_var(struct plpks_var *var); + +#endif diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ee4f1db49515..489f4c4df468 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -72,6 +73,7 @@ #include #include #include +#include #include "pseries.h" @@ -169,6 +171,18 @@ static void __init fwnmi_init(void) #endif } +/* + * Affix a device for the first timer to the platform bus if + * we have firmware support for the H_WATCHDOG hypercall. + */ +static __init int pseries_wdt_init(void) +{ + if (firmware_has_feature(FW_FEATURE_WATCHDOG)) + platform_device_register_simple("pseries-wdt", 0, NULL, 0); + return 0; +} +machine_subsys_initcall(pseries, pseries_wdt_init); + static void pseries_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -802,9 +816,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_security_mitigations(); -#ifdef CONFIG_PPC_64S_HASH_MMU - pseries_lpar_read_hblkrm_characteristics(); -#endif + if (!radix_enabled()) + pseries_lpar_read_hblkrm_characteristics(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 500a1fc4a1d7..7e6e6dd2e33e 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "vas.h" @@ -803,7 +804,7 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds) * The total number of available credits may be decreased or * increased with DLPAR operation. Means some windows have to be * closed / reopened. Hold the vas_pseries_mutex so that the - * the user space can not open new windows. + * user space can not open new windows. 
*/ if (old_nr_creds < new_nr_creds) { /* diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore index b8dc6ff34254..5e40575c1f2b 100644 --- a/arch/powerpc/purgatory/.gitignore +++ b/arch/powerpc/purgatory/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -kexec-purgatory.c purgatory.ro diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 348f59581052..a81d155b89ae 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -2,17 +2,13 @@ KASAN_SANITIZE := n -targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c +targets += trampoline_$(BITS).o purgatory.ro LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined $(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE $(call if_changed,ld) -quiet_cmd_bin2c = BIN2C $@ - cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ - -$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE - $(call if_changed,bin2c) +$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro obj-y += kexec-purgatory.o diff --git a/arch/powerpc/purgatory/kexec-purgatory.S b/arch/powerpc/purgatory/kexec-purgatory.S new file mode 100644 index 000000000000..f494fd5a0526 --- /dev/null +++ b/arch/powerpc/purgatory/kexec-purgatory.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + + .section .rodata, "a" + + .align 8 +kexec_purgatory: + .globl kexec_purgatory + .incbin "arch/powerpc/purgatory/purgatory.ro" +.Lkexec_purgatory_end: + + .align 8 +kexec_purgatory_size: + .globl kexec_purgatory_size + .quad .Lkexec_purgatory_end - kexec_purgatory diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 3f130312b6e9..915f4d3991c3 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -107,7 +107,7 @@ EXPORT_SYMBOL(cpm_command); * memory mapped space. * The baud rate clock is the system clock divided by something. * It was set up long ago during the initial boot phase and is - * is given to us. 
+ * given to us. * Baud rate clocks are zero-based in the driver code (as that maps * to port numbers). Documentation uses 1-based numbering. */ diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 1011cfea2e32..af6c8ca824d3 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -521,6 +522,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) struct resource rsrc; const int *bus_range; u8 hdr_type, progif; + u32 class_code; struct device_node *dev; struct ccsr_pci __iomem *pci; u16 temp; @@ -594,6 +596,13 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS; if (fsl_pcie_check_link(hose)) hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK; + /* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */ + if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) { + early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code); + class_code &= 0xff; + class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; + early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code); + } } else { /* * Set PBFR(PCI Bus Function Register)[10] = 1 to diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index cdbde2e0c96e..093a875d7d1e 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -18,6 +18,7 @@ struct platform_device; #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ +#define PCIE_FSL_CSR_CLASSCODE 0x474 /* FSL GPEX CSR */ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ #define PCIE_IP_REV_3_0 0x02080300 /* PCIE IP block version Rev3.0 */ #define PIWAR_EN 0x80000000 /* Enable */ diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 1bfc9afa8a1a..4647c6074f3b 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ 
b/arch/powerpc/sysdev/fsl_rio.c @@ -69,10 +69,10 @@ static DEFINE_SPINLOCK(fsl_rio_config_lock); -#define __fsl_read_rio_config(x, addr, err, op) \ +#define ___fsl_read_rio_config(x, addr, err, op, barrier) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ - " eieio\n" \ + " "barrier"\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %1,-1\n" \ @@ -83,6 +83,14 @@ static DEFINE_SPINLOCK(fsl_rio_config_lock); : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) +#ifdef CONFIG_BOOKE +#define __fsl_read_rio_config(x, addr, err, op) \ + ___fsl_read_rio_config(x, addr, err, op, "mbar") +#else +#define __fsl_read_rio_config(x, addr, err, op) \ + ___fsl_read_rio_config(x, addr, err, op, "eieio") +#endif + void __iomem *rio_regs_win; void __iomem *rmu_regs_win; resource_size_t rio_law_start; diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index 834a6d7fbd88..c5bf7e1b3780 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -1,31 +1,34 @@ // SPDX-License-Identifier: GPL-2.0 -/** - * mpc5xxx_get_bus_frequency - Find the bus frequency for a device - * @node: device node - * - * Returns bus frequency (IPS on MPC512x, IPB on MPC52xx), - * or 0 if the bus frequency cannot be found. - */ #include -#include #include +#include + #include -unsigned long mpc5xxx_get_bus_frequency(struct device_node *node) +/** + * mpc5xxx_fwnode_get_bus_frequency - Find the bus frequency for a firmware node + * @fwnode: firmware node + * + * Returns bus frequency (IPS on MPC512x, IPB on MPC52xx), + * or 0 if the bus frequency cannot be found. 
+ */ +unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode) { - const unsigned int *p_bus_freq = NULL; + struct fwnode_handle *parent; + u32 bus_freq; + int ret; - of_node_get(node); - while (node) { - p_bus_freq = of_get_property(node, "bus-frequency", NULL); - if (p_bus_freq) - break; + ret = fwnode_property_read_u32(fwnode, "bus-frequency", &bus_freq); + if (!ret) + return bus_freq; - node = of_get_next_parent(node); + fwnode_for_each_parent_node(fwnode, parent) { + ret = fwnode_property_read_u32(parent, "bus-frequency", &bus_freq); + if (!ret) + return bus_freq; } - of_node_put(node); - return p_bus_freq ? *p_bus_freq : 0; + return 0; } -EXPORT_SYMBOL(mpc5xxx_get_bus_frequency); +EXPORT_SYMBOL(mpc5xxx_fwnode_get_bus_frequency); diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c index 1f408d34a6a7..420f949b7485 100644 --- a/arch/powerpc/sysdev/of_rtc.c +++ b/arch/powerpc/sysdev/of_rtc.c @@ -11,6 +11,8 @@ #include #include +#include + static __initdata struct { const char *compatible; char *plat_name; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d02911e78cfc..e2c8f93b535b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -718,6 +718,7 @@ static bool __init xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + of_node_put(rootdn); if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 3d9782ea3fa7..26ef3388c24c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -116,7 +116,7 @@ struct bpt { static struct bpt bpts[NBPTS]; static struct bpt dabr[HBP_NUM_MAX]; static struct bpt *iabr; -static unsigned bpinstr = 0x7fe00008; /* trap */ +static unsigned int bpinstr = PPC_RAW_TRAP(); #define BP_NUM(bp) ((bp) - bpts + 1) @@ -3047,7 +3047,7 @@ generic_inst_dump(unsigned 
long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %s", adr, ppc_inst_as_str(inst)); + printf(REG" %08lx", adr, ppc_inst_as_ulong(inst)); printf("\t"); if (!ppc_inst_prefixed(inst)) dump_func(ppc_inst_val(inst), adr); diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c index 03b6ae37a578..6559b606736d 100644 --- a/drivers/ata/pata_mpc52xx.c +++ b/drivers/ata/pata_mpc52xx.c @@ -683,7 +683,7 @@ static int mpc52xx_ata_probe(struct platform_device *op) struct bcom_task *dmatsk; /* Get ipb frequency */ - ipb_freq = mpc5xxx_get_bus_frequency(op->dev.of_node); + ipb_freq = mpc5xxx_get_bus_frequency(&op->dev); if (!ipb_freq) { dev_err(&op->dev, "could not determine IPB bus frequency\n"); return -ENODEV; diff --git a/drivers/char/hw_random/powernv-rng.c b/drivers/char/hw_random/powernv-rng.c index 8da1d7917bdc..429e956f34e1 100644 --- a/drivers/char/hw_random/powernv-rng.c +++ b/drivers/char/hw_random/powernv-rng.c @@ -23,7 +23,7 @@ static int powernv_rng_read(struct hwrng *rng, void *data, size_t max, bool wait buf = (unsigned long *)data; for (i = 0; i < len; i++) - powernv_get_random_long(buf++); + pnv_get_random_long(buf++); return len * sizeof(unsigned long); } diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 5bf92298554d..e50d7928bf8f 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "edac_module.h" #include "mpc85xx_edac.h" #include "fsl_ddr_edac.h" diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 6793f6d799e7..0bc670778c99 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 6c698c10d3cd..81ac92bb4f6f 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -239,6 +239,7 @@ 
static const struct mpc_i2c_divider mpc_i2c_dividers_52xx[] = { static int mpc_i2c_get_fdr_52xx(struct device_node *node, u32 clock, u32 *real_clk) { + struct fwnode_handle *fwnode = of_fwnode_handle(node); const struct mpc_i2c_divider *div = NULL; unsigned int pvr = mfspr(SPRN_PVR); u32 divider; @@ -246,12 +247,12 @@ static int mpc_i2c_get_fdr_52xx(struct device_node *node, u32 clock, if (clock == MPC_I2C_CLOCK_LEGACY) { /* see below - default fdr = 0x3f -> div = 2048 */ - *real_clk = mpc5xxx_get_bus_frequency(node) / 2048; + *real_clk = mpc5xxx_fwnode_get_bus_frequency(fwnode) / 2048; return -EINVAL; } /* Determine divider value */ - divider = mpc5xxx_get_bus_frequency(node) / clock; + divider = mpc5xxx_fwnode_get_bus_frequency(fwnode) / clock; /* * We want to choose an FDR/DFSR that generates an I2C bus speed that @@ -266,7 +267,7 @@ static int mpc_i2c_get_fdr_52xx(struct device_node *node, u32 clock, break; } - *real_clk = mpc5xxx_get_bus_frequency(node) / div->divider; + *real_clk = mpc5xxx_fwnode_get_bus_frequency(fwnode) / div->divider; return (int)div->fdr; } diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 439fab4eaa85..1bbb9ca08d40 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req) switch(req->data[1]) { case ADB_QUERY_GETDEVINFO: - if (req->nbytes < 3) + if (req->nbytes < 3 || req->data[2] >= 16) break; mutex_lock(&adb_handler_mutex); req->reply[0] = adb_handler[req->data[2]].original_address; diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index 65ba6697bd7d..c469b2f3e57d 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -63,7 +63,7 @@ static u32 mpc52xx_can_get_clock(struct platform_device *ofdev, else *mscan_clksrc = MSCAN_CLKSRC_XTAL; - freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); + freq = mpc5xxx_get_bus_frequency(&ofdev->dev); if (!freq) return 0; diff --git 
a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 5ddb769bdfb4..a7f4c3c29f3e 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -924,7 +924,7 @@ static int mpc52xx_fec_probe(struct platform_device *op) /* Start with safe defaults for link connection */ priv->speed = 100; priv->duplex = DUPLEX_HALF; - priv->mdio_speed = ((mpc5xxx_get_bus_frequency(np) >> 20) / 5) << 1; + priv->mdio_speed = ((mpc5xxx_get_bus_frequency(&op->dev) >> 20) / 5) << 1; /* The current speed preconfigures the speed of the MII link */ prop = of_get_property(np, "current-speed", &prop_size); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c index f85b5e81dfc1..95f778cce98c 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c @@ -100,8 +100,7 @@ static int mpc52xx_fec_mdio_probe(struct platform_device *of) dev_set_drvdata(dev, bus); /* set MII speed */ - out_be32(&priv->regs->mii_speed, - ((mpc5xxx_get_bus_frequency(of->dev.of_node) >> 20) / 5) << 1); + out_be32(&priv->regs->mii_speed, ((mpc5xxx_get_bus_frequency(dev) >> 20) / 5) << 1); err = of_mdiobus_register(bus, np); if (err) diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 152f4d83765a..d37d7a19a759 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -102,7 +102,7 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) struct resource res; struct mii_bus *new_bus; struct fec_info *fec; - int (*get_bus_freq)(struct device_node *); + int (*get_bus_freq)(struct device *); int ret = -ENOMEM, clock, speed; match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); @@ -136,7 +136,7 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) } if (get_bus_freq) { 
- clock = get_bus_freq(ofdev->dev.of_node); + clock = get_bus_freq(&ofdev->dev); if (!clock) { /* Use maximum divider if clock is unknown */ dev_warn(&ofdev->dev, "could not determine IPS clock\n"); diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 244fc27215dc..631eda2d467e 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index 322d3ad38159..84b541a57b7b 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include @@ -1882,11 +1882,6 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) goto out_release; } - /* Old junk for root discovery, that will die ultimately */ -#if !defined(MODULE) - note_scsi_host(mesh, mesh_host); -#endif - mesh_host->base = macio_resource_start(mdev, 0); mesh_host->irq = macio_irq(mdev, 0); ms = (struct mesh_state *) mesh_host->hostdata; diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c index 3ebdce804b90..bc5e36fd4288 100644 --- a/drivers/spi/spi-mpc52xx.c +++ b/drivers/spi/spi-mpc52xx.c @@ -437,7 +437,7 @@ static int mpc52xx_spi_probe(struct platform_device *op) ms->irq0 = irq_of_parse_and_map(op->dev.of_node, 0); ms->irq1 = irq_of_parse_and_map(op->dev.of_node, 1); ms->state = mpc52xx_spi_fsmstate_idle; - ms->ipb_freq = mpc5xxx_get_bus_frequency(op->dev.of_node); + ms->ipb_freq = mpc5xxx_get_bus_frequency(&op->dev); ms->gpio_cs_count = of_gpio_count(op->dev.of_node); if (ms->gpio_cs_count > 0) { master->num_chipselect = ms->gpio_cs_count; diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index e50f069b5ebb..3f1986c89694 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -1630,7 +1630,7 @@ mpc52xx_console_setup(struct console *co, char *options) 
return ret; } - uartclk = mpc5xxx_get_bus_frequency(np); + uartclk = mpc5xxx_fwnode_get_bus_frequency(of_fwnode_handle(np)); if (uartclk == 0) { pr_debug("Could not find uart clock frequency!\n"); return -EINVAL; @@ -1747,7 +1747,7 @@ static int mpc52xx_uart_of_probe(struct platform_device *op) /* set the uart clock to the input clock of the psc, the different * prescalers are taken into account in the set_baudrate() methods * of the respective chip */ - uartclk = mpc5xxx_get_bus_frequency(op->dev.of_node); + uartclk = mpc5xxx_get_bus_frequency(&op->dev); if (uartclk == 0) { dev_dbg(&op->dev, "Could not find uart clock frequency!\n"); return -EINVAL; diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index b1acb1ebebe9..91001990e351 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #ifdef CONFIG_PPC32 diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 0796f6a9e8ff..9295492d24f7 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1963,6 +1963,14 @@ config MEN_A21_WDT # PPC64 Architecture +config PSERIES_WDT + tristate "POWER Architecture Platform Watchdog Timer" + depends on PPC_PSERIES + select WATCHDOG_CORE + help + Driver for virtual watchdog timers provided by PAPR + hypervisors (e.g. PowerVM, KVM). 
+ config WATCHDOG_RTAS tristate "RTAS watchdog" depends on PPC_RTAS diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index c324e9d820e9..cdeb119e6e61 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -187,6 +187,7 @@ obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o # PPC64 Architecture +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o # S390 Architecture diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c new file mode 100644 index 000000000000..7f53b5293409 --- /dev/null +++ b/drivers/watchdog/pseries-wdt.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 International Business Machines, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pseries-wdt" + +/* + * H_WATCHDOG Input + * + * R4: "flags": + * + * Bits 48-55: "operation" + */ +#define PSERIES_WDTF_OP_START 0x100UL /* start timer */ +#define PSERIES_WDTF_OP_STOP 0x200UL /* stop timer */ +#define PSERIES_WDTF_OP_QUERY 0x300UL /* query timer capabilities */ + +/* + * Bits 56-63: "timeoutAction" (for "Start Watchdog" only) + */ +#define PSERIES_WDTF_ACTION_HARD_POWEROFF 0x1UL /* poweroff */ +#define PSERIES_WDTF_ACTION_HARD_RESTART 0x2UL /* restart */ +#define PSERIES_WDTF_ACTION_DUMP_RESTART 0x3UL /* dump + restart */ + +/* + * H_WATCHDOG Output + * + * R3: Return code + * + * H_SUCCESS The operation completed. + * + * H_BUSY The hypervisor is too busy; retry the operation. + * + * H_PARAMETER The given "flags" are somehow invalid. Either the + * "operation" or "timeoutAction" is invalid, or a + * reserved bit is set. + * + * H_P2 The given "watchdogNumber" is zero or exceeds the + * supported maximum value. + * + * H_P3 The given "timeoutInMs" is below the supported + * minimum value. + * + * H_NOOP The given "watchdogNumber" is already stopped. 
+ * + * H_HARDWARE The operation failed for ineffable reasons. + * + * H_FUNCTION The H_WATCHDOG hypercall is not supported by this + * hypervisor. + * + * R4: + * + * - For the "Query Watchdog Capabilities" operation, a 64-bit + * structure: + */ +#define PSERIES_WDTQ_MIN_TIMEOUT(cap) (((cap) >> 48) & 0xffff) +#define PSERIES_WDTQ_MAX_NUMBER(cap) (((cap) >> 32) & 0xffff) + +static const unsigned long pseries_wdt_action[] = { + [0] = PSERIES_WDTF_ACTION_HARD_POWEROFF, + [1] = PSERIES_WDTF_ACTION_HARD_RESTART, + [2] = PSERIES_WDTF_ACTION_DUMP_RESTART, +}; + +#define WATCHDOG_ACTION 1 +static unsigned int action = WATCHDOG_ACTION; +module_param(action, uint, 0444); +MODULE_PARM_DESC(action, "Action taken when watchdog expires (default=" + __MODULE_STRING(WATCHDOG_ACTION) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0444); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define WATCHDOG_TIMEOUT 60 +static unsigned int timeout = WATCHDOG_TIMEOUT; +module_param(timeout, uint, 0444); +MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds (default=" + __MODULE_STRING(WATCHDOG_TIMEOUT) ")"); + +struct pseries_wdt { + struct watchdog_device wd; + unsigned long action; + unsigned long num; /* Watchdog numbers are 1-based */ +}; + +static int pseries_wdt_start(struct watchdog_device *wdd) +{ + struct pseries_wdt *pw = watchdog_get_drvdata(wdd); + struct device *dev = wdd->parent; + unsigned long flags, msecs; + long rc; + + flags = pw->action | PSERIES_WDTF_OP_START; + msecs = wdd->timeout * MSEC_PER_SEC; + rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs); + if (rc != H_SUCCESS) { + dev_crit(dev, "H_WATCHDOG: %ld: failed to start timer %lu", + rc, pw->num); + return -EIO; + } + return 0; +} + +static int pseries_wdt_stop(struct watchdog_device *wdd) +{ + struct pseries_wdt *pw = watchdog_get_drvdata(wdd); + struct device *dev = wdd->parent; + 
long rc; + + rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num); + if (rc != H_SUCCESS && rc != H_NOOP) { + dev_crit(dev, "H_WATCHDOG: %ld: failed to stop timer %lu", + rc, pw->num); + return -EIO; + } + return 0; +} + +static struct watchdog_info pseries_wdt_info = { + .identity = DRV_NAME, + .options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT + | WDIOF_PRETIMEOUT, +}; + +static const struct watchdog_ops pseries_wdt_ops = { + .owner = THIS_MODULE, + .start = pseries_wdt_start, + .stop = pseries_wdt_stop, +}; + +static int pseries_wdt_probe(struct platform_device *pdev) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct pseries_wdt *pw; + unsigned long cap; + long msecs, rc; + int err; + + rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY); + if (rc == H_FUNCTION) + return -ENODEV; + if (rc != H_SUCCESS) + return -EIO; + cap = ret[0]; + + pw = devm_kzalloc(&pdev->dev, sizeof(*pw), GFP_KERNEL); + if (!pw) + return -ENOMEM; + + /* + * Assume watchdogNumber 1 for now. If we ever support + * multiple timers we will need to devise a way to choose a + * distinct watchdogNumber for each platform device at device + * registration time. 
+ */ + pw->num = 1; + if (PSERIES_WDTQ_MAX_NUMBER(cap) < pw->num) + return -ENODEV; + + if (action >= ARRAY_SIZE(pseries_wdt_action)) + return -EINVAL; + pw->action = pseries_wdt_action[action]; + + pw->wd.parent = &pdev->dev; + pw->wd.info = &pseries_wdt_info; + pw->wd.ops = &pseries_wdt_ops; + msecs = PSERIES_WDTQ_MIN_TIMEOUT(cap); + pw->wd.min_timeout = DIV_ROUND_UP(msecs, MSEC_PER_SEC); + pw->wd.max_timeout = UINT_MAX / 1000; /* from linux/watchdog.h */ + pw->wd.timeout = timeout; + if (watchdog_init_timeout(&pw->wd, 0, NULL)) + return -EINVAL; + watchdog_set_nowayout(&pw->wd, nowayout); + watchdog_stop_on_reboot(&pw->wd); + watchdog_stop_on_unregister(&pw->wd); + watchdog_set_drvdata(&pw->wd, pw); + + err = devm_watchdog_register_device(&pdev->dev, &pw->wd); + if (err) + return err; + + platform_set_drvdata(pdev, &pw->wd); + + return 0; +} + +static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct watchdog_device *wd = platform_get_drvdata(pdev); + + if (watchdog_active(wd)) + return pseries_wdt_stop(wd); + return 0; +} + +static int pseries_wdt_resume(struct platform_device *pdev) +{ + struct watchdog_device *wd = platform_get_drvdata(pdev); + + if (watchdog_active(wd)) + return pseries_wdt_start(wd); + return 0; +} + +static const struct platform_device_id pseries_wdt_id[] = { + { .name = "pseries-wdt" }, + {} +}; +MODULE_DEVICE_TABLE(platform, pseries_wdt_id); + +static struct platform_driver pseries_wdt_driver = { + .driver = { + .name = DRV_NAME, + }, + .id_table = pseries_wdt_id, + .probe = pseries_wdt_probe, + .resume = pseries_wdt_resume, + .suspend = pseries_wdt_suspend, +}; +module_platform_driver(pseries_wdt_driver); + +MODULE_AUTHOR("Alexey Kardashevskiy"); +MODULE_AUTHOR("Scott Cheloha"); +MODULE_DESCRIPTION("POWER Architecture Platform Watchdog Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 750c7f395ca9..f700ff2df074 100644 --- a/include/linux/nmi.h +++ 
b/include/linux/nmi.h @@ -122,6 +122,8 @@ int watchdog_nmi_probe(void); int watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); +void lockup_detector_reconfigure(void); + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ecb0e8346e65..8e61f21e7e33 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -537,7 +537,7 @@ int lockup_detector_offline_cpu(unsigned int cpu) return 0; } -static void lockup_detector_reconfigure(void) +static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); @@ -557,6 +557,13 @@ static void lockup_detector_reconfigure(void) __lockup_detector_cleanup(); } +void lockup_detector_reconfigure(void) +{ + mutex_lock(&watchdog_mutex); + __lockup_detector_reconfigure(); + mutex_unlock(&watchdog_mutex); +} + /* * Create the watchdog infrastructure and configure the detector(s). */ @@ -573,13 +580,13 @@ static __init void lockup_detector_setup(void) return; mutex_lock(&watchdog_mutex); - lockup_detector_reconfigure(); + __lockup_detector_reconfigure(); softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ -static void lockup_detector_reconfigure(void) +static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); @@ -587,9 +594,13 @@ static void lockup_detector_reconfigure(void) watchdog_nmi_start(); cpus_read_unlock(); } +void lockup_detector_reconfigure(void) +{ + __lockup_detector_reconfigure(); +} static inline void lockup_detector_setup(void) { - lockup_detector_reconfigure(); + __lockup_detector_reconfigure(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ @@ -629,7 +640,7 @@ static void proc_watchdog_update(void) { /* Remove impossible cpus to keep sysctl output clean. 
*/ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - lockup_detector_reconfigure(); + __lockup_detector_reconfigure(); } /* diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files index 379e86c71bed..51e5c76bcd07 100755 --- a/scripts/remove-stale-files +++ b/scripts/remove-stale-files @@ -20,6 +20,8 @@ set -e # yard. Stale files stay in this file for a while (for some release cycles?), # then will be really dead and removed from the code base entirely. +rm -f arch/powerpc/purgatory/kexec-purgatory.c + # These were previously generated source files. When you are building the kernel # with O=, make sure to remove the stale files in the output tree. Otherwise, # the build system wrongly compiles the stale ones. diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index 886dc026fe7a..26cde8ea1f49 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -5,6 +5,16 @@ #include #include +#ifdef __powerpc64__ +#define PPC_LL ld +#define PPC_STL std +#define PPC_STLU stdu +#else +#define PPC_LL lwz +#define PPC_STL stw +#define PPC_STLU stwu +#endif + #define LOAD_REG_IMMEDIATE(reg, expr) \ lis reg, (expr)@highest; \ ori reg, reg, (expr)@higher; \ @@ -14,16 +24,20 @@ /* * Note: These macros assume that variables being stored on the stack are - * doublewords, while this is usually the case it may not always be the + * sizeof(long), while this is usually the case it may not always be the * case for each use case. 
*/ +#ifdef __powerpc64__ + +// ABIv2 #if defined(_CALL_ELF) && _CALL_ELF == 2 #define STACK_FRAME_MIN_SIZE 32 #define STACK_FRAME_TOC_POS 24 #define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8)) #define __STACK_FRAME_LOCAL(_num_params, _var_num) \ ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8)) -#else + +#else // ABIv1 below #define STACK_FRAME_MIN_SIZE 112 #define STACK_FRAME_TOC_POS 40 #define __STACK_FRAME_PARAM(i) (48 + ((i)*8)) @@ -34,7 +48,24 @@ */ #define __STACK_FRAME_LOCAL(_num_params, _var_num) \ (112 + ((_var_num)*8)) -#endif + + +#endif // ABIv2 + +// Common 64-bit +#define STACK_FRAME_LR_POS 16 +#define STACK_FRAME_CR_POS 8 + +#else // 32-bit below + +#define STACK_FRAME_MIN_SIZE 16 +#define STACK_FRAME_LR_POS 4 + +#define __STACK_FRAME_PARAM(_param) (STACK_FRAME_MIN_SIZE + ((_param)*4)) +#define __STACK_FRAME_LOCAL(_num_params, _var_num) \ + ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*4)) + +#endif // __powerpc64__ /* Parameter x saved to the stack */ #define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var) @@ -42,8 +73,6 @@ /* Local variable x saved to the stack after x parameters */ #define STACK_FRAME_LOCAL(num_params, var) \ __STACK_FRAME_LOCAL(num_params, var) -#define STACK_FRAME_LR_POS 16 -#define STACK_FRAME_CR_POS 8 /* * It is very important to note here that _extra is the extra amount of @@ -56,19 +85,21 @@ * preprocessed incorrectly, hence r0. 
*/ #define PUSH_BASIC_STACK(_extra) \ - mflr r0; \ - std r0, STACK_FRAME_LR_POS(%r1); \ - stdu %r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \ - mfcr r0; \ - stw r0, STACK_FRAME_CR_POS(%r1); \ - std %r2, STACK_FRAME_TOC_POS(%r1); + mflr r0; \ + PPC_STL r0, STACK_FRAME_LR_POS(%r1); \ + PPC_STLU %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); #define POP_BASIC_STACK(_extra) \ - ld %r2, STACK_FRAME_TOC_POS(%r1); \ - lwz r0, STACK_FRAME_CR_POS(%r1); \ - mtcr r0; \ - addi %r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \ - ld r0, STACK_FRAME_LR_POS(%r1); \ + addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \ + PPC_LL r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; +.macro OP_REGS op, reg_width, start_reg, end_reg, base_reg, base_reg_offset=0, skip=0 + .set i, \start_reg + .rept (\end_reg - \start_reg + 1) + \op i, (\reg_width * (i - \skip) + \base_reg_offset)(\base_reg) + .set i, i + 1 + .endr +.endm + #endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index c422be8a42b2..d5a547f72669 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -55,6 +55,10 @@ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) #define SPRN_PVR 0x11F +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ @@ -123,45 +127,44 @@ "li 30, %[" #_asm_symbol_name_immed "];" \ "li 31, %[" #_asm_symbol_name_immed "];" -#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \ - "lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 
3, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 31, 0(%[" #_asm_symbol_name_addr "]);" +#define ASM_LOAD_FPR(_asm_symbol_name_addr) \ + "lfd 0, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 1, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 2, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 3, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 4, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 5, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 6, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 7, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 8, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 9, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 10, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 11, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 12, 
0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 13, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 14, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 15, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 16, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 17, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 18, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 19, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 20, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 21, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 22, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 23, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 24, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 25, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 26, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 27, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 28, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 29, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 30, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 31, 0(%[" #_asm_symbol_name_addr "]);" #ifndef __ASSEMBLER__ void store_gpr(unsigned long *addr); void load_gpr(unsigned long *addr); -void load_fpr_single_precision(float *addr); -void store_fpr_single_precision(float *addr); +void store_fpr(double *addr); #endif /* end of __ASSEMBLER__ */ #endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index b9fa9cd709df..e222a5858450 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -74,6 +74,16 @@ static inline bool have_hwcap2(unsigned long ftr2) } #endif +static inline char *auxv_base_platform(void) +{ + return ((char *)get_auxv_entry(AT_BASE_PLATFORM)); +} + +static inline char *auxv_platform(void) +{ + return ((char *)get_auxv_entry(AT_PLATFORM)); +} + bool is_ppc64le(void); int using_hash_mmu(bool *using_hash); diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S index 9304ea7d59b9..6d1af4a9a6b4 100644 --- 
a/tools/testing/selftests/powerpc/lib/reg.S +++ b/tools/testing/selftests/powerpc/lib/reg.S @@ -53,79 +53,42 @@ FUNC_START(store_gpr) blr FUNC_END(store_gpr) -/* Single Precision Float - float buf[32] */ -FUNC_START(load_fpr_single_precision) - lfs 0, 0*4(3) - lfs 1, 1*4(3) - lfs 2, 2*4(3) - lfs 3, 3*4(3) - lfs 4, 4*4(3) - lfs 5, 5*4(3) - lfs 6, 6*4(3) - lfs 7, 7*4(3) - lfs 8, 8*4(3) - lfs 9, 9*4(3) - lfs 10, 10*4(3) - lfs 11, 11*4(3) - lfs 12, 12*4(3) - lfs 13, 13*4(3) - lfs 14, 14*4(3) - lfs 15, 15*4(3) - lfs 16, 16*4(3) - lfs 17, 17*4(3) - lfs 18, 18*4(3) - lfs 19, 19*4(3) - lfs 20, 20*4(3) - lfs 21, 21*4(3) - lfs 22, 22*4(3) - lfs 23, 23*4(3) - lfs 24, 24*4(3) - lfs 25, 25*4(3) - lfs 26, 26*4(3) - lfs 27, 27*4(3) - lfs 28, 28*4(3) - lfs 29, 29*4(3) - lfs 30, 30*4(3) - lfs 31, 31*4(3) +/* Double Precision Float - double buf[32] */ +FUNC_START(store_fpr) + stfd 0, 0*8(3) + stfd 1, 1*8(3) + stfd 2, 2*8(3) + stfd 3, 3*8(3) + stfd 4, 4*8(3) + stfd 5, 5*8(3) + stfd 6, 6*8(3) + stfd 7, 7*8(3) + stfd 8, 8*8(3) + stfd 9, 9*8(3) + stfd 10, 10*8(3) + stfd 11, 11*8(3) + stfd 12, 12*8(3) + stfd 13, 13*8(3) + stfd 14, 14*8(3) + stfd 15, 15*8(3) + stfd 16, 16*8(3) + stfd 17, 17*8(3) + stfd 18, 18*8(3) + stfd 19, 19*8(3) + stfd 20, 20*8(3) + stfd 21, 21*8(3) + stfd 22, 22*8(3) + stfd 23, 23*8(3) + stfd 24, 24*8(3) + stfd 25, 25*8(3) + stfd 26, 26*8(3) + stfd 27, 27*8(3) + stfd 28, 28*8(3) + stfd 29, 29*8(3) + stfd 30, 30*8(3) + stfd 31, 31*8(3) blr -FUNC_END(load_fpr_single_precision) - -/* Single Precision Float - float buf[32] */ -FUNC_START(store_fpr_single_precision) - stfs 0, 0*4(3) - stfs 1, 1*4(3) - stfs 2, 2*4(3) - stfs 3, 3*4(3) - stfs 4, 4*4(3) - stfs 5, 5*4(3) - stfs 6, 6*4(3) - stfs 7, 7*4(3) - stfs 8, 8*4(3) - stfs 9, 9*4(3) - stfs 10, 10*4(3) - stfs 11, 11*4(3) - stfs 12, 12*4(3) - stfs 13, 13*4(3) - stfs 14, 14*4(3) - stfs 15, 15*4(3) - stfs 16, 16*4(3) - stfs 17, 17*4(3) - stfs 18, 18*4(3) - stfs 19, 19*4(3) - stfs 20, 20*4(3) - stfs 21, 21*4(3) - stfs 22, 
22*4(3) - stfs 23, 23*4(3) - stfs 24, 24*4(3) - stfs 25, 25*4(3) - stfs 26, 26*4(3) - stfs 27, 27*4(3) - stfs 28, 28*4(3) - stfs 29, 29*4(3) - stfs 30, 30*4(3) - stfs 31, 31*4(3) - blr -FUNC_END(store_fpr_single_precision) +FUNC_END(store_fpr) /* VMX/VSX registers - unsigned long buf[128] */ FUNC_START(loadvsx) diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index d0c23b2e4b60..07b4893ef7af 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -7,3 +7,4 @@ fpu_signal vmx_signal vsx_preempt fpu_denormal +mma diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S index 8528c9849565..61cc88b1b26b 100644 --- a/tools/testing/selftests/powerpc/math/mma.S +++ b/tools/testing/selftests/powerpc/math/mma.S @@ -20,6 +20,9 @@ test_mma: /* xvi16ger2s */ .long 0xec042958 + /* Deprime the accumulator - xxmfacc 0 */ + .long 0x7c000162 + /* Store result in image passed in r5 */ stxvw4x 0,0,5 addi 5,5,16 diff --git a/tools/testing/selftests/powerpc/mce/.gitignore b/tools/testing/selftests/powerpc/mce/.gitignore new file mode 100644 index 000000000000..f5921462a495 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/.gitignore @@ -0,0 +1 @@ +inject-ra-err diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c index bab0dc06e90b..9b655be641c9 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -7,6 +7,7 @@ * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. 
*/ +#include #include #include #include @@ -32,7 +33,7 @@ enum type { NUM_VAL }; -int value_type(int id) +static int value_type(int id) { int val_type; @@ -54,15 +55,21 @@ int value_type(int id) return val_type; } -int verify_energy_info(void) +static int verify_energy_info(void) { const char *path = "/sys/firmware/papr/energy_scale_info"; struct dirent *entry; struct stat s; DIR *dirp; - if (stat(path, &s) || !S_ISDIR(s.st_mode)) - return -1; + errno = 0; + if (stat(path, &s)) { + SKIP_IF(errno == ENOENT); + FAIL_IF(errno); + } + + FAIL_IF(!S_ISDIR(s.st_mode)); + dirp = opendir(path); while ((entry = readdir(dirp)) != NULL) { @@ -76,25 +83,24 @@ int verify_energy_info(void) id = atoi(entry->d_name); attr_type = value_type(id); - if (attr_type == INVALID) - return -1; + FAIL_IF(attr_type == INVALID); /* Check if the files exist and have data in them */ sprintf(file_name, "%s/%d/desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); sprintf(file_name, "%s/%d/value", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); if (attr_type == STR_VAL) { sprintf(file_name, "%s/%d/value_desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); } } diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index edbd96d3b2ab..30803353bd7c 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. 
include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb sampling_tests +all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -27,6 +27,7 @@ override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) @@ -34,6 +35,7 @@ override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) @@ -41,12 +43,14 @@ override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all @@ -54,4 +58,7 @@ ebb: sampling_tests: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) 
OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb sampling_tests +event_code_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests event_code_tests diff --git a/tools/testing/selftests/powerpc/pmu/branch_loops.S b/tools/testing/selftests/powerpc/pmu/branch_loops.S new file mode 100644 index 000000000000..de758dd3cecf --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/branch_loops.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include + + .text + +#define ITER_SHIFT 31 + +FUNC_START(indirect_branch_loop) + li r3, 1 + sldi r3, r3, ITER_SHIFT + +1: cmpdi r3, 0 + beqlr + + addi r3, r3, -1 + + ld r4, 2f@got(%r2) + mtctr r4 + bctr + + .balign 32 +2: b 1b + +FUNC_END(indirect_branch_loop) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore index 2920fb39439b..64d8dfdac74a 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -21,3 +21,4 @@ back_to_back_ebbs_test lost_exception_test no_handler_test cycles_with_mmcr2_test +regs_access_pmccext_test diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index 4b45a2e70f62..fc32187d483d 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -50,6 +50,7 @@ int cycles_with_mmcr2(void) expected[1] = MMCR2_EXPECTED_2; i = 0; bad_mmcr2 = false; + actual = 0; /* Make sure we loop until we take at least one EBB */ while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) || diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile 
b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile new file mode 100644 index 000000000000..4e07d7046457 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ + group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ + group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ + blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ + group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \ + group_constraint_unit_test group_constraint_thresh_ctl_test group_constraint_thresh_sel_test \ + hw_cache_event_type_test + +top_srcdir = ../../../../../.. +include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c new file mode 100644 index 000000000000..fafeff19cb34 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include +#include +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_DTLB_MISS_16G 0x1c058 +#define PM_DERAT_MISS_2M 0x1c05a +#define PM_DTLB_MISS_2M 0x1c05c +#define PM_MRK_DTLB_MISS_1G 0x1d15c +#define PM_DTLB_MISS_4K 0x2c056 +#define PM_DERAT_MISS_1G 0x2c05a +#define PM_MRK_DERAT_MISS_2M 0x2d152 +#define PM_MRK_DTLB_MISS_4K 0x2d156 +#define PM_MRK_DTLB_MISS_16G 0x2d15e +#define PM_DTLB_MISS_64K 0x3c056 +#define PM_MRK_DERAT_MISS_1G 0x3d152 +#define PM_MRK_DTLB_MISS_64K 0x3d156 +#define PM_DISP_HELD_SYNC_HOLD 0x4003c +#define PM_DTLB_MISS_16M 0x4c056 +#define PM_DTLB_MISS_1G 0x4c05a +#define PM_MRK_DTLB_MISS_16M 0x4c15e +#define PM_MRK_ST_DONE_L2 0x10134 +#define PM_RADIX_PWC_L1_HIT 0x1f056 +#define PM_FLOP_CMPL 0x100f4 +#define PM_MRK_NTF_FIN 0x20112 +#define PM_RADIX_PWC_L2_HIT 0x2d024 +#define PM_IFETCH_THROTTLE 0x3405e +#define PM_MRK_L2_TM_ST_ABORT_SISTER 0x3e15c +#define PM_RADIX_PWC_L3_HIT 0x3f056 +#define PM_RUN_CYC_SMT2_MODE 0x3006c +#define PM_TM_TX_PASS_RUN_INST 0x4e014 + +#define PVR_POWER9_CUMULUS 0x00002000 + +int blacklist_events_dd21[] = { + PM_MRK_ST_DONE_L2, + PM_RADIX_PWC_L1_HIT, + PM_FLOP_CMPL, + PM_MRK_NTF_FIN, + PM_RADIX_PWC_L2_HIT, + PM_IFETCH_THROTTLE, + PM_MRK_L2_TM_ST_ABORT_SISTER, + PM_RADIX_PWC_L3_HIT, + PM_RUN_CYC_SMT2_MODE, + PM_TM_TX_PASS_RUN_INST, + PM_DISP_HELD_SYNC_HOLD, +}; + +int blacklist_events_dd22[] = { + PM_DTLB_MISS_16G, + PM_DERAT_MISS_2M, + PM_DTLB_MISS_2M, + PM_MRK_DTLB_MISS_1G, + PM_DTLB_MISS_4K, + PM_DERAT_MISS_1G, + PM_MRK_DERAT_MISS_2M, + PM_MRK_DTLB_MISS_4K, + PM_MRK_DTLB_MISS_16G, + PM_DTLB_MISS_64K, + PM_MRK_DERAT_MISS_1G, + PM_MRK_DTLB_MISS_64K, + PM_DISP_HELD_SYNC_HOLD, + PM_DTLB_MISS_16M, + PM_DTLB_MISS_1G, + PM_MRK_DTLB_MISS_16M, +}; + +int pvr_min; + +/* + * check for power9 support for 2.1 and + * 2.2 model where blacklist is applicable. 
+ */ +int check_for_power9_version(void) +{ + pvr_min = PVR_MIN(mfspr(SPRN_PVR)); + + SKIP_IF(PVR_VER(pvr) != POWER9); + SKIP_IF(!(pvr & PVR_POWER9_CUMULUS)); + + SKIP_IF(!(3 - pvr_min)); + + return 0; +} + +/* + * Testcase to ensure that using blacklisted bits in + * event code should cause event_open to fail in power9 + */ + +static int blacklisted_events(void) +{ + struct event event; + int i = 0; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * check for power9 support for 2.1 and + * 2.2 model where blacklist is applicable. + */ + SKIP_IF(check_for_power9_version()); + + /* Skip for Generic compat mode */ + SKIP_IF(check_for_generic_compat_pmu()); + + if (pvr_min == 1) { + for (i = 0; i < ARRAY_SIZE(blacklist_events_dd21); i++) { + event_init(&event, blacklist_events_dd21[i]); + FAIL_IF(!event_open(&event)); + } + } else if (pvr_min == 2) { + for (i = 0; i < ARRAY_SIZE(blacklist_events_dd22); i++) { + event_init(&event, blacklist_events_dd22[i]); + FAIL_IF(!event_open(&event)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(blacklisted_events, "blacklisted_events"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c new file mode 100644 index 000000000000..8be7aada6523 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_RUN_CYC_ALT 0x200f4 +#define PM_INST_DISP 0x200f2 +#define PM_BR_2PATH 0x20036 +#define PM_LD_MISS_L1 0x3e054 +#define PM_RUN_INST_CMPL_ALT 0x400fa + +#define EventCode_1 0x100fc +#define EventCode_2 0x200fa +#define EventCode_3 0x300fc +#define EventCode_4 0x400fc + +/* + * Check for event alternatives. + */ + +static int event_alternatives_tests_p10(void) +{ + struct event *e, events[5]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PVR check is used here since PMU specific data like + * alternative events is handled by respective PMU driver + * code and using PVR will work correctly for all cases + * including generic compat mode. + */ + SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER10); + + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * Test for event alternative for 0x0001e + * and 0x00002. + */ + e = &events[0]; + event_init(e, 0x0001e); + + e = &events[1]; + event_init(e, EventCode_1); + + e = &events[2]; + event_init(e, EventCode_2); + + e = &events[3]; + event_init(e, EventCode_3); + + e = &events[4]; + event_init(e, EventCode_4); + + FAIL_IF(event_open(&events[0])); + + /* + * Expected to pass since 0x0001e has alternative event + * 0x600f4 in PMC6. So it can go in with other events + * in PMC1 to PMC4. + */ + for (i = 1; i < 5; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 5; i++) + event_close(&events[i]); + + e = &events[0]; + event_init(e, 0x00002); + + e = &events[1]; + event_init(e, EventCode_1); + + e = &events[2]; + event_init(e, EventCode_2); + + e = &events[3]; + event_init(e, EventCode_3); + + e = &events[4]; + event_init(e, EventCode_4); + + FAIL_IF(event_open(&events[0])); + + /* + * Expected to pass since 0x00002 has alternative event + * 0x500fa in PMC5. So it can go in with other events + * in PMC1 to PMC4.
+ */ + for (i = 1; i < 5; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 5; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(event_alternatives_tests_p10, "event_alternatives_tests_p10"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c new file mode 100644 index 000000000000..f7dcf0e0447c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_RUN_CYC_ALT 0x200f4 +#define PM_INST_DISP 0x200f2 +#define PM_BR_2PATH 0x20036 +#define PM_LD_MISS_L1 0x3e054 +#define PM_RUN_INST_CMPL_ALT 0x400fa + +#define EventCode_1 0x200fa +#define EventCode_2 0x200fc +#define EventCode_3 0x300fc +#define EventCode_4 0x400fc + +/* + * Check for event alternatives. + */ + +static int event_alternatives_tests_p9(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PVR check is used here since PMU specific data like + * alternative events is handled by respective PMU driver + * code and using PVR will work correctly for all cases + * including generic compat mode. + */ + SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER9); + + /* Skip for generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event for PM_RUN_CYC_ALT */ + event_init(&leader, PM_RUN_CYC_ALT); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_1); + + /* + * Expected to pass since PM_RUN_CYC_ALT in PMC2 has alternative event + * 0x600f4. 
So it can go in with EventCode_1 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_INST_DISP); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + /* + * Expected to pass since PM_INST_DISP in PMC2 has alternative event + * 0x300f2 in PMC3. So it can go in with EventCode_2 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_BR_2PATH); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + /* + * Expected to pass since PM_BR_2PATH in PMC2 has alternative event + * 0x40036 in PMC4. So it can go in with EventCode_2 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_LD_MISS_L1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_3); + /* + * Expected to pass since PM_LD_MISS_L1 in PMC3 has alternative event + * 0x400f0 in PMC4. So it can go in with EventCode_3 which is using PMC3 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_RUN_INST_CMPL_ALT); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_4); + /* + * Expected to pass since PM_RUN_INST_CMPL_ALT in PMC4 has alternative event + * 0x500fa in PMC5. 
So it can go in with EventCode_4 which is using PMC4 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(event_alternatives_tests_p9, "event_alternatives_tests_p9"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c new file mode 100644 index 000000000000..0d237c15d3f2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase to ensure that using invalid event in generic + * event for PERF_TYPE_HARDWARE should fail + */ + +static int generic_events_valid_test(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* generic events is different in compat_mode */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * Invalid generic events in power10: + * - PERF_COUNT_HW_BUS_CYCLES + * - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND + * - PERF_COUNT_HW_STALLED_CYCLES_BACKEND + * - PERF_COUNT_HW_REF_CPU_CYCLES + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + 
event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { + /* + * Invalid generic events in power9: + * - PERF_COUNT_HW_BUS_CYCLES + * - PERF_COUNT_HW_REF_CPU_CYCLES + */ + event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, 
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(generic_events_valid_test, "generic_events_valid_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c new file mode 100644 index 000000000000..f4be05aa3a3d --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All L1 D cache load references counted at finish, gated by reject */ +#define EventCode_1 0x1100fc +/* Load Missed L1 */ +#define EventCode_2 0x23e054 +/* Load Missed L1 */ +#define EventCode_3 0x13e054 + +/* + * Testcase for group constraint check of data and instructions + * cache qualifier bits which is used to program cache select field in + * Monitor Mode Control Register 1 (MMCR1: 16-17) for l1 cache. + * All events in the group should match cache select bits otherwise + * event_open for the group will fail. 
+ */ +static int group_constraint_cache(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group constraint check for l1 cache select bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't request same l1 cache select bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group constraint l1 cache select test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling event request same l1 cache select bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_cache, "group_constraint_cache"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c new file mode 100644 index 000000000000..85a636886069 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All successful D-side store dispatches for this thread */ +#define EventCode_1 0x010000046080 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode_2 0x26880 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode_3 0x010000026880 + +/* + * Testcase for group constraint check of l2l3_sel bits which is + * used to program l2l3 select field in Monitor Mode Control Register 0 + * (MMCR0: 56-60).
+ * All events in the group should match l2l3_sel bits otherwise + * event_open for the group should fail. + */ +static int group_constraint_l2l3_sel(void) +{ + struct event event, leader; + + /* + * Check for platform support for the test. + * This test is only applicable on power10 + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group constraint check for l2l3_sel bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't request same l2l3_sel bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group constraint l2l3_sel test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling event request same l2l3_sel bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_l2l3_sel, "group_constraint_l2l3_sel"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c new file mode 100644 index 000000000000..ff625b5d80eb --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x353c0101ec +#define EventCode_3 0x35340101ec +/* + * Test that using different sample bits in + * event code cause failure in schedule for + * group of events.
+ */ + +static int group_constraint_mmcra_sample(void) +{ + struct event event, leader; + + SKIP_IF(platform_check_for_tests()); + + /* + * Events with different "sample" field values + * in a group will fail to schedule. + * Use event with load only sampling mode as + * group leader. Use event with store only sampling + * as sibling event. + */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't use same sampling bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode_3); + + /* Expected to pass as sibling event use same sampling bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_mmcra_sample, "group_constraint_mmcra_sample"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c new file mode 100644 index 000000000000..f5ee4796d46c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for checking constraint checks for + * Performance Monitor Counter 5 (PMC5) and also + * Performance Monitor Counter 6 (PMC6). Events using + * PMC5/PMC6 shouldn't have other fields in event + * code like cache bits, thresholding or marked bit. + */ + +static int group_constraint_pmc56(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Events using PMC5 and PMC6 with cache bit + * set in event code is expected to fail. 
+ */ + event_init(&event, 0x2500fa); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x2600f4); + FAIL_IF(!event_open(&event)); + + /* + * PMC5 and PMC6 only supports base events: + * ie 500fa and 600f4. Other combinations + * should fail. + */ + event_init(&event, 0x501e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x6001e); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x501fa); + FAIL_IF(!event_open(&event)); + + /* + * Events using PMC5 and PMC6 with random + * sampling bits set in event code should fail + * to schedule. + */ + event_init(&event, 0x35340500fa); + FAIL_IF(!event_open(&event)); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_pmc56, "group_constraint_pmc56"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c new file mode 100644 index 000000000000..af7c5c75101c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for number of counters in use. + * The number of programmable counters is from + * performance monitor counter 1 to performance + * monitor counter 4 (PMC1-PMC4). If number of + * counters in use exceeds the limit, next event + * should fail to schedule. + */ + +static int group_constraint_pmc_count(void) +{ + struct event *e, events[5]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Test for number of counters in use. + * Use PMC1 to PMC4 for leader and 3 sibling + * events. Trying to open fourth event should + * fail here. 
+ */ + e = &events[0]; + event_init(e, 0x1001a); + + e = &events[1]; + event_init(e, 0x200fc); + + e = &events[2]; + event_init(e, 0x30080); + + e = &events[3]; + event_init(e, 0x40054); + + e = &events[4]; + event_init(e, 0x0002c); + + FAIL_IF(event_open(&events[0])); + + /* + * The event_open will fail on event 4 if constraint + * check fails + */ + for (i = 1; i < 5; i++) { + if (i == 4) + FAIL_IF(!event_open_with_group(&events[i], events[0].fd)); + else + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + } + + for (i = 1; i < 4; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_pmc_count, "group_constraint_pmc_count"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c new file mode 100644 index 000000000000..9225618b846a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */ +#define EventCode_1 0x14242 +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L3 */ +#define EventCode_2 0x24242 + +/* + * Testcase for group constraint check for radix_scope_qual + * field which is used to program Monitor Mode Control + * Register (MMCR1) bit 18. + * All events in the group should match radix_scope_qual + * bits, otherwise event_open for the group should fail. + */ + +static int group_constraint_radix_scope_qual(void) +{ + struct event event, leader; + + /* + * Check for platform support for the test. + * This test is applicable on power10 only.
+ */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group constraint check for radix_scope_qual bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, 0x200fc); + + /* Expected to fail as sibling event doesn't request same radix_scope_qual bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode_2); + /* Expected to pass as sibling event request same radix_scope_qual bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_radix_scope_qual, + "group_constraint_radix_scope_qual"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c new file mode 100644 index 000000000000..371cd05bb3ed --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* The processor's L1 data cache was reloaded */ +#define EventCode1 0x21C040 +#define EventCode2 0x22C040 + +/* + * Testcase for group constraint check + * when using events with same PMC. + * Multiple events in a group shouldn't + * ask for same PMC. If so it should fail. + */ + +static int group_constraint_repeat(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Two events in a group using same PMC + * should fail to get scheduled. Use same PMC2 + * for leader and sibling event which is expected + * to fail.
+ */ + event_init(&leader, EventCode1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode1); + + /* Expected to fail since sibling event is requesting same PMC as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode2); + + /* Expected to pass since sibling event is requesting different PMC */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_repeat, "group_constraint_repeat"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c new file mode 100644 index 000000000000..9f1197104e8c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here is PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load or Store only sampling + */ +#define p9_EventCode_1 0x13e35340401e0 +#define p9_EventCode_2 0x17d34340101ec +#define p9_EventCode_3 0x13e35340101ec +#define p10_EventCode_1 0x35340401e0 +#define p10_EventCode_2 0x35340101ec + +/* + * Testcase for group constraint check of thresh_cmp bits which is + * used to program thresh compare field in Monitor Mode Control Register A + * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10). + * All events in the group should match thresh compare bits otherwise + * event_open for the group will fail. 
+ */ +static int group_constraint_thresh_cmp(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + /* Init the events for the group constraint check for thresh_cmp bits */ + event_init(&leader, p10_EventCode_1); + + /* Add the thresh_cmp value for leader in config1 */ + leader.attr.config1 = 1000; + FAIL_IF(event_open(&leader)); + + event_init(&event, p10_EventCode_2); + + /* Add the different thresh_cmp value from the leader event in config1 */ + event.attr.config1 = 2000; + + /* Expected to fail as sibling and leader event request different thresh_cmp bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group constraint thresh compare test */ + event_init(&event, p10_EventCode_2); + + /* Add the same thresh_cmp value for leader and sibling event in config1 */ + event.attr.config1 = 1000; + + /* Expected to succeed as sibling and leader event request same thresh_cmp bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + } else { + /* Init the events for the group constraint check for thresh_cmp bits */ + event_init(&leader, p9_EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, p9_EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_cmp bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group constraint thresh compare test */ + event_init(&event, p9_EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_cmp bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + } + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_cmp, "group_constraint_thresh_cmp"); +} diff --git
a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c new file mode 100644 index 000000000000..e0852ebc1671 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec). + * Threshold event selection used is issue to complete and issue to + * finished for cycles + * Sampling criteria is Load or Store only sampling + */ +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x34340101ec +#define EventCode_3 0x35340101ec + +/* + * Testcase for group constraint check of thresh_ctl bits which is + * used to program thresh control field in Monitor Mode Control Register A + * (MMCRA: 48-55). + * All events in the group should match thresh ctl bits otherwise + * event_open for the group will fail. 
+ */ +static int group_constraint_thresh_ctl(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group constraint thresh control test */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_ctl bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group constraint thresh control test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_ctl bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_ctl, "group_constraint_thresh_ctl"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c new file mode 100644 index 000000000000..50a8cd843ce7 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec). + * Threshold event selection used is issue to complete + * Sampling criteria is Load or Store only sampling + */ +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x35540101ec +#define EventCode_3 0x35340101ec + +/* + * Testcase for group constraint check of thresh_sel bits which is + * used to program thresh select field in Monitor Mode Control Register A + * (MMCRA: 45-47).
+ * All events in the group should match thresh sel bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_sel(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint thresh select test */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_sel bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh select test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_sel bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_sel, "group_constraint_thresh_sel"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c new file mode 100644 index 000000000000..a2c18923dcec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All successful D-side store dispatches for this thread with PMC 2 */ +#define EventCode_1 0x26080 +/* All successful D-side store dispatches for this thread with PMC 4 */ +#define EventCode_2 0x46080 +/* All successful D-side store dispatches for this thread that were L2 Miss with PMC 3 */ +#define EventCode_3 0x36880 + +/* + * Testcase for group constraint check of unit and pmc bits which is + * used to program corresponding unit and pmc field in Monitor Mode + * Control Register 1 (MMCR1) + * One of the event in the group should use PMC 4 incase units field + * value is within 6 to 9 otherwise event_open for the group will fail. + */ +static int group_constraint_unit(void) +{ + struct event *e, events[3]; + + /* + * Check for platform support for the test. + * Constraint to use PMC4 with one of the event in group, + * when the unit is within 6 to 9 is only applicable on + * power9. 
+ */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for unit bits */ + e = &events[0]; + event_init(e, EventCode_1); + + /* Expected to fail as PMC 4 is not used with unit field value 6 to 9 */ + FAIL_IF(!event_open(&events[0])); + + /* Init the events for the group contraint check for unit bits */ + e = &events[1]; + event_init(e, EventCode_2); + + /* Expected to pass as PMC 4 is used with unit field value 6 to 9 */ + FAIL_IF(event_open(&events[1])); + + /* Init the event for the group contraint unit test */ + e = &events[2]; + event_init(e, EventCode_3); + + /* Expected to fail as PMC4 is not being used */ + FAIL_IF(!event_open_with_group(&events[2], events[0].fd)); + + /* Expected to succeed as event using PMC4 */ + FAIL_IF(event_open_with_group(&events[2], events[1].fd)); + + event_close(&events[0]); + event_close(&events[1]); + event_close(&events[2]); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_unit, "group_constraint_unit"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c new file mode 100644 index 000000000000..cff9ac170df6 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include +#include +#include "../sampling_tests/misc.h" + +/* + * Testcase for group constraint check for + * Performance Monitor Counter 5 (PMC5) and also + * Performance Monitor Counter 6 (PMC6). + * Test that pmc5/6 is excluded from constraint + * check when scheduled along with group of events. 
+ */ + +static int group_pmc56_exclude_constraints(void) +{ + struct event *e, events[3]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PMC5/6 is excluded from constraint bit + * check along with group of events. Use + * group of events with PMC5, PMC6 and also + * event with cache bit (dc_ic) set. Test expects + * this set of events to go in as a group. + */ + e = &events[0]; + event_init(e, 0x500fa); + + e = &events[1]; + event_init(e, 0x600f4); + + e = &events[2]; + event_init(e, 0x22C040); + + FAIL_IF(event_open(&events[0])); + + /* + * The event_open will fail if constraint check fails. + * Since we are asking for events in a group and since + * PMC5/PMC6 is excluded from group constraints, event_open + * should pass. + */ + for (i = 1; i < 3; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 3; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(group_pmc56_exclude_constraints, "group_pmc56_exclude_constraints"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c new file mode 100644 index 000000000000..a45b1da5b568 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp.
+ */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Load Missed L1, for power9 its pointing to PM_LD_MISS_L1_FIN (0x2c04e) and + * for power10 its pointing to PM_LD_MISS_L1 (0x3e054) + * + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_MISS + */ +#define EventCode_1 0x10000 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS + */ +#define EventCode_2 0x0100 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_DTLB + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS + */ +#define EventCode_3 0x0103 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ + * Hardware cache event result type : Invalid ( > PERF_COUNT_HW_CACHE_RESULT_MAX) + */ +#define EventCode_4 0x030000 + +/* + * A perf test to check valid hardware cache events. 
+ */ +static int hw_cache_event_type_test(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_1, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to succeed as it is pointing to L1 load miss */ + FAIL_IF(event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_2, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as the corresponding cache event entry has 0 in that index */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_3, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as the corresponding cache event entry has -1 in that index */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_4, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as hardware cache event result type is Invalid */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(hw_cache_event_type_test, "hw_cache_event_type_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c new file mode 100644 index 000000000000..f51fcab837fc --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp.
+ */ + +#include +#include +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode_1 0x1340000001c040 +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */ +#define EventCode_2 0x14242 +/* Event code with IFM, EBB, BHRB bits set in event code */ +#define EventCode_3 0xf00000000000001e + +/* + * Some of the bits in the event code is + * reserved for specific platforms. + * Event code bits 52-59 are reserved in power9, + * whereas in power10, these are used for programming + * Monitor Mode Control Register 3 (MMCR3). + * Bit 9 in event code is reserved in power9, + * whereas it is used for programming "radix_scope_qual" + * bit 18 in Monitor Mode Control Register 1 (MMCR1). + * + * Testcase to ensure that using reserved bits in + * event code should cause event_open to fail. + */ + +static int invalid_event_code(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Events using MMCR3 bits and radix scope qual bits + * should fail in power9 and should succeed in power10. + * Init the events and check for pass/fail in event open. 
+ */ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + event_init(&event, EventCode_1); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init(&event, EventCode_2); + FAIL_IF(event_open(&event)); + event_close(&event); + } else { + event_init(&event, EventCode_1); + FAIL_IF(!event_open(&event)); + + event_init(&event, EventCode_2); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(invalid_event_code, "invalid_event_code"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c new file mode 100644 index 000000000000..4c119c821b99 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for reserved bits in Monitor Mode Control + * Register A (MMCRA) Random Sampling Mode (SM) value. + * As per Instruction Set Architecture (ISA), the values + * 0x5, 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E are reserved + * for sampling mode field. Test that having these reserved + * bit values should cause event_open to fail. + * Input event code uses these sampling bits along with + * 401e0 (PM_MRK_INST_CMPL). + */ + +static int reserved_bits_mmcra_sample_elig_mode(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * MMCRA Random Sampling Mode (SM) values: 0x5 + * 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E is reserved. + * Expected to fail when using these reserved values. 
+ */ + event_init(&event, 0x50401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x90401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0xD0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x190401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1D0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1A0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1E0401e0); + FAIL_IF(!event_open(&event)); + + /* + * MMCRA Random Sampling Mode (SM) value 0x10 + * is reserved in power10 and 0xC is reserved in + * power9. + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + event_init(&event, 0x100401e0); + FAIL_IF(!event_open(&event)); + } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { + event_init(&event, 0xC0401e0); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(reserved_bits_mmcra_sample_elig_mode, + "reserved_bits_mmcra_sample_elig_mode"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c new file mode 100644 index 000000000000..4ea1c2f8913f --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for reserved bits in Monitor Mode + * Control Register A (MMCRA) thresh_ctl bits. + * For MMCRA[48:51]/[52:55]) Threshold Start/Stop, + * 0b11110000/0b00001111 is reserved. 
+ */ + +static int reserved_bits_mmcra_thresh_ctl(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * MMCRA[48:51]/[52:55]) Threshold Start/Stop + * events Selection. 0b11110000/0b00001111 is reserved. + * Expected to fail when using these reserved values. + */ + event_init(&event, 0xf0340401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x0f340401e0); + FAIL_IF(!event_open(&event)); + + return 0; +} + +int main(void) +{ + return test_harness(reserved_bits_mmcra_thresh_ctl, "reserved_bits_mmcra_thresh_ctl"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index a785c6a173b9..9e67351fb252 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -4,9 +4,12 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ - mmcr3_src_test mmcra_thresh_marked_sample_test + mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ + mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ + mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \ + bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test top_srcdir = ../../../../../.. 
include ../../../lib.mk -$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c new file mode 100644 index 000000000000..8182647c63c8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test to check bhrb filter + * map. All the branch filters are not supported + * in powerpc. Supported filters in: + * power10: any, any_call, ind_call, cond + * power9: any, any_call + * + * Testcase checks event open for invalid bhrb filter + * types should fail and valid filter types should pass. + * Testcase does validity check for these branch + * sample types. 
+ */ + +/* Invalid types for powerpc */ +/* Valid bhrb filters in power9/power10 */ +int bhrb_filter_map_valid_common[] = { + PERF_SAMPLE_BRANCH_ANY, + PERF_SAMPLE_BRANCH_ANY_CALL, +}; + +/* Valid bhrb filters in power10 */ +int bhrb_filter_map_valid_p10[] = { + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_COND, +}; + +#define EventCode 0x1001e + +static int bhrb_filter_map_test(void) +{ + struct event event; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Skip for Generic compat PMU since + * bhrb filters is not supported + */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event for the sampling test */ + event_init(&event, EventCode); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + event.attr.disabled = 1; + + /* Invalid filter maps which are expected to fail in event_open */ + for (i = PERF_SAMPLE_BRANCH_USER_SHIFT; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + /* Skip the valid branch sample type */ + if (i == PERF_SAMPLE_BRANCH_ANY_SHIFT || i == PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT \ + || i == PERF_SAMPLE_BRANCH_IND_CALL_SHIFT || i == PERF_SAMPLE_BRANCH_COND_SHIFT) + continue; + event.attr.branch_sample_type = 1U << i; + FAIL_IF(!event_open(&event)); + } + + /* valid filter maps for power9/power10 which are expected to pass in event_open */ + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_common); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_common[i]; + FAIL_IF(event_open(&event)); + event_close(&event); + } + + /* + * filter maps which are valid in power10 and invalid in power9. + * PVR check is used here since PMU specific data like bhrb filter + * alternative tests is handled by respective PMU driver code and + * using PVR will work correctly for all cases including generic + * compat mode. 
+ */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; + FAIL_IF(event_open(&event)); + event_close(&event); + } + } else { + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; + FAIL_IF(!event_open(&event)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(bhrb_filter_map_test, "bhrb_filter_map_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c new file mode 100644 index 000000000000..4644c6782974 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test for making sure + * enabling branch stack doesn't crash in any + * environment, say: + * - With generic compat PMU + * - without any PMU registered + * - With platform specific PMU + * A fix for bhrb sampling crash was added in kernel + * via commit: b460b512417a ("powerpc/perf: Fix crashes + * with generic_compat_pmu & BHRB") + * + * This testcase exercises this code by doing branch + * stack enable for software event. s/w event is used + * since software event will work even in platform + * without PMU. + */ +static int bhrb_no_crash_wo_pmu_test(void) +{ + struct event event; + + /* + * Init the event for the sampling test. + * This uses software event which works on + * any platform. 
+ */ + event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles"); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + event.attr.disabled = 1; + + /* + * Return code of event_open is not + * considered since test just expects no crash from + * using PERF_SAMPLE_BRANCH_STACK. Also for environment + * like generic compat PMU, branch stack is unsupported. + */ + event_open(&event); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(bhrb_no_crash_wo_pmu_test, "bhrb_no_crash_wo_pmu_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c new file mode 100644 index 000000000000..839d2d225da0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test for making sure + * sampling with -intr-regs doesn't crash + * in any environment, say: + * - With generic compat PMU + * - without any PMU registered + * - With platform specific PMU. + * A fix for crash with intr_regs was + * addressed in commit: f75e7d73bdf7 in kernel. + * + * This testcase exercises this code path by doing + * intr_regs using software event. Software event is + * used since s/w event will work even in platform + * without PMU. + */ +static int intr_regs_no_crash_wo_pmu_test(void) +{ + struct event event; + + /* + * Init the event for the sampling test. + * This uses software event which works on + * any platform. 
+ */ + event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles"); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_REGS_INTR; + event.attr.disabled = 1; + + /* + * Return code of event_open is not considered + * since test just expects no crash from using + * PERF_SAMPLE_REGS_INTR. + */ + event_open(&event); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(intr_regs_no_crash_wo_pmu_test, "intr_regs_no_crash_wo_pmu_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index c01a31d5f4ee..eac6420abdf1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -60,6 +60,8 @@ static void init_ev_encodes(void) switch (pvr) { case POWER10: + ev_mask_thd_cmp = 0x3ffff; + ev_shift_thd_cmp = 0; ev_mask_rsq = 1; ev_shift_rsq = 9; ev_mask_comb = 3; @@ -119,12 +121,10 @@ int check_extended_regs_support(void) return -1; } -int check_pvr_for_sampling_tests(void) +int platform_check_for_tests(void) { pvr = PVR_VER(mfspr(SPRN_PVR)); - platform_extended_mask = perf_get_platform_reg_mask(); - /* * Check for supported platforms * for sampling test @@ -136,19 +136,33 @@ int check_pvr_for_sampling_tests(void) * Check PMU driver registered by looking for * PPC_FEATURE2_EBB bit in AT_HWCAP2 */ - if (!have_hwcap2(PPC_FEATURE2_EBB)) + if (!have_hwcap2(PPC_FEATURE2_EBB) || !have_hwcap2(PPC_FEATURE2_ARCH_3_00)) goto out; + return 0; + +out: + printf("%s: Tests unsupported for this platform\n", __func__); + return -1; +} + +int check_pvr_for_sampling_tests(void) +{ + SKIP_IF(platform_check_for_tests()); + + platform_extended_mask = perf_get_platform_reg_mask(); /* check if platform supports extended regs */ if (check_extended_regs_support()) goto out; init_ev_encodes(); return 0; + out: printf("%s: Sampling tests un-supported\n", __func__); return -1; } + /* * Allocate 
mmap buffer of "mmap_pages" number of * pages. @@ -257,13 +271,32 @@ u64 *get_intr_regs(struct event *event, void *sample_buff) u64 *intr_regs; size_t size = 0; - if ((type ^ PERF_SAMPLE_REGS_INTR)) + if ((type ^ (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_BRANCH_STACK)) && + (type ^ PERF_SAMPLE_REGS_INTR)) return NULL; intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size); if (!intr_regs) return NULL; + if (type & PERF_SAMPLE_BRANCH_STACK) { + /* + * PERF_RECORD_SAMPLE and PERF_SAMPLE_BRANCH_STACK: + * struct { + * struct perf_event_header hdr; + * u64 number_of_branches; + * struct perf_branch_entry[number_of_branches]; + * u64 data[]; + * }; + * struct perf_branch_entry { + * u64 from; + * u64 to; + * u64 misc; + * }; + */ + intr_regs += ((*intr_regs) * 3) + 1; + } + /* * First entry in the sample buffer used to specify * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access @@ -410,3 +443,95 @@ u64 get_reg_value(u64 *intr_regs, char *register_name) return *(intr_regs + register_bit_position); } + +int get_thresh_cmp_val(struct event event) +{ + int exp = 0; + u64 result = 0; + u64 value; + + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + return EV_CODE_EXTRACT(event.attr.config, thd_cmp); + + value = EV_CODE_EXTRACT(event.attr.config1, thd_cmp); + + if (!value) + return value; + + /* + * Incase of P10, thresh_cmp value is not part of raw event code + * and provided via attr.config1 parameter. To program threshold in MMCRA, + * take a 18 bit number N and shift right 2 places and increment + * the exponent E by 1 until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. 
+ */ + if (!(value & 0xC0) && exp) + result = -1; + else + result = (exp << 8) | value; + return result; +} + +/* + * Utility function to check for generic compat PMU + * by comparing base_platform value from auxv and real + * PVR value. + */ +static bool auxv_generic_compat_pmu(void) +{ + int base_pvr = 0; + + if (!strcmp(auxv_base_platform(), "power9")) + base_pvr = POWER9; + else if (!strcmp(auxv_base_platform(), "power10")) + base_pvr = POWER10; + + return (!base_pvr); +} + +/* + * Check for generic compat PMU. + * First check for presence of pmu_name from + * "/sys/bus/event_source/devices/cpu/caps". + * If doesn't exist, fallback to using value + * auxv. + */ +bool check_for_generic_compat_pmu(void) +{ + char pmu_name[256]; + + memset(pmu_name, 0, sizeof(pmu_name)); + if (read_sysfs_file("bus/event_source/devices/cpu/caps/pmu_name", + pmu_name, sizeof(pmu_name)) < 0) + return auxv_generic_compat_pmu(); + + if (!strcmp(pmu_name, "ISAv3")) + return true; + else + return false; +} + +/* + * Check if system is booted in compat mode. + */ +bool check_for_compat_mode(void) +{ + char *platform = auxv_platform(); + char *base_platform = auxv_base_platform(); + + return strcmp(platform, base_platform); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 7675f3177725..4181755cf5a0 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -5,6 +5,7 @@ * Copyright 2022, Kajol Jain, IBM Corp. 
*/ +#include #include "../event.h" #define POWER10 0x80 @@ -17,6 +18,8 @@ #define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ #define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; extern int ev_mask_comb, ev_shift_comb; @@ -35,6 +38,7 @@ extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src; extern int pvr; extern u64 platform_extended_mask; extern int check_pvr_for_sampling_tests(void); +extern int platform_check_for_tests(void); /* * Event code field extraction macro. @@ -52,6 +56,9 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); +int get_thresh_cmp_val(struct event event); +bool check_for_generic_compat_pmu(void); +bool check_for_compat_mode(void); static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) { @@ -184,7 +191,7 @@ static inline int get_mmcra_sm(u64 mmcra, int pmc) return ((mmcra >> 42) & 0x3); } -static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc) +static inline u64 get_mmcra_bhrb_disable(u64 mmcra, int pmc) { if (pvr == POWER10) return mmcra & BHRB_DISABLE; diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c new file mode 100644 index 000000000000..f0c003282630 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. 
*/ + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode 0x21c040 + +/* + * A perf sampling test for mmcr1 + * fields : pmcxsel, unit, cache. + */ +static int mmcr1_sel_unit_cache(void) +{ + struct event event; + u64 *intr_regs; + char *p; + int i; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + p = malloc(MALLOC_SIZE); + FAIL_IF(!p); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_period = 1; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + event_enable(&event); + + /* workload to make the event overflow */ + for (i = 0; i < MALLOC_SIZE; i += 0x10000) + p[i] = i; + + event_disable(&event); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that pmcxsel, unit and cache field of MMCR1 + * match with corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, pmcxsel) != + get_mmcr1_pmcxsel(get_reg_value(intr_regs, "MMCR1"), 1)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, unit) != + get_mmcr1_unit(get_reg_value(intr_regs, "MMCR1"), 1)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, cache) != + get_mmcr1_cache(get_reg_value(intr_regs, "MMCR1"), 1)); + + free(p); + event_close(&event); + return 0; +} + +int main(void) +{ + FAIL_IF(test_harness(mmcr1_sel_unit_cache, "mmcr1_sel_unit_cache")); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c new file mode 100644 index 000000000000..14854694af62 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: 
GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for any branch mode */ +#define IFM_ANY_BRANCH 0x0 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb any call. + */ +static int mmcra_bhrb_any_test(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_ANY_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_any_test, "mmcra_bhrb_any_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c new file mode 100644 index 000000000000..3e08176eb7f8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for conditional branch mode */ +#define IFM_COND_BRANCH 0x3 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb cond call. + */ +static int mmcra_bhrb_cond_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only applicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_COND_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_cond_test, "mmcra_bhrb_cond_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c new file mode 100644 index 000000000000..488c865387e4 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* + * A perf sampling test for mmcra + * field: bhrb_disable. + */ +static int mmcra_bhrb_disable_no_branch_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only applicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that bhrb_disable bit is set in MMCRA for non-branch samples */ + FAIL_IF(!get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_disable_no_branch_test, "mmcra_bhrb_disable_no_branch_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c new file mode 100644 index 000000000000..186a853c0f62 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* + * A perf sampling test for mmcra + * field: bhrb_disable. + */ +static int mmcra_bhrb_disable_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only applicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that bhrb_disable bit is not set in MMCRA */ + FAIL_IF(get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_disable_test, "mmcra_bhrb_disable_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c new file mode 100644 index 000000000000..f0706730c099 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void indirect_branch_loop(void); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for indirect branch mode */ +#define IFM_IND_BRANCH 0x2 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb ind_call. + */ +static int mmcra_bhrb_ind_call_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only applicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_IND_CALL; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + indirect_branch_loop(); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_IND_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_ind_call_test, "mmcra_bhrb_ind_call_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c new file mode 100644 index 000000000000..904362f172c9 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load only sampling + */ +#define p9_EventCode 0x13E35340401e0 +#define p10_EventCode 0x35340401e0 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* A perf sampling test to test mmcra fields */ +static int mmcra_thresh_cmp(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Skip for compat mode */ + SKIP_IF(check_for_compat_mode()); + + /* Init the event for the sampling test */ + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + event_init_sampling(&event, p9_EventCode); + } else { + event_init_sampling(&event, p10_EventCode); + event.attr.config1 = 1000; + } + + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that thresh cmp matches the corresponding event code fields */ + FAIL_IF(get_thresh_cmp_val(event) != + get_mmcra_thd_cmp(get_reg_value(intr_regs, "MMCRA"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + FAIL_IF(test_harness(mmcra_thresh_cmp, "mmcra_thresh_cmp")); +} diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index a500639da97a..2f02cb54224d 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ 
b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,15 +1,41 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ - ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall ptrace-perf-hwbreak + +TM_TESTS := ptrace-tm-gpr +TM_TESTS += ptrace-tm-spd-gpr +TM_TESTS += ptrace-tm-spd-tar +TM_TESTS += ptrace-tm-spd-vsx +TM_TESTS += ptrace-tm-spr +TM_TESTS += ptrace-tm-tar +TM_TESTS += ptrace-tm-vsx + +TESTS_64 := $(TM_TESTS) +TESTS_64 += core-pkey +TESTS_64 += perf-hwbreak +TESTS_64 += ptrace-hwbreak +TESTS_64 += ptrace-perf-hwbreak +TESTS_64 += ptrace-pkey +TESTS_64 += ptrace-syscall +TESTS_64 += ptrace-tar +TESTS_64 += ptrace-vsx + +TESTS += ptrace-gpr + +TEST_GEN_PROGS := $(TESTS) $(TESTS_64) + +LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) top_srcdir = ../../../../.. include ../../lib.mk -CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie +TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS)) +TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) -$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h +$(TESTS_64): CFLAGS += -m64 +$(TM_TESTS): CFLAGS += -I../tm -mhtm + +CFLAGS += -I../../../../../usr/include -fno-pie + +$(OUTPUT)/ptrace-gpr: ptrace-gpr.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread -$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h +$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S new file mode 100644 index 000000000000..070e8443e3cc --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * test helper assembly functions + * + * Copyright (C) 2016 Simon Guo, IBM Corporation. 
+ * Copyright 2022 Michael Ellerman, IBM Corporation. + */ +#include "basic_asm.h" + +#define GPR_SIZE __SIZEOF_LONG__ +#define FIRST_GPR 14 +#define NUM_GPRS (32 - FIRST_GPR) +#define STACK_SIZE (NUM_GPRS * GPR_SIZE) + +// gpr_child_loop(int *read_flag, int *write_flag, +// unsigned long *gpr_buf, double *fpr_buf); +FUNC_START(gpr_child_loop) + // r3 = read_flag + // r4 = write_flag + // r5 = gpr_buf + // r6 = fpr_buf + PUSH_BASIC_STACK(STACK_SIZE) + + // Save non-volatile GPRs + OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR + + // Load GPRs with expected values + OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR + + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r6 + + // Signal to parent that we're ready + li r0, 1 + stw r0, 0(r4) + + // Wait for parent to finish +1: lwz r0, 0(r3) + cmpwi r0, 0 + beq 1b // Loop while flag is zero + + // Save GPRs back to caller buffer + OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR + + // Save FPRs + OP_REGS stfd, 8, 0, 31, r6 + + // Reload non-volatile GPRs + OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR + + POP_BASIC_STACK(STACK_SIZE) + blr diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index 17cd480c8780..9ed87d297799 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -7,72 +7,127 @@ #include "ptrace.h" #include "ptrace-gpr.h" #include "reg.h" +#include /* Tracer and Tracee Shared Data */ int shm_id; int *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; +extern void gpr_child_loop(int *read_flag, int *write_flag, + unsigned long *gpr_buf, double *fpr_buf); -void gpr(void) +unsigned long child_gpr_val, parent_gpr_val; +double child_fpr_val, parent_fpr_val; + +static int child(void) { - unsigned long gpr_buf[18]; - float fpr_buf[32]; + unsigned long gpr_buf[32]; + 
double fpr_buf[32]; + int i; cptr = (int *)shmat(shm_id, NULL, 0); + memset(gpr_buf, 0, sizeof(gpr_buf)); + memset(fpr_buf, 0, sizeof(fpr_buf)); - asm __volatile__( - ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) - : - : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a) - : "memory", "r6", "r7", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15", "r16", "r17", - "r18", "r19", "r20", "r21", "r22", "r23", "r24", - "r25", "r26", "r27", "r28", "r29", "r30", "r31" - ); + for (i = 0; i < 32; i++) { + gpr_buf[i] = child_gpr_val; + fpr_buf[i] = child_fpr_val; + } - cptr[1] = 1; - - while (!cptr[0]) - asm volatile("" : : : "memory"); + gpr_child_loop(&cptr[0], &cptr[1], gpr_buf, fpr_buf); shmdt((void *)cptr); - store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); - if (validate_gpr(gpr_buf, GPR_3)) - exit(1); + FAIL_IF(validate_gpr(gpr_buf, parent_gpr_val)); + FAIL_IF(validate_fpr_double(fpr_buf, parent_fpr_val)); - if (validate_fpr_float(fpr_buf, c)) - exit(1); - - exit(0); + return 0; } int trace_gpr(pid_t child) { + __u64 tmp, fpr[32], *peeked_fprs; unsigned long gpr[18]; - unsigned long fpr[32]; FAIL_IF(start_trace(child)); + + // Check child GPRs match what we expect using GETREGS FAIL_IF(show_gpr(child, gpr)); - FAIL_IF(validate_gpr(gpr, GPR_1)); + FAIL_IF(validate_gpr(gpr, child_gpr_val)); + + // Check child FPRs match what we expect using GETFPREGS FAIL_IF(show_fpr(child, fpr)); - FAIL_IF(validate_fpr(fpr, FPR_1_REP)); - FAIL_IF(write_gpr(child, GPR_3)); - FAIL_IF(write_fpr(child, FPR_3_REP)); + memcpy(&tmp, &child_fpr_val, sizeof(tmp)); + FAIL_IF(validate_fpr(fpr, tmp)); + + // Check child FPRs match what we expect using PEEKUSR + peeked_fprs = peek_fprs(child); + FAIL_IF(!peeked_fprs); + FAIL_IF(validate_fpr(peeked_fprs, tmp)); + free(peeked_fprs); + + // Write child GPRs using SETREGS + FAIL_IF(write_gpr(child, parent_gpr_val)); + + // Write child FPRs using SETFPREGS + memcpy(&tmp, &parent_fpr_val, sizeof(tmp)); + FAIL_IF(write_fpr(child, tmp)); + 
+ // Check child FPRs match what we just set, using PEEKUSR + peeked_fprs = peek_fprs(child); + FAIL_IF(!peeked_fprs); + FAIL_IF(validate_fpr(peeked_fprs, tmp)); + + // Write child FPRs using POKEUSR + FAIL_IF(poke_fprs(child, (unsigned long *)peeked_fprs)); + + // Child will check its FPRs match before exiting FAIL_IF(stop_trace(child)); return TEST_PASS; } +#ifndef __LONG_WIDTH__ +#define __LONG_WIDTH__ (sizeof(long) * 8) +#endif + +static uint64_t rand_reg(void) +{ + uint64_t result; + long r; + + r = random(); + + // Small values are typical + result = r & 0xffff; + if (r & 0x10000) + return result; + + // Pointers tend to have high bits set + result |= random() << (__LONG_WIDTH__ - 31); + if (r & 0x100000) + return result; + + // And sometimes we want a full 64-bit value + result ^= random() << 16; + + return result; +} + int ptrace_gpr(void) { - pid_t pid; + unsigned long seed; int ret, status; + pid_t pid; + + seed = getpid() ^ time(NULL); + printf("srand(%lu)\n", seed); + srand(seed); + + child_gpr_val = rand_reg(); + child_fpr_val = rand_reg(); + parent_gpr_val = rand_reg(); + parent_fpr_val = rand_reg(); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); @@ -81,7 +136,7 @@ int ptrace_gpr(void) return TEST_FAIL; } if (pid == 0) - gpr(); + exit(child()); if (pid) { pptr = (int *)shmat(shm_id, NULL, 0); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h index c5cd53181e2e..a5470b88bd08 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h @@ -12,10 +12,10 @@ #define FPR_3 0.003 #define FPR_4 0.004 -#define FPR_1_REP 0x3f50624de0000000 -#define FPR_2_REP 0x3f60624de0000000 -#define FPR_3_REP 0x3f689374c0000000 -#define FPR_4_REP 0x3f70624de0000000 +#define FPR_1_REP 0x3f50624dd2f1a9fcull +#define FPR_2_REP 0x3f60624dd2f1a9fcull +#define FPR_3_REP 0x3f689374bc6a7efaull +#define FPR_4_REP 
0x3f70624dd2f1a9fcull /* Buffer must have 18 elements */ int validate_gpr(unsigned long *gpr, unsigned long val) @@ -36,13 +36,13 @@ int validate_gpr(unsigned long *gpr, unsigned long val) } /* Buffer must have 32 elements */ -int validate_fpr(unsigned long *fpr, unsigned long val) +int validate_fpr(__u64 *fpr, __u64 val) { int i, found = 1; for (i = 0; i < 32; i++) { if (fpr[i] != val) { - printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val); + printf("FPR[%d]: %llx Expected: %llx\n", i, fpr[i], val); found = 0; } } @@ -53,7 +53,7 @@ int validate_fpr(unsigned long *fpr, unsigned long val) } /* Buffer must have 32 elements */ -int validate_fpr_float(float *fpr, float val) +int validate_fpr_double(double *fpr, double val) { int i, found = 1; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 67ca297c5cca..5dc152b162df 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -12,15 +12,15 @@ int shm_id; unsigned long *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; void tm_gpr(void) { unsigned long gpr_buf[18]; unsigned long result, texasr; - float fpr_buf[32]; + double fpr_buf[32]; printf("Starting the child\n"); cptr = (unsigned long *)shmat(shm_id, NULL, 0); @@ -29,12 +29,12 @@ trans: cptr[1] = 0; asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) "1: ;" "tbegin.;" "beq 2f;" ASM_LOAD_GPR_IMMED(gpr_2) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_2) + ASM_LOAD_FPR(flt_2) "tsuspend.;" "li 7, 1;" "stw 7, 0(%[cptr1]);" @@ -70,12 +70,12 @@ trans: shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); @@ -87,7 +87,7 
@@ trans: int trace_tm_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 6f2bce1b6c5d..458cc1a70ccf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -12,10 +12,10 @@ int shm_id; int *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; -float d = FPR_4; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; +double d = FPR_4; __attribute__((used)) void wait_parent(void) { @@ -28,7 +28,7 @@ void tm_spd_gpr(void) { unsigned long gpr_buf[18]; unsigned long result, texasr; - float fpr_buf[32]; + double fpr_buf[32]; cptr = (int *)shmat(shm_id, NULL, 0); @@ -36,7 +36,7 @@ trans: cptr[2] = 0; asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) "1: ;" "tbegin.;" @@ -45,7 +45,7 @@ trans: ASM_LOAD_GPR_IMMED(gpr_2) "tsuspend.;" ASM_LOAD_GPR_IMMED(gpr_4) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_4) + ASM_LOAD_FPR(flt_4) "bl wait_parent;" "tresume.;" @@ -77,12 +77,12 @@ trans: shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); } @@ -93,7 +93,7 @@ trans: int trace_tm_spd_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 5181ad9b4b6c..4e0233c0f2b3 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -4,6 +4,9 @@ * * Copyright (C) 2015 Anshuman 
Khandual, IBM Corporation. */ + +#define __SANE_USERSPACE_TYPES__ + #include #include #include @@ -20,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -30,8 +34,8 @@ #define TEST_FAIL 1 struct fpr_regs { - unsigned long fpr[32]; - unsigned long fpscr; + __u64 fpr[32]; + __u64 fpscr; }; struct tm_spr_regs { @@ -318,7 +322,7 @@ fail: } /* FPR */ -int show_fpr(pid_t child, unsigned long *fpr) +int show_fpr(pid_t child, __u64 *fpr) { struct fpr_regs *regs; int ret, i; @@ -337,7 +341,7 @@ int show_fpr(pid_t child, unsigned long *fpr) return TEST_PASS; } -int write_fpr(pid_t child, unsigned long val) +int write_fpr(pid_t child, __u64 val) { struct fpr_regs *regs; int ret, i; @@ -360,7 +364,7 @@ int write_fpr(pid_t child, unsigned long val) return TEST_PASS; } -int show_ckpt_fpr(pid_t child, unsigned long *fpr) +int show_ckpt_fpr(pid_t child, __u64 *fpr) { struct fpr_regs *regs; struct iovec iov; @@ -437,6 +441,70 @@ int show_gpr(pid_t child, unsigned long *gpr) return TEST_PASS; } +long sys_ptrace(enum __ptrace_request request, pid_t pid, unsigned long addr, unsigned long data) +{ + return syscall(__NR_ptrace, request, pid, (void *)addr, data); +} + +// 33 because of FPSCR +#define PT_NUM_FPRS (33 * (sizeof(__u64) / sizeof(unsigned long))) + +__u64 *peek_fprs(pid_t child) +{ + unsigned long *fprs, *p, addr; + long ret; + int i; + + fprs = malloc(sizeof(unsigned long) * PT_NUM_FPRS); + if (!fprs) { + perror("malloc() failed"); + return NULL; + } + + for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) { + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)p); + if (ret) { + perror("ptrace(PTRACE_PEEKUSR) failed"); + return NULL; + } + } + + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)&addr); + if (!ret) { + printf("ptrace(PTRACE_PEEKUSR) succeeded unexpectedly!\n"); + return NULL; + } + + return (__u64 *)fprs; +} + +int 
poke_fprs(pid_t child, unsigned long *fprs) +{ + unsigned long *p, addr; + long ret; + int i; + + for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) { + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_POKEUSER, child, addr, *p); + if (ret) { + perror("ptrace(PTRACE_POKEUSR) failed"); + return -1; + } + } + + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_POKEUSER, child, addr, addr); + if (!ret) { + printf("ptrace(PTRACE_POKEUSR) succeeded unexpectedly!\n"); + return -1; + } + + return 0; +} + int write_gpr(pid_t child, unsigned long val) { struct pt_regs *regs; @@ -742,4 +810,3 @@ void analyse_texasr(unsigned long texasr) } void store_gpr(unsigned long *addr); -void store_fpr(float *addr); diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 93614b125ded..9357b186b13c 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -2,3 +2,4 @@ rfi_flush entry_flush spectre_v2 +uaccess_flush