From c7fa848ff01dad9ed3146a6b1a7d3622131bcedd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 3 Mar 2022 15:33:10 +1000 Subject: [PATCH 001/423] KVM: PPC: Book3S HV P9: Fix "lost kick" race When new work is created that requires attention from the hypervisor (e.g., to inject an interrupt into the guest), fast_vcpu_kick is used to pull the target vcpu out of the guest if it may have been running. Therefore the work creation side looks like this: vcpu->arch.doorbell_request = 1; kvmppc_fast_vcpu_kick_hv(vcpu) { smp_mb(); cpu = vcpu->cpu; if (cpu != -1) send_ipi(cpu); } And the guest entry side *should* look like this: vcpu->cpu = smp_processor_id(); smp_mb(); if (vcpu->arch.doorbell_request) { // do something (abort entry or inject doorbell etc) } But currently the store and load are flipped, so it is possible for the entry to see no doorbell pending, and the doorbell creation misses the store to set cpu, resulting lost work (or at least delayed until the next guest exit). Fix this by reordering the entry operations and adding a smp_mb between them. The P8 path appears to have a similar race which is commented but not addressed yet. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220303053315.1056880-2-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 41 +++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c886557638a1..6fa518f6501d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) int cpu; struct rcuwait *waitp; + /* + * rcuwait_wake_up contains smp_mb() which orders prior stores that + * create pending work vs below loads of cpu fields. The other side + * is the barrier in vcpu run that orders setting the cpu fields vs + * testing for pending work. + */ + waitp = kvm_arch_vcpu_get_wait(vcpu); if (rcuwait_wake_up(waitp)) ++vcpu->stat.generic.halt_wakeup; @@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; } tvcpu->arch.prodded = 1; - smp_mb(); + smp_mb(); /* This orders prodded store vs ceded load */ if (tvcpu->arch.ceded) kvmppc_fast_vcpu_kick_hv(tvcpu); break; @@ -3766,6 +3773,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) pvc = core_info.vc[sub]; pvc->pcpu = pcpu + thr; for_each_runnable_thread(i, vcpu, pvc) { + /* + * XXX: is kvmppc_start_thread called too late here? + * It updates vcpu->cpu and vcpu->arch.thread_cpu + * which are used by kvmppc_fast_vcpu_kick_hv(), but + * kick is called after new exceptions become available + * and exceptions are checked earlier than here, by + * kvmppc_core_prepare_to_enter. + */ kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -4487,6 +4502,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (need_resched() || !kvm->arch.mmu_ready) goto out; + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + /* + * Orders set cpu/thread_cpu vs testing for pending interrupts and + * doorbells below. The other side is when these fields are set vs + * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to + * kick a vCPU to notice the pending interrupt. + */ + smp_mb(); + if (!nested) { kvmppc_core_prepare_to_enter(vcpu); if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, @@ -4506,13 +4536,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, tb = mftb(); - vcpu->cpu = pcpu; - vcpu->arch.thread_cpu = pcpu; - vc->pcpu = pcpu; - local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.ptid = 0; - local_paca->kvm_hstate.fake_suspend = 0; - __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); trace_kvm_guest_enter(vcpu); @@ -4614,6 +4637,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; From b5149e229218118c9cd44a4d256f970ddcbf745b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 16:47:25 +1000 Subject: [PATCH 002/423] KVM: PPC: Book3S PR: Disable SCV when AIL could be disabled PR KVM does not support running with AIL enabled, and SCV does is not supported with AIL disabled. Fix this by ensuring the SCV facility is disabled with FSCR while a CPU could be running with AIL=0. The PowerNV host supports disabling AIL on a per-CPU basis, so SCV just needs to be disabled when a vCPU is being run. The pSeries machine can only switch AIL on a system-wide basis, so it must disable SCV support at boot if the configuration can potentially run a PR KVM guest. Also ensure a the FSCR[SCV] bit can not be enabled when emulating mtFSCR for the guest. SCV is not emulated for the PR guest at the moment, this just fixes the host crashes. Alternatives considered and rejected: - SCV support can not be disabled by PR KVM after boot, because it is advertised to userspace with HWCAP. - AIL can not be disabled on a per-CPU basis. At least when running on pseries it is a per-LPAR setting. - Support for real-mode SCV vectors will not be added because they are at 0x17000 so making such a large fixed head space causes immediate value limits to be exceeded, requiring a lot rework and more code. - Disabling SCV for any PR KVM possible kernel will cause a slowdown when not using PR KVM. - A boot time option to disable SCV to use PR KVM is user-hostile. - System call instruction emulation for SCV facility unavailable instructions is too complex and old emulation code was subtly broken and removed. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20220222064727.2314380-2-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++++ arch/powerpc/kernel/setup_64.c | 28 ++++++++++++++++++++++++++++ arch/powerpc/kvm/Kconfig | 9 +++++++++ arch/powerpc/kvm/book3s_pr.c | 26 +++++++++++++++++--------- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 55caeee37c08..b66dd6f775a4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -809,6 +809,10 @@ __start_interrupts: * - MSR_EE|MSR_RI is clear (no reentrant exceptions) * - Standard kernel environment is set up (stack, paca, etc) * + * KVM: + * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM + * ensures that FSCR[SCV] is disabled whenever it has to force AIL off. + * * Call convention: * * syscall register convention is in Documentation/powerpc/syscall64-abi.rst diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index be8577ac9397..d973ae7558e3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -197,6 +197,34 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* + * - PR KVM does not support AIL mode interrupts in the host + * while a PR guest is running. + * + * - SCV system call interrupt vectors are only implemented for + * AIL mode interrupts. + * + * - On pseries, AIL mode can only be enabled and disabled + * system-wide so when a PR VM is created on a pseries host, + * all CPUs of the host are set to AIL=0 mode. + * + * - Therefore host CPUs must not execute scv while a PR VM + * exists. + * + * - SCV support can not be disabled dynamically because the + * feature is advertised to host userspace. Disabling the + * facility and emulating it would be possible but is not + * implemented. + * + * - So SCV support is blanket disabled if PR KVM could possibly + * run. That is, PR support compiled in, booting on pseries + * with hash MMU. + */ + if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } + /* Enable AIL if possible */ if (!pseries_enable_reloc_on_exc()) { init_task.thread.fscr &= ~FSCR_SCV; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 18e58085447c..ddd88179110a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -112,12 +112,21 @@ config KVM_BOOK3S_64_PR guest in user mode (problem state) and emulating all privileged instructions and registers. + This is only available for hash MMU mode and only supports + guests that use hash MMU mode. + This is not as fast as using hypervisor mode, but works on machines where hypervisor mode is not available or not usable, and can emulate processors that are different from the host processor, including emulating 32-bit processors on a 64-bit host. + Selecting this option will cause the SCV facility to be + disabled when the kernel is booted on the pseries platform in + hash MMU mode (regardless of PR VMs running). When any PR VMs + are running, "AIL" mode is disabled which may slow interrupts + and system calls on the host. + config KVM_BOOK3S_HV_EXIT_TIMING bool "Detailed timing for hypervisor real-mode code" depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 34a801c3604a..7bf9e6ca5c2d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -137,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu->in_use = 0; svcpu_put(svcpu); -#endif /* Disable AIL if supported */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV)) + mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV); + } +#endif vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 @@ -165,6 +168,14 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; svcpu_put(svcpu); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV)) + mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV); + } #endif if (kvmppc_is_split_real(vcpu)) @@ -174,11 +185,6 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); kvmppc_save_tm_pr(vcpu); - /* Enable AIL if supported */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); - vcpu->cpu = -1; } @@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) { + if (fscr & FSCR_SCV) + fscr &= ~FSCR_SCV; /* SCV must not be enabled */ if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { /* TAR got dropped, drop it in shadow too */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); From 839d893b4067da14b9c46fda2dfd88b80aeed551 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 16:47:26 +1000 Subject: [PATCH 003/423] KVM: PPC: Book3S PR: Disallow AIL != 0 KVM PR does not implement address translation modes on interrupt, so it must not allow H_SET_MODE to succeed. The behaviour change caused by this mode is architected and not advisory (interrupts *must* behave differently). QEMU does not deal with differences in AIL support in the host. The solution to that is a spapr capability and corresponding KVM CAP, but this patch does not break things more than before (the host behaviour already differs, this change just disallows some modes that are not implemented properly). By happy coincidence, this allows PR Linux guests that are using the SCV facility to boot and run, because Linux disables the use of SCV if AIL can not be set to 3. This does not fix the underlying problem of missing SCV support (an OS could implement real-mode SCV vectors and try to enable the facility). The true fix for that is for KVM PR to emulate scv interrupts from the facility unavailable interrupt. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20220222064727.2314380-3-npiggin@gmail.com --- arch/powerpc/kvm/book3s_pr_papr.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 1f10e7dfcdd0..dc4f51ac84bc 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -281,6 +281,22 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu) +{ + unsigned long mflags = kvmppc_get_gpr(vcpu, 4); + unsigned long resource = kvmppc_get_gpr(vcpu, 5); + + if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) { + /* KVM PR does not provide AIL!=0 to guests */ + if (mflags == 0) + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + else + kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63); + return EMULATE_DONE; + } + return EMULATE_FAIL; +} + #ifdef CONFIG_SPAPR_TCE_IOMMU static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) { @@ -384,6 +400,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return kvmppc_h_pr_logical_ci_load(vcpu); case H_LOGICAL_CI_STORE: return kvmppc_h_pr_logical_ci_store(vcpu); + case H_SET_MODE: + return kvmppc_h_pr_set_mode(vcpu); case H_XIRR: case H_CPPR: case H_EOI: @@ -421,6 +439,7 @@ int kvmppc_hcall_impl_pr(unsigned long cmd) case H_CEDE: case H_LOGICAL_CI_LOAD: case H_LOGICAL_CI_STORE: + case H_SET_MODE: #ifdef CONFIG_KVM_XICS case H_XIRR: case H_CPPR: @@ -447,6 +466,7 @@ static unsigned int default_hcall_list[] = { H_BULK_REMOVE, H_PUT_TCE, H_CEDE, + H_SET_MODE, #ifdef CONFIG_KVM_XICS H_XIRR, H_CPPR, From f771b55731fc82b1e8e9ef123f6f1b8d8c92bc63 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 7 Mar 2022 13:26:25 +1100 Subject: [PATCH 004/423] KVM: PPC: Use KVM_CAP_PPC_AIL_MODE_3 Use KVM_CAP_PPC_AIL_MODE_3 to advertise the capability to set the AIL resource mode to 3 with the H_SET_MODE hypercall. This capability differs between processor types and KVM types (PR, HV, Nested HV), and affects guest-visible behaviour. QEMU will implement a cap-ail-mode-3 to control this behaviour[1], and use the KVM CAP if available to determine KVM support[2]. [1] https://lists.nongnu.org/archive/html/qemu-ppc/2022-02/msg00437.html [2] https://lists.nongnu.org/archive/html/qemu-ppc/2022-02/msg00439.html Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas [mpe: Rebase onto 93b71801a827 from kvm-ppc-cap-210 branch, add EXPORT_SYMBOL] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220222064727.2314380-4-npiggin@gmail.com --- arch/powerpc/include/asm/setup.h | 2 ++ arch/powerpc/kvm/powerpc.c | 17 +++++++++++++++++ arch/powerpc/platforms/pseries/setup.c | 13 ++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d0d3dd531c7f..a555fb77258a 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -28,11 +28,13 @@ void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES +extern bool pseries_reloc_on_exception(void); extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); void __init pseries_little_endian_exceptions(void); #else +static inline bool pseries_reloc_on_exception(void) { return false; } static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} static inline void pseries_big_endian_exceptions(void) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9772b176e406..875c30c12db0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -705,6 +705,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_PPC_AIL_MODE_3: + r = 0; + /* + * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode. + * The POWER9s can support it if the guest runs in hash mode, + * but QEMU doesn't necessarily query the capability in time. + */ + if (hv_enabled) { + if (kvmhv_on_pseries()) { + if (pseries_reloc_on_exception()) + r = 1; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { + r = 1; + } + } + break; default: r = 0; break; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 83a04d967a59..5bdbbe2151b1 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -353,6 +353,14 @@ static void pseries_lpar_idle(void) pseries_idle_epilog(); } +static bool pseries_reloc_on_exception_enabled; + +bool pseries_reloc_on_exception(void) +{ + return pseries_reloc_on_exception_enabled; +} +EXPORT_SYMBOL_GPL(pseries_reloc_on_exception); + /* * Enable relocation on during exceptions. This has partition wide scope and * may take a while to complete, if it takes longer than one second we will @@ -377,6 +385,7 @@ bool pseries_enable_reloc_on_exc(void) " on exceptions: %ld\n", rc); return false; } + pseries_reloc_on_exception_enabled = true; return true; } @@ -404,7 +413,9 @@ void pseries_disable_reloc_on_exc(void) break; mdelay(get_longbusy_msecs(rc)); } - if (rc != H_SUCCESS) + if (rc == H_SUCCESS) + pseries_reloc_on_exception_enabled = false; + else pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n", rc); } From d14eb80e27795b7b20060f7b151cdfe39722a813 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 17 Mar 2022 23:55:37 +0100 Subject: [PATCH 005/423] drm/panel: ili9341: fix optional regulator handling If the optional regulator lookup fails, reset the pointer to NULL. Other functions such as mipi_dbi_poweron_reset_conditional() only do a NULL pointer check and will otherwise dereference the error pointer. Fixes: 5a04227326b04c15 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Daniel Mack Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220317225537.826302-1-daniel@zonque.org --- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 2c3378a259b1..e1542451ef9d 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -612,8 +612,10 @@ static int ili9341_dbi_probe(struct spi_device *spi, struct gpio_desc *dc, int ret; vcc = devm_regulator_get_optional(dev, "vcc"); - if (IS_ERR(vcc)) + if (IS_ERR(vcc)) { dev_err(dev, "get optional vcc failed\n"); + vcc = NULL; + } dbidev = devm_drm_dev_alloc(dev, &ili9341_dbi_driver, struct mipi_dbi_dev, drm); From c5c948aa894a831f96fccd025e47186b1ee41615 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 15 Mar 2022 14:53:24 -0400 Subject: [PATCH 006/423] drm/amd: Add USBC connector ID [Why&How] Add a dedicated AMDGPU specific ID for use with newer ASICs that support USB-C output Signed-off-by: Aurabindo Pillai Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ObjectID.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f592..a0f0a17e224f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ From 24b488061b97a6c6ff82c433e6843eaf54f41f3c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Mar 2022 12:10:55 +0100 Subject: [PATCH 007/423] MAINTAINERS: update Lorenzo's email address Using my kernel.org email. Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/e98fcf759f8c23a9736f1c4d20ca0437e4b145de.1648120046.git.lorenzo@kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 91c04cb65247..e406a6db67d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12222,7 +12222,7 @@ F: drivers/mmc/host/mtk-sd.c MEDIATEK MT76 WIRELESS LAN DRIVER M: Felix Fietkau -M: Lorenzo Bianconi +M: Lorenzo Bianconi M: Ryder Lee R: Shayne Chen R: Sean Wang From caaf2ae712b7cc3c7717898fe267dbf882a502ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 24 Jan 2022 14:03:24 +0100 Subject: [PATCH 008/423] dma-buf: Add dma_fence_array_for_each (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to iterate over all fences in a dma_fence_array object. v2 (Jason Ekstrand) - Return NULL from dma_fence_array_first if head == NULL. This matches the iterator behavior of dma_fence_chain_for_each in that it iterates zero times if head == NULL. - Return NULL from dma_fence_array_next if index > array->num_fences. Signed-off-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Reviewed-by: Christian König Cc: Daniel Vetter Cc: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210610210925.642582-2-jason@jlekstrand.net Signed-off-by: Christian König --- drivers/dma-buf/dma-fence-array.c | 27 +++++++++++++++++++++++++++ include/linux/dma-fence-array.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index cb1bacb5a42b..52b85d292383 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -219,3 +219,30 @@ bool dma_fence_match_context(struct dma_fence *fence, u64 context) return true; } EXPORT_SYMBOL(dma_fence_match_context); + +struct dma_fence *dma_fence_array_first(struct dma_fence *head) +{ + struct dma_fence_array *array; + + if (!head) + return NULL; + + array = to_dma_fence_array(head); + if (!array) + return head; + + return array->fences[0]; +} +EXPORT_SYMBOL(dma_fence_array_first); + +struct dma_fence *dma_fence_array_next(struct dma_fence *head, + unsigned int index) +{ + struct dma_fence_array *array = to_dma_fence_array(head); + + if (!array || index >= array->num_fences) + return NULL; + + return array->fences[index]; +} +EXPORT_SYMBOL(dma_fence_array_next); diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index fec374f69e12..e34dcb0bb462 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -61,6 +61,19 @@ to_dma_fence_array(struct dma_fence *fence) return container_of(fence, struct dma_fence_array, base); } +/** + * dma_fence_array_for_each - iterate over all fences in array + * @fence: current fence + * @index: index into the array + * @head: potential dma_fence_array object + * + * Test if @array is a dma_fence_array object and if yes iterate over all fences + * in the array. If not just iterate over the fence in @array itself. + */ +#define dma_fence_array_for_each(fence, index, head) \ + for (index = 0, fence = dma_fence_array_first(head); fence; \ + ++(index), fence = dma_fence_array_next(head, index)) + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, @@ -68,4 +81,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, bool dma_fence_match_context(struct dma_fence *fence, u64 context); +struct dma_fence *dma_fence_array_first(struct dma_fence *head); +struct dma_fence *dma_fence_array_next(struct dma_fence *head, + unsigned int index); + #endif /* __LINUX_DMA_FENCE_ARRAY_H */ From 64a8f92fd783e750cdb81af75942dcd53bbf61bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 11 Mar 2022 10:27:53 +0100 Subject: [PATCH 009/423] dma-buf: add dma_fence_unwrap v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a general purpose helper to deep dive into dma_fence_chain/dma_fence_array structures and iterate over all the fences in them. This is useful when we need to flatten out all fences in those structures. v2: some selftests cleanup, improved function naming and documentation Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220311110244.1245-1-christian.koenig@amd.com --- Documentation/driver-api/dma-buf.rst | 6 + drivers/dma-buf/Makefile | 1 + drivers/dma-buf/selftests.h | 1 + drivers/dma-buf/st-dma-fence-unwrap.c | 261 ++++++++++++++++++++++++++ include/linux/dma-fence-array.h | 2 + include/linux/dma-fence-chain.h | 2 + include/linux/dma-fence-unwrap.h | 95 ++++++++++ 7 files changed, 368 insertions(+) create mode 100644 drivers/dma-buf/st-dma-fence-unwrap.c create mode 100644 include/linux/dma-fence-unwrap.h diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 55006678394a..36a76cbe9095 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -185,6 +185,12 @@ DMA Fence Chain .. kernel-doc:: include/linux/dma-fence-chain.h :internal: +DMA Fence unwrap +~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/dma-fence-unwrap.h + :internal: + DMA Fence uABI/Sync File ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 511805dbeb75..4c9eb53ba3f8 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -12,6 +12,7 @@ dmabuf_selftests-y := \ selftest.o \ st-dma-fence.o \ st-dma-fence-chain.o \ + st-dma-fence-unwrap.o \ st-dma-resv.o obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h index 97d73aaa31da..851965867d9c 100644 --- a/drivers/dma-buf/selftests.h +++ b/drivers/dma-buf/selftests.h @@ -12,4 +12,5 @@ selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */ selftest(dma_fence, dma_fence) selftest(dma_fence_chain, dma_fence_chain) +selftest(dma_fence_unwrap, dma_fence_unwrap) selftest(dma_resv, dma_resv) diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c new file mode 100644 index 000000000000..039f016b57be --- /dev/null +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + */ + +#include +#if 0 +#include +#include +#include +#include +#include +#include +#include +#endif + +#include "selftest.h" + +#define CHAIN_SZ (4 << 10) + +static inline struct mock_fence { + struct dma_fence base; + spinlock_t lock; +} *to_mock_fence(struct dma_fence *f) { + return container_of(f, struct mock_fence, base); +} + +static const char *mock_name(struct dma_fence *f) +{ + return "mock"; +} + +static const struct dma_fence_ops mock_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, +}; + +static struct dma_fence *mock_fence(void) +{ + struct mock_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + spin_lock_init(&f->lock); + dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0); + + return &f->base; +} + +static struct dma_fence *mock_array(unsigned int num_fences, ...) +{ + struct dma_fence_array *array; + struct dma_fence **fences; + va_list valist; + int i; + + fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) + return NULL; + + va_start(valist, num_fences); + for (i = 0; i < num_fences; ++i) + fences[i] = va_arg(valist, typeof(*fences)); + va_end(valist); + + array = dma_fence_array_create(num_fences, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) + goto cleanup; + return &array->base; + +cleanup: + for (i = 0; i < num_fences; ++i) + dma_fence_put(fences[i]); + kfree(fences); + return NULL; +} + +static struct dma_fence *mock_chain(struct dma_fence *prev, + struct dma_fence *fence) +{ + struct dma_fence_chain *f; + + f = dma_fence_chain_alloc(); + if (!f) { + dma_fence_put(prev); + dma_fence_put(fence); + return NULL; + } + + dma_fence_chain_init(f, prev, fence, 1); + return &f->base; +} + +static int sanitycheck(void *arg) +{ + struct dma_fence *f, *chain, *array; + int err = 0; + + f = mock_fence(); + if (!f) + return -ENOMEM; + + array = mock_array(1, f); + if (!array) + return -ENOMEM; + + chain = mock_chain(NULL, array); + if (!chain) + return -ENOMEM; + + dma_fence_signal(f); + dma_fence_put(chain); + return err; +} + +static int unwrap_array(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *array; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + array = mock_array(2, f1, f2); + if (!array) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, array) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(array); + return 0; +} + +static int unwrap_chain(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *chain; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + chain = mock_chain(f1, f2); + if (!chain) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, chain) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(chain); + return 0; +} + +static int unwrap_chain_array(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *array, *chain; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + array = mock_array(2, f1, f2); + if (!array) + return -ENOMEM; + + chain = mock_chain(NULL, array); + if (!chain) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, chain) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(chain); + return 0; +} + +int dma_fence_unwrap(void) +{ + static const struct subtest tests[] = { + SUBTEST(sanitycheck), + SUBTEST(unwrap_array), + SUBTEST(unwrap_chain), + SUBTEST(unwrap_chain_array), + }; + + return subtests(tests, NULL); +} diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index e34dcb0bb462..ec7f25def392 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -69,6 +69,8 @@ to_dma_fence_array(struct dma_fence *fence) * * Test if @array is a dma_fence_array object and if yes iterate over all fences * in the array. If not just iterate over the fence in @array itself. + * + * For a deep dive iterator see dma_fence_unwrap_for_each(). */ #define dma_fence_array_for_each(fence, index, head) \ for (index = 0, fence = dma_fence_array_first(head); fence; \ diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 10d51bcdf7b7..4bdf0b96da28 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -112,6 +112,8 @@ static inline void dma_fence_chain_free(struct dma_fence_chain *chain) * * Iterate over all fences in the chain. We keep a reference to the current * fence while inside the loop which must be dropped when breaking out. + * + * For a deep dive iterator see dma_fence_unwrap_for_each(). */ #define dma_fence_chain_for_each(iter, head) \ for (iter = dma_fence_get(head); iter; \ diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h new file mode 100644 index 000000000000..77e335a1bcac --- /dev/null +++ b/include/linux/dma-fence-unwrap.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * fence-chain: chain fences together in a timeline + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Authors: + * Christian König + */ + +#ifndef __LINUX_DMA_FENCE_UNWRAP_H +#define __LINUX_DMA_FENCE_UNWRAP_H + +#include +#include + +/** + * struct dma_fence_unwrap - cursor into the container structure + * + * Should be used with dma_fence_unwrap_for_each() iterator macro. + */ +struct dma_fence_unwrap { + /** + * @chain: potential dma_fence_chain, but can be other fence as well + */ + struct dma_fence *chain; + /** + * @array: potential dma_fence_array, but can be other fence as well + */ + struct dma_fence *array; + /** + * @index: last returned index if @array is really a dma_fence_array + */ + unsigned int index; +}; + +/* Internal helper to start new array iteration, don't use directly */ +static inline struct dma_fence * +__dma_fence_unwrap_array(struct dma_fence_unwrap * cursor) +{ + cursor->array = dma_fence_chain_contained(cursor->chain); + cursor->index = 0; + return dma_fence_array_first(cursor->array); +} + +/** + * dma_fence_unwrap_first - return the first fence from fence containers + * @head: the entrypoint into the containers + * @cursor: current position inside the containers + * + * Unwraps potential dma_fence_chain/dma_fence_array containers and return the + * first fence. + */ +static inline struct dma_fence * +dma_fence_unwrap_first(struct dma_fence *head, struct dma_fence_unwrap *cursor) +{ + cursor->chain = dma_fence_get(head); + return __dma_fence_unwrap_array(cursor); +} + +/** + * dma_fence_unwrap_next - return the next fence from a fence containers + * @cursor: current position inside the containers + * + * Continue unwrapping the dma_fence_chain/dma_fence_array containers and return + * the next fence from them. + */ +static inline struct dma_fence * +dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) +{ + struct dma_fence *tmp; + + ++cursor->index; + tmp = dma_fence_array_next(cursor->array, cursor->index); + if (tmp) + return tmp; + + cursor->chain = dma_fence_chain_walk(cursor->chain); + return __dma_fence_unwrap_array(cursor); +} + +/** + * dma_fence_unwrap_for_each - iterate over all fences in containers + * @fence: current fence + * @cursor: current position inside the containers + * @head: starting point for the iterator + * + * Unwrap dma_fence_chain and dma_fence_array containers and deep dive into all + * potential fences in them. If @head is just a normal fence only that one is + * returned. + */ +#define dma_fence_unwrap_for_each(fence, cursor, head) \ + for (fence = dma_fence_unwrap_first(head, cursor); fence; \ + fence = dma_fence_unwrap_next(cursor)) + +#endif From 519f490db07e1a539490612f376487f61e48e39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 11 Mar 2022 10:32:26 +0100 Subject: [PATCH 010/423] dma-buf/sync-file: fix warning about fence containers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dma_fence_chain containers can show up in sync_files as well resulting in warnings that those can't be added to dma_fence_array containers when merging multiple sync_files together. Solve this by using the dma_fence_unwrap iterator to deep dive into the contained fences and then add those flatten out into a dma_fence_array. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220311110244.1245-2-christian.koenig@amd.com --- drivers/dma-buf/sync_file.c | 141 +++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 394e6e1e9686..b8dea4ec123b 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -5,6 +5,7 @@ * Copyright (C) 2012 Google, Inc. */ +#include #include #include #include @@ -172,20 +173,6 @@ static int sync_file_set_fence(struct sync_file *sync_file, return 0; } -static struct dma_fence **get_fences(struct sync_file *sync_file, - int *num_fences) -{ - if (dma_fence_is_array(sync_file->fence)) { - struct dma_fence_array *array = to_dma_fence_array(sync_file->fence); - - *num_fences = array->num_fences; - return array->fences; - } - - *num_fences = 1; - return &sync_file->fence; -} - static void add_fence(struct dma_fence **fences, int *i, struct dma_fence *fence) { @@ -210,86 +197,97 @@ static void add_fence(struct dma_fence **fences, static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { + struct dma_fence *a_fence, *b_fence, **fences; + struct dma_fence_unwrap a_iter, b_iter; + unsigned int index, num_fences; struct sync_file *sync_file; - struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; - int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) return NULL; - a_fences = get_fences(a, &a_num_fences); - b_fences = get_fences(b, &b_num_fences); - if (a_num_fences > INT_MAX - b_num_fences) - goto err; + num_fences = 0; + dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence) + ++num_fences; + dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence) + ++num_fences; - num_fences = a_num_fences + b_num_fences; + if (num_fences > INT_MAX) + goto err_free_sync_file; fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); if (!fences) - goto err; + goto err_free_sync_file; /* - * Assume sync_file a and b are both ordered and have no - * duplicates with the same context. + * We can't guarantee that fences in both a and b are ordered, but it is + * still quite likely. * - * If a sync_file can only be created with sync_file_merge - * and sync_file_create, this is a reasonable assumption. + * So attempt to order the fences as we pass over them and merge fences + * with the same context. */ - for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { - struct dma_fence *pt_a = a_fences[i_a]; - struct dma_fence *pt_b = b_fences[i_b]; - if (pt_a->context < pt_b->context) { - add_fence(fences, &i, pt_a); + index = 0; + for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter), + b_fence = dma_fence_unwrap_first(b->fence, &b_iter); + a_fence || b_fence; ) { - i_a++; - } else if (pt_a->context > pt_b->context) { - add_fence(fences, &i, pt_b); + if (!b_fence) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + + } else if (!a_fence) { + add_fence(fences, &index, b_fence); + b_fence = dma_fence_unwrap_next(&b_iter); + + } else if (a_fence->context < b_fence->context) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + + } else if (b_fence->context < a_fence->context) { + add_fence(fences, &index, b_fence); + b_fence = dma_fence_unwrap_next(&b_iter); + + } else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno, + a_fence->ops)) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + b_fence = dma_fence_unwrap_next(&b_iter); - i_b++; } else { - if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno, - pt_a->ops)) - add_fence(fences, &i, pt_a); - else - add_fence(fences, &i, pt_b); - - i_a++; - i_b++; + add_fence(fences, &index, b_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + b_fence = dma_fence_unwrap_next(&b_iter); } } - for (; i_a < a_num_fences; i_a++) - add_fence(fences, &i, a_fences[i_a]); + if (index == 0) + add_fence(fences, &index, dma_fence_get_stub()); - for (; i_b < b_num_fences; i_b++) - add_fence(fences, &i, b_fences[i_b]); + if (num_fences > index) { + struct dma_fence **tmp; - if (i == 0) - fences[i++] = dma_fence_get(a_fences[0]); - - if (num_fences > i) { - nfences = krealloc_array(fences, i, sizeof(*fences), GFP_KERNEL); - if (!nfences) - goto err; - - fences = nfences; + /* Keep going even when reducing the size failed */ + tmp = krealloc_array(fences, index, sizeof(*fences), + GFP_KERNEL); + if (tmp) + fences = tmp; } - if (sync_file_set_fence(sync_file, fences, i) < 0) - goto err; + if (sync_file_set_fence(sync_file, fences, index) < 0) + goto err_put_fences; strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; -err: - while (i) - dma_fence_put(fences[--i]); +err_put_fences: + while (index) + dma_fence_put(fences[--index]); kfree(fences); + +err_free_sync_file: fput(sync_file->file); return NULL; - } static int sync_file_release(struct inode *inode, struct file *file) @@ -398,11 +396,13 @@ static int sync_fill_fence_info(struct dma_fence *fence, static long sync_file_ioctl_fence_info(struct sync_file *sync_file, unsigned long arg) { - struct sync_file_info info; struct sync_fence_info *fence_info = NULL; - struct dma_fence **fences; + struct dma_fence_unwrap iter; + struct sync_file_info info; + unsigned int num_fences; + struct dma_fence *fence; + int ret; __u32 size; - int num_fences, ret, i; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) return -EFAULT; @@ -410,7 +410,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (info.flags || info.pad) return -EINVAL; - fences = get_fences(sync_file, &num_fences); + num_fences = 0; + dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) + ++num_fences; /* * Passing num_fences = 0 means that userspace doesn't want to @@ -433,8 +435,11 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (!fence_info) return -ENOMEM; - for (i = 0; i < num_fences; i++) { - int status = sync_fill_fence_info(fences[i], &fence_info[i]); + num_fences = 0; + dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) { + int status; + + status = sync_fill_fence_info(fence, &fence_info[num_fences++]); info.status = info.status <= 0 ? info.status : status; } From 32f90e65251981f061eec883b0fe9e75d74e9665 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 15 Mar 2022 14:46:28 +0800 Subject: [PATCH 011/423] drm/amdgpu: prevent memory wipe in suspend/shutdown stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GPUs with RAS enabled, below call trace is observed when suspending or shutting down device. The cause is we have enabled memory wipe flag for BOs on such GPUs by default, and such BOs will go to memory wipe by amdgpu_fill_buffer, however, because ring is off already, it fails to clean up the memory and throw this error message. So add a suspend/shutdown check before wipping memory. [drm:amdgpu_fill_buffer [amdgpu]] *ERROR* Trying to clear memory with ring turned off. v2: fix coding style issue Fixes: fc6ea4bee13071 ("drm/amdgpu: Wipe all VRAM on free when RAS is enabled") Signed-off-by: Guchun Chen Reviewed-by: Christian König Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25731719c627..940752488330 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1284,6 +1284,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, */ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct dma_fence *fence = NULL; struct amdgpu_bo *abo; int r; @@ -1303,7 +1304,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); if (bo->resource->mem_type != TTM_PL_VRAM || - !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || + adev->in_suspend || adev->shutdown) return; if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) From 2d505453f38e18d42ba7d5428aaa17aaa7752c65 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 15 Mar 2022 14:59:28 +0800 Subject: [PATCH 012/423] drm/amdgpu: conduct a proper cleanup of PDB bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use amdgpu_bo_free_kernel instead of amdgpu_bo_unref to perform a proper cleanup of PDB bo. v2: update subject to be more accurate Signed-off-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 431742eb7811..08ceabd6c853 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1721,7 +1721,7 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); amdgpu_gart_table_vram_free(adev); - amdgpu_bo_unref(&adev->gmc.pdb0_bo); + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); return 0; From 60d61f4ed6ead43ad2de31ebb8d1d27c57290529 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 15 Mar 2022 18:19:06 +0800 Subject: [PATCH 013/423] drm/amdgpu/pm: fix the Stable pstate Test in amdgpu_test If GFX DPM is disbaled, Stable pstate Test in amdgpu_test fails. Check GFX DPM statue before change clock level Log: [ 46.595274] [drm] Initialized amdgpu 3.46.0 20150101 for 0000:02:00.0 on minor 0 [ 46.599929] fbcon: amdgpudrmfb (fb0) is primary device [ 46.785753] Console: switching to colour frame buffer device 240x67 [ 46.811765] amdgpu 0000:02:00.0: [drm] fb0: amdgpudrmfb frame buffer device [ 131.398407] amdgpu 0000:02:00.0: amdgpu: Failed to set performance level! Signed-off-by: Yifan Zhang Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index 7bfac029e513..b81711c4ff33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -991,7 +991,7 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu, return -EINVAL; } - if (sclk_min && sclk_max) { + if (sclk_min && sclk_max && smu_v13_0_5_clk_dpm_is_enabled(smu, SMU_SCLK)) { ret = smu_v13_0_5_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_min, From 1647b54ed55d4d48c7199d439f8834626576cbe9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Mar 2022 11:41:48 +0300 Subject: [PATCH 014/423] drm/amdgpu: fix off by one in amdgpu_gfx_kiq_acquire() This post-op should be a pre-op so that we do not pass -1 as the bit number to test_bit(). The current code will loop downwards from 63 to -1. After changing to a pre-op, it loops from 63 to 0. Fixes: 71c37505e7ea ("drm/amdgpu/gfx: move more common KIQ code to amdgpu_gfx.c") Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8fe939976224..28a736c507bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; From b818a5d374542ccec73dcfe578a081574029820e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Mar 2022 18:02:11 -0500 Subject: [PATCH 015/423] drm/amdgpu/gmc: use PCI BARs for APUs in passthrough MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the GPU is passed through to a guest VM, use the PCI BAR for CPU FB access rather than the physical address of carve out. The physical address is not valid in a guest. v2: Fix HDP handing as suggested by Michel Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3987ecb24ef4..49f734137f15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5733,7 +5733,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) @@ -5749,7 +5749,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 3c1d440824a7..5228421b0f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -814,7 +814,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 344d819b4c1b..979da6f510e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU && - adev->gmc.real_vram_size > adev->gmc.aper_size) { + if ((adev->flags & AMD_IS_APU) && + adev->gmc.real_vram_size > adev->gmc.aper_size && + !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ca9841d5669f..1932a3e4af7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -581,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 08ceabd6c853..6009fbfdcc19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1456,7 +1456,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if ((adev->flags & AMD_IS_APU) || + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = From 0d8e4eb337644cab528ff3844675d58496ec22db Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 8 Mar 2022 11:26:41 +0800 Subject: [PATCH 016/423] drm/amdgpu: add workarounds for VCN TMZ issue on CHIP_RAVEN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is a hardware issue that VCN can't handle a GTT backing stored TMZ buffer on CHIP_RAVEN series ASIC. Move such a TMZ buffer to VRAM domain before command submission as a workaround. v2: - Use patch_cs_in_place callback. v3: - Bail out early if unsecure IBs. Suggested-by: Christian König Signed-off-by: Lang Yu Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 71 +++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index dff54190b96c..f0fbcda76f5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -24,6 +24,7 @@ #include #include "amdgpu.h" +#include "amdgpu_cs.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" #include "soc15.h" @@ -1900,6 +1901,75 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_powergating_state = vcn_v1_0_set_powergating_state, }; +/* + * It is a hardware issue that VCN can't handle a GTT TMZ buffer on + * CHIP_RAVEN series ASIC. Move such a GTT TMZ buffer to VRAM domain + * before command submission as a workaround. + */ +static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE); + if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) + return -EINVAL; + + bo = mapping->bo_va->base.bo; + if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED)) + return 0; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r); + return r; + } + + return r; +} + +static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t msg_lo = 0, msg_hi = 0; + int i, r; + + if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) + return 0; + + for (i = 0; i < ib->length_dw; i += 2) { + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); + + if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + msg_lo = val; + } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + msg_hi = val; + } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) { + r = vcn_v1_0_validate_bo(p, job, + ((u64)msg_hi) << 32 | msg_lo); + if (r) + return r; + } + } + + return 0; +} + static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, @@ -1910,6 +1980,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, + .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + From 93dde6ccd66d29502506e95f568fd0f49d575d27 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 21 Mar 2022 12:50:36 +0800 Subject: [PATCH 017/423] drm/amdgpu/pm: add asic smu support check It must check asic whether support smu before call smu powerplay function, otherwise it may cause null point on no support smu asic. Signed-off-by: Stanley.Yang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 89fbee568be4..c73fb73e9628 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -500,6 +500,9 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size) struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + mutex_lock(&adev->pm.mutex); ret = smu_send_hbm_bad_pages_num(smu, size); mutex_unlock(&adev->pm.mutex); @@ -512,6 +515,9 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t si struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + mutex_lock(&adev->pm.mutex); ret = smu_send_hbm_bad_channel_flag(smu, size); mutex_unlock(&adev->pm.mutex); From 3107e1a7ae088ee94323fe9ab05dbefd65b3077f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 18 Mar 2022 11:10:34 -0400 Subject: [PATCH 018/423] drm/amd/display: Fix p-state allow debug index on dcn31 [Why] It changed since dcn30 but the hubbub31 constructor hasn't been modified to reflect this. [How] Update the value in the constructor to 0x6 so we're checking the right bits for p-state allow. It worked before by accident, but can falsely assert 0 depending on HW state transitions. The most frequent of which appears to be when all pipes turn off during IGT tests. Cc: Harry Wentland Fixes: e7031d8258f1b4 ("drm/amd/display: Add pstate verification and recovery for DCN31") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Eric Yang Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 3e6d6ebd199e..51c5f3685470 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -1042,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31, hubbub31->detile_buf_size = det_size_kb * 1024; hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024; hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB; + + hubbub31->debug_test_index_pstate = 0x6; } From 5e8a71cf13bc9184fee915b2220be71b4c6cac74 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 7 Mar 2022 18:31:29 -0500 Subject: [PATCH 019/423] drm/amd/display: fix audio format not updated after edid updated [why] for the case edid change only changed audio format. driver still need to update stream. Reviewed-by: Alvin Lee Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 7af153434e9e..d251c3f3a714 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1685,8 +1685,8 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; - // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks - if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + /*compare audio info*/ + if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0) return false; return true; From 6bf528ec91fb96e186461215c8f76265c5a35250 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 28 Feb 2022 19:01:59 -0500 Subject: [PATCH 020/423] drm/amd/display: Reduce stack size Linux kernel enabled more compilation restrictions related to the stack size, which caused compilation failures in our code. This commit reduces the allocation size by allocating the required memory dynamically. Reviewed-by: Harry Wentland Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c3e141c19a77..ad757b59e00e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2056,7 +2056,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, { struct dc_context *dc_ctx = dc->ctx; int i, master = -1, embedded = -1; - struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0}; + struct dc_crtc_timing *hw_crtc_timing; uint64_t phase[MAX_PIPES]; uint64_t modulo[MAX_PIPES]; unsigned int pclk; @@ -2067,6 +2067,10 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, uint32_t dp_ref_clk_100hz = dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10; + hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), GFP_KERNEL); + if (!hw_crtc_timing) + return master; + if (dc->config.vblank_alignment_dto_params && dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) { embedded_h_total = @@ -2130,6 +2134,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, } } + + kfree(hw_crtc_timing); return master; } From 7d56a154e22ffb3613fdebf83ec34d5225a22993 Mon Sep 17 00:00:00 2001 From: Chiawen Huang Date: Thu, 10 Mar 2022 00:07:59 +0800 Subject: [PATCH 021/423] drm/amd/display: FEC check in timing validation [Why] disable/enable leads FEC mismatch between hw/sw FEC state. [How] check FEC status to fastboot on/off. Reviewed-by: Anthony Koo Acked-by: Alex Hung Signed-off-by: Chiawen Huang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f6e19efea756..75f9c97bebb0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1496,6 +1496,10 @@ bool dc_validate_boot_timing(const struct dc *dc, if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; + /* Check for FEC status*/ + if (link->link_enc->funcs->fec_is_active(link->link_enc)) + return false; + enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); if (enc_inst == ENGINE_ID_UNKNOWN) From bd219322dbb41cc7c753da3a6936ce09d502f113 Mon Sep 17 00:00:00 2001 From: Oliver Logush Date: Tue, 8 Mar 2022 10:34:04 -0500 Subject: [PATCH 022/423] drm/amd/display: Add fSMC_MSG_SetDtbClk support [why] Needed to support dcn315 Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Oliver Logush Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn315/dcn315_smu.c | 19 +++++++++++++++---- .../display/dc/clk_mgr/dcn315/dcn315_smu.h | 4 +++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 880ffea2afc6..2600313fea57 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -80,8 +80,8 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define VBIOSSMC_MSG_SetDppclkFreq 0x06 ///< Set DPP clock frequency in MHZ #define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x07 ///< Set DCF clock frequency hard min in MHZ #define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x08 ///< Set DCF clock minimum frequency in deep sleep in MHZ -#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x09 ///< Set display phy clock frequency in MHZ in case VMIN does not support phy frequency -#define VBIOSSMC_MSG_GetFclkFrequency 0x0A ///< Get FCLK frequency, return frequemcy in MHZ +#define VBIOSSMC_MSG_GetDtbclkFreq 0x09 ///< Get display dtb clock frequency in MHZ in case VMIN does not support phy frequency +#define VBIOSSMC_MSG_SetDtbClk 0x0A ///< Set dtb clock frequency, return frequemcy in MHZ #define VBIOSSMC_MSG_SetDisplayCount 0x0B ///< Inform PMFW of number of display connected #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW turn off TMDP 48MHz refclk during display off to save power #define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to write into Azalia for PME wake up event @@ -324,15 +324,26 @@ int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr) return (dprefclk_get_mhz * 1000); } -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr) +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr) { int fclk_get_mhz = -1; if (clk_mgr->smu_present) { fclk_get_mhz = dcn315_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_GetFclkFrequency, + VBIOSSMC_MSG_GetDtbclkFreq, 0); } return (fclk_get_mhz * 1000); } + +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable) +{ + if (!clk_mgr->smu_present) + return; + + dcn315_smu_send_msg_with_param( + clk_mgr, + VBIOSSMC_MSG_SetDtbClk, + enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h index 66fa42f8dd18..5aa3275ac7d8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h @@ -37,6 +37,7 @@ #define NUM_SOC_VOLTAGE_LEVELS 4 #define NUM_DF_PSTATE_LEVELS 4 + typedef struct { uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz) uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz) @@ -124,5 +125,6 @@ void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr); -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr); +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr); +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable); #endif /* DAL_DC_315_SMU_H_ */ From c9fbf6435162ed5fb7201d1d4adf6585c6a8c327 Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Fri, 11 Mar 2022 11:35:29 -0500 Subject: [PATCH 023/423] drm/amd/display: Update VTEM Infopacket definition [Why & How] The latest HDMI SPEC has updated the VTEM packet structure, so change the VTEM Infopacket defined in the driver side to align with the SPEC. Reviewed-by: Chris Park Acked-by: Alex Hung Signed-off-by: Leo (Hanghong) Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index b691aa45e84f..79bc207415bc 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -100,7 +100,8 @@ enum vsc_packet_revision { //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -109,7 +110,7 @@ enum vsc_packet_revision { //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__RESERVED3 0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF From 02fc996d5098f4c3f65bdf6cdb6b28e3f29ba789 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 21 Mar 2022 16:25:24 +0800 Subject: [PATCH 024/423] drm/amdgpu/vcn: Fix the register setting for vcn1 Correct the code error for setting register UVD_GFX10_ADDR_CONFIG. Need to use inst_idx, or it only will set VCN0. Signed-off-by: Emily Deng Reviewed-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index c87263ed20ec..b16c56aa2d22 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -575,8 +575,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( - UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) From e5fc78252ccd8dfc260f87d83905e9dffff6d975 Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Mon, 14 Mar 2022 12:07:14 -0400 Subject: [PATCH 025/423] drm/amd/display: Add support for zstate during extended vblank [why] When we enter FREESYNC_STATE_VIDEO, we want to use the extra vblank portion to enter zstate if possible. [how] When we enter freesync, a full update is triggered and the new vtotal with extra lines is passed to dml in a stream update. The time gained from extra vblank lines is calculated in microseconds. We allow zstate entry if the time gained is greater than 5 ms, which is the current policy. Furthermore, an optimized value for min_dst_y_next_start is calculated and written to its register. When exiting freesync, another full update is triggered and default values are restored. Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Gabe Teeger Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 6 +++++- drivers/gpu/drm/amd/display/dc/dc_stream.h | 2 ++ .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 12 ++++++++++++ .../gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c | 8 ++++++++ .../drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 18 +++++++++++++++--- .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 13 +++++++++++++ .../amd/display/dc/dml/display_mode_structs.h | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 3 +++ 10 files changed, 80 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 75f9c97bebb0..f2ad8f58e69c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2393,6 +2393,8 @@ static enum surface_update_type check_update_surfaces_for_stream( if (stream_update->mst_bw_update) su_flags->bits.mst_bw = 1; + if (stream_update->crtc_timing_adjust && dc_extended_blank_supported(dc)) + su_flags->bits.crtc_timing_adjust = 1; if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -2654,6 +2656,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->crtc_timing_adjust) + stream->adjust = *update->crtc_timing_adjust; + if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4055,3 +4060,17 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause) pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst); } +/* + * dc_extended_blank_supported: Decide whether extended blank is supported + * + * Extended blank is a freesync optimization feature to be enabled in the future. + * During the extra vblank period gained from freesync, we have the ability to enter z9/z10. + * + * @param [in] dc: Current DC state + * @return: Indicate whether extended blank is supported (true or false) + */ +bool dc_extended_blank_supported(struct dc *dc) +{ + return dc->debug.extended_blank_optimization && !dc->debug.disable_z10 + && dc->caps.zstate_support && dc->caps.is_apu; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4ffab7bb1098..9f4d926d54e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -188,6 +188,7 @@ struct dc_caps { bool psp_setup_panel_mode; bool extended_aux_timeout_support; bool dmcub_support; + bool zstate_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; unsigned int mall_size_per_mem_channel; @@ -703,13 +704,14 @@ struct dc_debug_options { bool enable_driver_sequence_debug; enum det_size crb_alloc_policy; int crb_alloc_policy_min_disp_count; -#if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_z10; +#if defined(CONFIG_DRM_AMD_DC_DCN) bool enable_z9_disable_interface; bool enable_sw_cntl_psr; union dpia_debug_options dpia_debug; #endif bool apply_vendor_specific_lttpr_wa; + bool extended_blank_optimization; bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; @@ -1369,6 +1371,8 @@ struct dc_sink_init_data { bool converter_disable_audio; }; +bool dc_extended_blank_supported(struct dc *dc); + struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params); /* Newer interfaces */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 99a750f561f8..c4168c11257c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -131,6 +131,7 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; + uint32_t crtc_timing_adjust : 1; } bits; uint32_t raw; @@ -289,6 +290,7 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; + struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index ab910deed481..4290eaf11a04 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1857,6 +1857,7 @@ void dcn20_optimize_bandwidth( struct dc_state *context) { struct hubbub *hubbub = dc->res_pool->hubbub; + int i; /* program dchubbub watermarks */ hubbub->funcs->program_watermarks(hubbub, @@ -1873,6 +1874,17 @@ void dcn20_optimize_bandwidth( dc->clk_mgr, context, true); + if (dc_extended_blank_supported(dc) && context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { + for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && pipe_ctx->plane_res.hubp->funcs->program_extended_blank + && pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max + && pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total) + pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp, + pipe_ctx->dlg_regs.optimized_min_dst_y_next_start); + } + } /* increase compbuf size */ if (hubbub->funcs->program_compbuf_size) hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c index 53b792b997b7..8ae6117953ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c @@ -54,6 +54,13 @@ void hubp31_soft_reset(struct hubp *hubp, bool reset) REG_UPDATE(DCHUBP_CNTL, HUBP_SOFT_RESET, reset); } +void hubp31_program_extended_blank(struct hubp *hubp, unsigned int min_dst_y_next_start_optimized) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_SET(BLANK_OFFSET_1, 0, MIN_DST_Y_NEXT_START, min_dst_y_next_start_optimized); +} + static struct hubp_funcs dcn31_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -80,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .set_unbounded_requesting = hubp31_set_unbounded_requesting, .hubp_soft_reset = hubp31_soft_reset, .hubp_in_blank = hubp1_in_blank, + .program_extended_blank = hubp31_program_extended_blank, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 89b7b6b7254a..338235bcef4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2232,6 +2232,7 @@ static bool dcn31_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.is_apu = true; + dc->caps.zstate_support = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 2f6122153bdb..f93af45aeab4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -722,8 +722,10 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; + unsigned int optimized_min_dst_y_next_start_us; plane_count = 0; + optimized_min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -744,11 +746,22 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; + if (dc_extended_blank_supported(dc)) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream == context->streams[0] + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min == context->res_ctx.pipe_ctx[i].stream->adjust.v_total_max + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min > context->res_ctx.pipe_ctx[i].stream->timing.v_total) { + optimized_min_dst_y_next_start_us = + context->res_ctx.pipe_ctx[i].dlg_regs.optimized_min_dst_y_next_start_us; + break; + } + } + } /* zstate only supported on PWRSEQ0 and when there's <2 planes*/ if (link->link_index != 0 || stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000) return DCN_ZSTATE_SUPPORT_ALLOW; else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr) return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; @@ -786,8 +799,6 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) @@ -843,6 +854,7 @@ void dcn20_calculate_dlg_params( &pipes[pipe_idx].pipe); pipe_idx++; } + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); } static void swizzle_to_dml_params( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index e0fecf127bd5..53d760e169e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -1055,6 +1055,7 @@ static void dml_rq_dlg_get_dlg_params( float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + int blank_lines; memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); @@ -1080,6 +1081,18 @@ static void dml_rq_dlg_get_dlg_params( dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); + blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1); + if (blank_lines < 0) + blank_lines = 0; + if (blank_lines != 0) { + disp_dlg_regs->optimized_min_dst_y_next_start_us = + ((unsigned int) blank_lines * dst->hactive) / (unsigned int) dst->pixel_rate_mhz; + disp_dlg_regs->optimized_min_dst_y_next_start = + (unsigned int)(((double) (dlg_vblank_start + blank_lines)) * dml_pow(2, 2)); + } else { + // use unoptimized value + disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; + } ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 59f0a61c33cf..2df660cd8801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -446,6 +446,8 @@ struct _vcs_dpi_display_dlg_regs_st { unsigned int refcyc_h_blank_end; unsigned int dlg_vblank_end; unsigned int min_dst_y_next_start; + unsigned int optimized_min_dst_y_next_start; + unsigned int optimized_min_dst_y_next_start_us; unsigned int refcyc_per_htotal; unsigned int refcyc_x_after_scaler; unsigned int dst_y_after_scaler; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index e45b7993c5c5..ad69d78c4ac3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -195,6 +195,9 @@ struct hubp_funcs { void (*hubp_set_flip_int)(struct hubp *hubp); + void (*program_extended_blank)(struct hubp *hubp, + unsigned int min_dst_y_next_start_optimized); + void (*hubp_wait_pipe_read_start)(struct hubp *hubp); }; From a572f7055067d95455850fd242d8b54ff5786cac Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 22 Feb 2022 15:53:32 -0500 Subject: [PATCH 026/423] drm/amd/display: remove destructive verify link for TMDS [why and how] TMDS does not need destructive verify link Reviewed-by: Aric Cyr Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cb87dd643180..bbaa5abdf888 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -983,8 +983,7 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, destrictive = false; } } - } else if (dc_is_hdmi_signal(link->local_sink->sink_signal)) - destrictive = true; + } return destrictive; } From b7dfbd2e601f3fee545bc158feceba4f340fe7cf Mon Sep 17 00:00:00 2001 From: Tushar Patel Date: Thu, 17 Mar 2022 15:31:22 -0400 Subject: [PATCH 027/423] drm/amdkfd: Fix Incorrect VMIDs passed to HWS Compute-only GPUs have more than 8 VMIDs allocated to KFD. Fix this by passing correct number of VMIDs to HWS v2: squash in warning fix (Alex) Signed-off-by: Tushar Patel Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bb1c025d9001..b03663f42cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -680,7 +680,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. */ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 339e12c94cff..0887e26ce23b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -483,15 +483,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * From c5650327aba02d15cbd6a1846dcde9231493d549 Mon Sep 17 00:00:00 2001 From: Divya Shikre Date: Tue, 22 Mar 2022 15:00:12 -0400 Subject: [PATCH 028/423] drm/amdkfd: Check use_xgmi_p2p before reporting hive_id Recently introduced commit 158a05a0b885 ("drm/amdgpu: Add use_xgmi_p2p module parameter") did not update XGMI iolinks when use_xgmi_p2p is disabled. Add fix to not create XGMI iolinks in KFD topology when this parameter is disabled. Fixes: 158a05a0b885 ("drm/amdgpu: Add use_xgmi_p2p module parameter") Signed-off-by: Divya Shikre Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0887e26ce23b..62aa6c9d5123 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -531,7 +531,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; + if (amdgpu_use_xgmi_p2p) + kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; kfd->noretry = kfd->adev->gmc.noretry; From 6ea239adc2a712eb318f04f5c29b018ba65ea38a Mon Sep 17 00:00:00 2001 From: Tianci Yin Date: Wed, 23 Mar 2022 23:54:58 +0800 Subject: [PATCH 029/423] drm/amdgpu/vcn: improve vcn dpg stop procedure Prior to disabling dpg, VCN need unpausing dpg mode, or VCN will hang in S3 resuming. Reviewed-by: James Zhu Signed-off-by: Tianci Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index b16c56aa2d22..0d590183328f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1480,8 +1480,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v3_0_pause_dpg_mode(adev, 0, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); From ebbb7bb9e80305820dc2328a371c1b35679f2667 Mon Sep 17 00:00:00 2001 From: QintaoShen Date: Thu, 24 Mar 2022 16:26:23 +0800 Subject: [PATCH 030/423] drm/amdkfd: Check for potential null return of kmalloc_array() As the kmalloc_array() may return null, the 'event_waiters[i].wait' would lead to null-pointer dereference. Therefore, it is better to check the return value of kmalloc_array() to avoid this confusion. Signed-off-by: QintaoShen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index deecccebe5b6..64f4a51cc880 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -749,6 +749,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) event_waiters = kmalloc_array(num_events, sizeof(struct kfd_event_waiter), GFP_KERNEL); + if (!event_waiters) + return NULL; for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); From 5f3854f1f4e211f494018160b348a1c16e58013f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 24 Mar 2022 18:04:00 -0400 Subject: [PATCH 031/423] drm/amdgpu: add more cases to noretry=1 Port current list from amd-staging-drm-next. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ca2cfb65f976..f7216afe8a6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -564,6 +564,9 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 3): + case IP_VERSION(9, 3, 0): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. From 7057c81773ac32fd0dba00e2bb869928f008d3e2 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 1 Mar 2022 22:31:40 +0800 Subject: [PATCH 032/423] drm/amdgpu: set noretry=1 for gc 10.3.6 this patch to set noretry=1 for gc 10.3.6. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index f7216afe8a6a..7021e8f390bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -567,6 +567,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): case IP_VERSION(9, 3, 0): + case IP_VERSION(10, 3, 6): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. From 609910db56e72e87755d9745442bfdfa009dc61b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 2 Mar 2022 15:18:18 -0500 Subject: [PATCH 033/423] drm/amdgpu: set noretry=1 for GFX 10.3.4 Retry faults are not supported on GFX 10.3.4. Signed-off-by: Felix Kuehling Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 7021e8f390bd..e1635a3f2553 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -561,12 +561,13 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): - case IP_VERSION(9, 3, 0): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): /* * noretry = 0 will cause kfd page fault tests fail From 0dc386add50b07e1cf9341b4e6e4fea77295c98a Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 22 Mar 2022 16:25:29 +0800 Subject: [PATCH 034/423] drm/amdgpu: set noretry for gfx 10.3.7 Disable xnack on the gfx10.3.7 for the KFD test. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index e1635a3f2553..a66a0881a934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -569,6 +569,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. From 15f9cd4334c83716fa32647652a609e3ba6c998d Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 24 Mar 2022 16:14:16 +0800 Subject: [PATCH 035/423] drm/amdgpu/gfx10: enable gfx1037 clock counter retrieval function Enable gfx1037 clock counter retrieval function for KFDPerfCountersTest.ClockCountersBasicTest. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f4c6accd3226..a98b78e0b507 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7689,6 +7689,7 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh); clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh); From 5f7b839d47dbc74cf4a07beeab5191f93678673e Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Mon, 28 Mar 2022 10:48:59 +0800 Subject: [PATCH 036/423] SUNRPC: Return true/false (not 1/0) from bool functions Return boolean values ("true" or "false") instead of 1 or 0 from bool functions. This fixes the following warnings from coccicheck: ./fs/nfsd/nfs2acl.c:289:9-10: WARNING: return of 0/1 in function 'nfsaclsvc_encode_accessres' with return type bool ./fs/nfsd/nfs2acl.c:252:9-10: WARNING: return of 0/1 in function 'nfsaclsvc_encode_getaclres' with return type bool Signed-off-by: Haowen Bai Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 367551bddfc6..b5760801d377 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -249,34 +249,34 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) int w; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; if (dentry == NULL || d_really_is_negative(dentry)) - return 1; + return true; inode = d_inode(dentry); if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return 0; + return false; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 1; + return true; w -= PAGE_SIZE; } if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, resp->mask & NFS_ACL, 0)) - return 0; + return false; if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) - return 0; + return false; - return 1; + return true; } /* ACCESS */ @@ -286,17 +286,17 @@ nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return 0; + return false; break; } - return 1; + return true; } /* From af41d2866f7d75bbb38d487f6ec7770425d70e45 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Mar 2022 09:32:26 +0200 Subject: [PATCH 037/423] powerpc/64: Fix build failure with allyesconfig in book3s_64_entry.S Using conditional branches between two files is hasardous, they may get linked too far from each other. arch/powerpc/kvm/book3s_64_entry.o:(.text+0x3ec): relocation truncated to fit: R_PPC64_REL14 (stub) against symbol `system_reset_common' defined in .text section in arch/powerpc/kernel/head_64.o Reorganise the code to use non conditional branches. Fixes: 89d35b239101 ("KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C") Signed-off-by: Christophe Leroy [mpe: Avoid odd-looking bne ., use named local labels] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/89cf27bf43ee07a0b2879b9e8e2f5cd6386a3645.1648366338.git.christophe.leroy@csgroup.eu --- arch/powerpc/kvm/book3s_64_entry.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 05e003eb5d90..e42d1c609e47 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) */ ld r10,HSTATE_SCRATCH0(r13) cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_common + beq .Lcall_machine_check_common cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET - beq system_reset_common + beq .Lcall_system_reset_common b . + +.Lcall_machine_check_common: + b machine_check_common + +.Lcall_system_reset_common: + b system_reset_common #endif From 21d139d73f776aed1e86f3175a1e9fb8a10930c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 29 Mar 2022 08:45:04 +0200 Subject: [PATCH 038/423] dma-buf/sync-file: fix logic error in new fence merge code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the array is empty because everything is signaled we can't use add_fence() to add something because that would filter the signaled fence again. Signed-off-by: Christian König Fixes: 519f490db07e ("dma-buf/sync-file: fix warning about fence containers") Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220329070001.134180-1-christian.koenig@amd.com --- drivers/dma-buf/sync_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index b8dea4ec123b..514d213261df 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -262,7 +262,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, } if (index == 0) - add_fence(fences, &index, dma_fence_get_stub()); + fences[index++] = dma_fence_get_stub(); if (num_fences > index) { struct dma_fence **tmp; From c42ee39c1e78224d3a81bdbe0600abe4581226ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 25 Mar 2022 16:38:54 +0100 Subject: [PATCH 039/423] dma-buf: handle empty dma_fence_arrays gracefully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bug inside the new sync-file merge code created empty dma_fence_array instances. Warn about that and handle those without crashing. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220329070001.134180-2-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 52b85d292383..5c8a7084577b 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -159,6 +159,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence_array *array; size_t size = sizeof(*array); + WARN_ON(!num_fences || !fences); + /* Allocate the callback structures behind the array. */ size += num_fences * sizeof(struct dma_fence_array_cb); array = kzalloc(size, GFP_KERNEL); @@ -231,6 +233,9 @@ struct dma_fence *dma_fence_array_first(struct dma_fence *head) if (!array) return head; + if (!array->num_fences) + return NULL; + return array->fences[0]; } EXPORT_SYMBOL(dma_fence_array_first); From ab0fc21bc7105b54bafd85bd8b82742f9e68898a Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 29 Mar 2022 19:32:07 +0800 Subject: [PATCH 040/423] Revert "NFSv4: Handle the special Linux file open access mode" This reverts commit 44942b4e457beda00981f616402a1a791e8c616e. After secondly opening a file with O_ACCMODE|O_DIRECT flags, nfs4_valid_open_stateid() will dereference NULL nfs4_state when lseek(). Reproducer: 1. mount -t nfs -o vers=4.2 $server_ip:/ /mnt/ 2. fd = open("/mnt/file", O_ACCMODE|O_DIRECT|O_CREAT) 3. close(fd) 4. fd = open("/mnt/file", O_ACCMODE|O_DIRECT) 5. lseek(fd) Reported-by: Lyu Tao Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - fs/nfs/nfs4file.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e51d86707fca..e72900c059ee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1180,7 +1180,6 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, filp); return 0; } -EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d258933cf8c8..f336d0a4190e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -51,7 +51,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) return err; if ((openflags & O_ACCMODE) == 3) - return nfs_open(inode, filp); + openflags--; /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); From b243874f6f9568b2daf1a00e9222cacdc15e159c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 29 Mar 2022 19:32:08 +0800 Subject: [PATCH 041/423] NFSv4: fix open failure with O_ACCMODE flag open() with O_ACCMODE|O_DIRECT flags secondly will fail. Reproducer: 1. mount -t nfs -o vers=4.2 $server_ip:/ /mnt/ 2. fd = open("/mnt/file", O_ACCMODE|O_DIRECT|O_CREAT) 3. close(fd) 4. fd = open("/mnt/file", O_ACCMODE|O_DIRECT) Server nfsd4_decode_share_access() will fail with error nfserr_bad_xdr when client use incorrect share access mode of 0. Fix this by using NFS4_SHARE_ACCESS_BOTH share access mode in client, just like firstly opening. Fixes: ce4ef7c0a8a05 ("NFS: Split out NFS v4 file operations") Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 ---------- fs/nfs/internal.h | 10 ++++++++++ fs/nfs/nfs4file.c | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bac4cf1a308e..0365063b85a2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1991,16 +1991,6 @@ const struct dentry_operations nfs4_dentry_operations = { }; EXPORT_SYMBOL_GPL(nfs4_dentry_operations); -static fmode_t flags_to_mode(int flags) -{ - fmode_t res = (__force fmode_t)flags & FMODE_EXEC; - if ((flags & O_ACCMODE) != O_WRONLY) - res |= FMODE_READ; - if ((flags & O_ACCMODE) != O_RDONLY) - res |= FMODE_WRITE; - return res; -} - static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 57b0497105c8..7eefa16ed381 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) return true; } +static inline fmode_t flags_to_mode(int flags) +{ + fmode_t res = (__force fmode_t)flags & FMODE_EXEC; + if ((flags & O_ACCMODE) != O_WRONLY) + res |= FMODE_READ; + if ((flags & O_ACCMODE) != O_RDONLY) + res |= FMODE_WRITE; + return res; +} + /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f336d0a4190e..7b861e4f0533 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; + fmode_t f_mode; struct iattr attr; int err; @@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (err) return err; + f_mode = filp->f_mode; if ((openflags & O_ACCMODE) == 3) - openflags--; + f_mode |= flags_to_mode(openflags); /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); @@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; From eb07d5a4da041fd2e30e386e5fd12d23bb31cf9e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 30 Mar 2022 11:48:37 +1100 Subject: [PATCH 042/423] SUNRPC: handle malloc failure in ->request_prepare If ->request_prepare() detects an error, it sets ->rq_task->tk_status. This is easy for callers to ignore. The only caller is xprt_request_enqueue_receive() and it does ignore the error, as does call_encode() which calls it. This can result in a request being queued to receive a reply without an allocated receive buffer. So instead of setting rq_task->tk_status, return an error, and store in ->tk_status only in call_encode(); The call to xprt_request_enqueue_receive() is now earlier in call_encode(), where the error can still be handled. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++--- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/xprt.c | 23 +++++++++++++++-------- net/sunrpc/xprtsock.c | 4 ++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eef5e87c03b4..f171f8c09e13 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -144,7 +144,7 @@ struct rpc_xprt_ops { unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - void (*prepare_request)(struct rpc_rqst *req); + int (*prepare_request)(struct rpc_rqst *req); int (*send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -357,10 +357,9 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); -void xprt_request_enqueue_receive(struct rpc_task *task); +int xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); void xprt_request_dequeue_xprt(struct rpc_task *task); bool xprt_request_need_retransmit(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8bf2af8546d2..3c7407104d54 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1858,6 +1858,9 @@ call_encode(struct rpc_task *task) xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ rpc_xdr_encode(task); + /* Add task to reply queue before transmission to avoid races */ + if (task->tk_status == 0 && rpc_reply_expected(task)) + task->tk_status = xprt_request_enqueue_receive(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ @@ -1881,9 +1884,6 @@ call_encode(struct rpc_task *task) return; } - /* Add task to reply queue before transmission to avoid races */ - if (rpc_reply_expected(task)) - xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: task->tk_action = call_transmit; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 880bfe8dc7f6..73344ffb2692 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -69,10 +69,11 @@ /* * Local functions */ -static void xprt_init(struct rpc_xprt *xprt, struct net *net); +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); -static void xprt_destroy(struct rpc_xprt *xprt); -static void xprt_request_init(struct rpc_task *task); +static void xprt_destroy(struct rpc_xprt *xprt); +static void xprt_request_init(struct rpc_task *task); +static int xprt_request_prepare(struct rpc_rqst *req); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -1143,16 +1144,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req) * @task: RPC task * */ -void +int xprt_request_enqueue_receive(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + int ret; if (!xprt_request_need_enqueue_receive(task, req)) - return; + return 0; - xprt_request_prepare(task->tk_rqstp); + ret = xprt_request_prepare(task->tk_rqstp); + if (ret) + return ret; spin_lock(&xprt->queue_lock); /* Update the softirq receive buffer */ @@ -1166,6 +1170,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); + return 0; } /** @@ -1452,14 +1457,16 @@ xprt_request_dequeue_xprt(struct rpc_task *task) * * Calls into the transport layer to do whatever is needed to prepare * the request for transmission or receive. + * Returns error, or zero. */ -void +static int xprt_request_prepare(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; if (xprt->ops->prepare_request) - xprt->ops->prepare_request(req); + return xprt->ops->prepare_request(req); + return 0; } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 78af7518f263..9b75891b3cc0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -822,11 +822,11 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) return ret; } -static void +static int xs_stream_prepare_request(struct rpc_rqst *req) { xdr_free_bvec(&req->rq_rcv_buf); - req->rq_task->tk_status = xdr_alloc_bvec( + return xdr_alloc_bvec( &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); } From 87d663d40801dffc99a5ad3b0188ad3e2b4d1557 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 22 Mar 2022 14:57:02 +0900 Subject: [PATCH 043/423] scsi: mpt3sas: Fix use after free in _scsih_expander_node_remove() The function mpt3sas_transport_port_remove() called in _scsih_expander_node_remove() frees the port field of the sas_expander structure, leading to the following use-after-free splat from KASAN when the ioc_info() call following that function is executed (e.g. when doing rmmod of the driver module): [ 3479.371167] ================================================================== [ 3479.378496] BUG: KASAN: use-after-free in _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.386936] Read of size 1 at addr ffff8881c037691c by task rmmod/1531 [ 3479.393524] [ 3479.395035] CPU: 18 PID: 1531 Comm: rmmod Not tainted 5.17.0-rc8+ #1436 [ 3479.401712] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.1 06/02/2021 [ 3479.409263] Call Trace: [ 3479.411743] [ 3479.413875] dump_stack_lvl+0x45/0x59 [ 3479.417582] print_address_description.constprop.0+0x1f/0x120 [ 3479.423389] ? _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.429469] kasan_report.cold+0x83/0xdf [ 3479.433438] ? _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.439514] _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.445411] ? _raw_spin_unlock_irqrestore+0x2d/0x40 [ 3479.452032] scsih_remove+0x525/0xc90 [mpt3sas] [ 3479.458212] ? mpt3sas_expander_remove+0x1d0/0x1d0 [mpt3sas] [ 3479.465529] ? down_write+0xde/0x150 [ 3479.470746] ? up_write+0x14d/0x460 [ 3479.475840] ? kernfs_find_ns+0x137/0x310 [ 3479.481438] pci_device_remove+0x65/0x110 [ 3479.487013] __device_release_driver+0x316/0x680 [ 3479.493180] driver_detach+0x1ec/0x2d0 [ 3479.498499] bus_remove_driver+0xe7/0x2d0 [ 3479.504081] pci_unregister_driver+0x26/0x250 [ 3479.510033] _mpt3sas_exit+0x2b/0x6cf [mpt3sas] [ 3479.516144] __x64_sys_delete_module+0x2fd/0x510 [ 3479.522315] ? free_module+0xaa0/0xaa0 [ 3479.527593] ? __cond_resched+0x1c/0x90 [ 3479.532951] ? lockdep_hardirqs_on_prepare+0x273/0x3e0 [ 3479.539607] ? syscall_enter_from_user_mode+0x21/0x70 [ 3479.546161] ? trace_hardirqs_on+0x1c/0x110 [ 3479.551828] do_syscall_64+0x35/0x80 [ 3479.556884] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3479.563402] RIP: 0033:0x7f1fc482483b ... [ 3479.943087] ================================================================== Fix this by introducing the local variable port_id to store the port ID value before executing mpt3sas_transport_port_remove(). This local variable is then used in the call to ioc_info() instead of dereferencing the freed port structure. Link: https://lore.kernel.org/r/20220322055702.95276-1-damien.lemoal@opensource.wdc.com Fixes: 7d310f241001 ("scsi: mpt3sas: Get device objects using sas_address & portID") Cc: stable@vger.kernel.org Acked-by: Sreekanth Reddy Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 00792767c620..7e476f50935b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11035,6 +11035,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, { struct _sas_port *mpt3sas_port, *next; unsigned long flags; + int port_id; /* remove sibling ports attached to this expander */ list_for_each_entry_safe(mpt3sas_port, next, @@ -11055,6 +11056,8 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, mpt3sas_port->hba_port); } + port_id = sas_expander->port->port_id; + mpt3sas_transport_port_remove(ioc, sas_expander->sas_address, sas_expander->sas_address_parent, sas_expander->port); @@ -11062,7 +11065,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, "expander_remove: handle(0x%04x), sas_addr(0x%016llx), port:%d\n", sas_expander->handle, (unsigned long long) sas_expander->sas_address, - sas_expander->port->port_id); + port_id); spin_lock_irqsave(&ioc->sas_node_lock, flags); list_del(&sas_expander->list); From ebfe3e0c5e805da3dd692bb120cd6269b7c19b80 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 17 Mar 2022 11:01:16 -0400 Subject: [PATCH 044/423] scsi: libiscsi: Remove unnecessary memset() in iscsi_conn_setup() iscsi_cls_conn is alloced by kzalloc(), the whole iscsi_cls_conn is zero filled already including the dd_data. So it is unnecessary to call memset again. Link: https://lore.kernel.org/r/20220317150116.194140-1-haowenchao@huawei.com Reviewed-by: Wu Bo Reviewed-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: Wenchao Hao Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index d09926e6c8a8..cf4211c6500d 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3045,7 +3045,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, if (!cls_conn) return NULL; conn = cls_conn->dd_data; - memset(conn, 0, sizeof(*conn) + dd_size); conn->dd_data = cls_conn->dd_data + sizeof(*conn); conn->session = session; From a6968f7a367f128d120447360734344d5a3d5336 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Fri, 11 Mar 2022 21:22:05 +0800 Subject: [PATCH 045/423] scsi: target: tcmu: Fix possible page UAF tcmu_try_get_data_page() looks up pages under cmdr_lock, but it does not take refcount properly and just returns page pointer. When tcmu_try_get_data_page() returns, the returned page may have been freed by tcmu_blocks_release(). We need to get_page() under cmdr_lock to avoid concurrent tcmu_blocks_release(). Link: https://lore.kernel.org/r/20220311132206.24515-1-xiaoguang.wang@linux.alibaba.com Reviewed-by: Bodo Stroesser Signed-off-by: Xiaoguang Wang Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 95d4ca50a605..fd7267baa707 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1821,6 +1821,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi) mutex_lock(&udev->cmdr_lock); page = xa_load(&udev->data_pages, dpi); if (likely(page)) { + get_page(page); mutex_unlock(&udev->cmdr_lock); return page; } @@ -1877,6 +1878,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) /* For the vmalloc()ed cmd area pages */ addr = (void *)(unsigned long)info->mem[mi].addr + offset; page = vmalloc_to_page(addr); + get_page(page); } else { uint32_t dpi; @@ -1887,7 +1889,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - get_page(page); vmf->page = page; return 0; } From 35ed9613d83f3c1f011877d591fd7d36f2666106 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:34 -0700 Subject: [PATCH 046/423] scsi: lpfc: Improve PCI EEH Error and Recovery Handling Following EEH errors, the driver can crash or hang when deleting the localport or when attempting to unload. The EEH handlers in the driver did not notify the NVMe-FC transport before tearing the driver down. This was delayed until the resume steps. This worked for SCSI because lpfc_block_scsi() would notify the scsi_fc_transport that the target was not available but it would not clean up all the references to the ndlp. The SLI3 prep for dev reset handler did the lpfc_offline_prep() and lpfc_offline() calls to get the port stopped before restarting. The SLI4 version of the prep for dev reset just destroyed the queues and did not stop NVMe from continuing. Also because the port was not really stopped the localport destroy would hang because the transport was still waiting for I/O. Additionally, a devloss tmo can fire and post events to a stopped worker thread creating another hang condition. lpfc_sli4_prep_dev_for_reset() is modified to call lpfc_offline_prep() and lpfc_offline() rather than just lpfc_scsi_dev_block() to ensure both SCSI and NVMe transports are notified to block I/O to the driver. Logic is added to devloss handler and worker thread to clean up ndlp references and quiesce appropriately. Link: https://lore.kernel.org/r/20220317032737.45308-2-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 7 +- drivers/scsi/lpfc/lpfc_crtn.h | 3 + drivers/scsi/lpfc/lpfc_hbadisc.c | 119 +++++++++++++++++++++++++------ drivers/scsi/lpfc/lpfc_init.c | 60 ++++++++++------ drivers/scsi/lpfc/lpfc_nvme.c | 11 ++- drivers/scsi/lpfc/lpfc_sli.c | 15 ++-- 6 files changed, 157 insertions(+), 58 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 86653aa9b389..8405fd0bbc59 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -896,6 +896,11 @@ enum lpfc_irq_chann_mode { NHT_MODE, }; +enum lpfc_hba_bit_flags { + FABRIC_COMANDS_BLOCKED, + HBA_PCI_ERR, +}; + struct lpfc_hba { /* SCSI interface function jump table entries */ struct lpfc_io_buf * (*lpfc_get_scsi_buf) @@ -1042,7 +1047,6 @@ struct lpfc_hba { * Firmware supports Forced Link Speed * capability */ -#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */ #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ #define HBA_SHORT_CMF 0x200000 /* shorter CMF timer routine */ #define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */ @@ -1349,7 +1353,6 @@ struct lpfc_hba { atomic_t fabric_iocb_count; struct timer_list fabric_block_timer; unsigned long bit_flags; -#define FABRIC_COMANDS_BLOCKED 0 atomic_t num_rsrc_err; atomic_t num_cmd_success; unsigned long last_rsrc_error_time; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 96408cd6c4c8..9897a1aa387b 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -670,3 +670,6 @@ struct lpfc_vmid *lpfc_get_vmid_from_hashtable(struct lpfc_vport *vport, uint32_t hash, uint8_t *buf); void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport); int lpfc_issue_els_qfpa(struct lpfc_vport *vport); + +void lpfc_sli_rpi_release(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 0144da30e3db..6983c70f2fc6 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -109,8 +109,8 @@ lpfc_rport_invalid(struct fc_rport *rport) ndlp = rdata->pnode; if (!rdata->pnode) { - pr_err("**** %s: NULL ndlp on rport x%px SID x%x\n", - __func__, rport, rport->scsi_target_id); + pr_info("**** %s: NULL ndlp on rport x%px SID x%x\n", + __func__, rport, rport->scsi_target_id); return -EINVAL; } @@ -169,9 +169,10 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "3181 dev_loss_callbk x%06x, rport x%px flg x%x " - "load_flag x%x refcnt %d\n", + "load_flag x%x refcnt %d state %d xpt x%x\n", ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag, - vport->load_flag, kref_read(&ndlp->kref)); + vport->load_flag, kref_read(&ndlp->kref), + ndlp->nlp_state, ndlp->fc4_xpt_flags); /* Don't schedule a worker thread event if the vport is going down. * The teardown process cleans up the node via lpfc_drop_node. @@ -181,6 +182,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->rport = NULL; ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD; + /* clear the NLP_XPT_REGD if the node is not registered + * with nvme-fc + */ + if (ndlp->fc4_xpt_flags == NLP_XPT_REGD) + ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; /* Remove the node reference from remote_port_add now. * The driver will not call remote_port_delete. @@ -225,18 +231,36 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->rport = NULL; spin_unlock_irqrestore(&ndlp->lock, iflags); - /* We need to hold the node by incrementing the reference - * count until this queued work is done - */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); + if (phba->worker_thread) { + /* We need to hold the node by incrementing the reference + * count until this queued work is done + */ + evtp->evt_arg1 = lpfc_nlp_get(ndlp); + + spin_lock_irqsave(&phba->hbalock, iflags); + if (evtp->evt_arg1) { + evtp->evt = LPFC_EVT_DEV_LOSS; + list_add_tail(&evtp->evt_listp, &phba->work_list); + lpfc_worker_wake_up(phba); + } + spin_unlock_irqrestore(&phba->hbalock, iflags); + } else { + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "3188 worker thread is stopped %s x%06x, " + " rport x%px flg x%x load_flag x%x refcnt " + "%d\n", __func__, ndlp->nlp_DID, + ndlp->rport, ndlp->nlp_flag, + vport->load_flag, kref_read(&ndlp->kref)); + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) { + spin_lock_irqsave(&ndlp->lock, iflags); + /* Node is in dev loss. No further transaction. */ + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); + } - spin_lock_irqsave(&phba->hbalock, iflags); - if (evtp->evt_arg1) { - evtp->evt = LPFC_EVT_DEV_LOSS; - list_add_tail(&evtp->evt_listp, &phba->work_list); - lpfc_worker_wake_up(phba); } - spin_unlock_irqrestore(&phba->hbalock, iflags); return; } @@ -503,11 +527,12 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, "0203 Devloss timeout on " "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x " - "NPort x%06x Data: x%x x%x x%x\n", + "NPort x%06x Data: x%x x%x x%x refcnt %d\n", *name, *(name+1), *(name+2), *(name+3), *(name+4), *(name+5), *(name+6), *(name+7), ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_state, ndlp->nlp_rpi); + ndlp->nlp_state, ndlp->nlp_rpi, + kref_read(&ndlp->kref)); } else { lpfc_printf_vlog(vport, KERN_INFO, LOG_TRACE_EVENT, "0204 Devloss timeout on " @@ -755,18 +780,22 @@ lpfc_work_list_done(struct lpfc_hba *phba) int free_evt; int fcf_inuse; uint32_t nlp_did; + bool hba_pci_err; spin_lock_irq(&phba->hbalock); while (!list_empty(&phba->work_list)) { list_remove_head((&phba->work_list), evtp, typeof(*evtp), evt_listp); spin_unlock_irq(&phba->hbalock); + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); free_evt = 1; switch (evtp->evt) { case LPFC_EVT_ELS_RETRY: ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); - lpfc_els_retry_delay_handler(ndlp); - free_evt = 0; /* evt is part of ndlp */ + if (!hba_pci_err) { + lpfc_els_retry_delay_handler(ndlp); + free_evt = 0; /* evt is part of ndlp */ + } /* decrement the node reference count held * for this queued work */ @@ -788,8 +817,10 @@ lpfc_work_list_done(struct lpfc_hba *phba) break; case LPFC_EVT_RECOVER_PORT: ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); - lpfc_sli_abts_recover_port(ndlp->vport, ndlp); - free_evt = 0; + if (!hba_pci_err) { + lpfc_sli_abts_recover_port(ndlp->vport, ndlp); + free_evt = 0; + } /* decrement the node reference count held for * this queued work */ @@ -859,14 +890,18 @@ lpfc_work_done(struct lpfc_hba *phba) struct lpfc_vport **vports; struct lpfc_vport *vport; int i; + bool hba_pci_err; + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); spin_lock_irq(&phba->hbalock); ha_copy = phba->work_ha; phba->work_ha = 0; spin_unlock_irq(&phba->hbalock); + if (hba_pci_err) + ha_copy = 0; /* First, try to post the next mailbox command to SLI4 device */ - if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) + if (phba->pci_dev_grp == LPFC_PCI_DEV_OC && !hba_pci_err) lpfc_sli4_post_async_mbox(phba); if (ha_copy & HA_ERATT) { @@ -886,7 +921,7 @@ lpfc_work_done(struct lpfc_hba *phba) lpfc_handle_latt(phba); /* Handle VMID Events */ - if (lpfc_is_vmid_enabled(phba)) { + if (lpfc_is_vmid_enabled(phba) && !hba_pci_err) { if (phba->pport->work_port_events & WORKER_CHECK_VMID_ISSUE_QFPA) { lpfc_check_vmid_qfpa_issue(phba); @@ -936,6 +971,8 @@ lpfc_work_done(struct lpfc_hba *phba) work_port_events = vport->work_port_events; vport->work_port_events &= ~work_port_events; spin_unlock_irq(&vport->work_port_lock); + if (hba_pci_err) + continue; if (work_port_events & WORKER_DISC_TMO) lpfc_disc_timeout_handler(vport); if (work_port_events & WORKER_ELS_TMO) @@ -1173,12 +1210,14 @@ lpfc_linkdown(struct lpfc_hba *phba) struct lpfc_vport **vports; LPFC_MBOXQ_t *mb; int i; + int offline; if (phba->link_state == LPFC_LINK_DOWN) return 0; /* Block all SCSI stack I/Os */ lpfc_scsi_dev_block(phba); + offline = pci_channel_offline(phba->pcidev); phba->defer_flogi_acc_flag = false; @@ -1219,7 +1258,7 @@ lpfc_linkdown(struct lpfc_hba *phba) lpfc_destroy_vport_work_array(phba, vports); /* Clean up any SLI3 firmware default rpi's */ - if (phba->sli_rev > LPFC_SLI_REV3) + if (phba->sli_rev > LPFC_SLI_REV3 || offline) goto skip_unreg_did; mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -4712,6 +4751,11 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) spin_lock_irqsave(&ndlp->lock, iflags); if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) { spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0999 %s Not regd: ndlp x%px rport x%px DID " + "x%x FLG x%x XPT x%x\n", + __func__, ndlp, ndlp->rport, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->fc4_xpt_flags); return; } @@ -4722,6 +4766,13 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->fc4_xpt_flags & SCSI_XPT_REGD) { vport->phba->nport_event_cnt++; lpfc_unregister_remote_port(ndlp); + } else if (!ndlp->rport) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "1999 %s NDLP in devloss x%px DID x%x FLG x%x" + " XPT x%x refcnt %d\n", + __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->fc4_xpt_flags, + kref_read(&ndlp->kref)); } if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) { @@ -6097,12 +6148,34 @@ lpfc_disc_flush_list(struct lpfc_vport *vport) } } +/* + * lpfc_notify_xport_npr - notifies xport of node disappearance + * @vport: Pointer to Virtual Port object. + * + * Transitions all ndlps to NPR state. When lpfc_nlp_set_state + * calls lpfc_nlp_state_cleanup, the ndlp->rport is unregistered + * and transport notified that the node is gone. + * Return Code: + * none + */ +static void +lpfc_notify_xport_npr(struct lpfc_vport *vport) +{ + struct lpfc_nodelist *ndlp, *next_ndlp; + + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) { + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } +} void lpfc_cleanup_discovery_resources(struct lpfc_vport *vport) { lpfc_els_flush_rscn(vport); lpfc_els_flush_cmd(vport); lpfc_disc_flush_list(vport); + if (pci_channel_offline(vport->phba->pcidev)) + lpfc_notify_xport_npr(vport); } /*****************************************************************************/ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index eed6464bd880..b8ab6dcbadc5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1642,7 +1642,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba) { spin_lock_irq(&phba->hbalock); if (phba->link_state == LPFC_HBA_ERROR && - phba->hba_flag & HBA_PCI_ERR) { + test_bit(HBA_PCI_ERR, &phba->bit_flags)) { spin_unlock_irq(&phba->hbalock); return; } @@ -3682,7 +3682,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) struct lpfc_vport **vports; struct Scsi_Host *shost; int i; - int offline = 0; + int offline; + bool hba_pci_err; if (vport->fc_flag & FC_OFFLINE_MODE) return; @@ -3692,6 +3693,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) lpfc_linkdown(phba); offline = pci_channel_offline(phba->pcidev); + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); /* Issue an unreg_login to all nodes on all vports */ vports = lpfc_create_vport_work_array(phba); @@ -3715,11 +3717,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) ndlp->nlp_flag &= ~NLP_NPR_ADISC; spin_unlock_irq(&ndlp->lock); - if (offline) { + if (offline || hba_pci_err) { spin_lock_irq(&ndlp->lock); ndlp->nlp_flag &= ~(NLP_UNREG_INP | NLP_RPI_REGISTERED); spin_unlock_irq(&ndlp->lock); + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli_rpi_release(vports[i], + ndlp); } else { lpfc_unreg_rpi(vports[i], ndlp); } @@ -13374,15 +13379,12 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Disable FW logging to host memory */ lpfc_ras_stop_fwlog(phba); - /* Unset the queues shared with the hardware then release all - * allocated resources. - */ - lpfc_sli4_queue_unset(phba); - lpfc_sli4_queue_destroy(phba); - /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); + /* release all queue allocated resources. */ + lpfc_sli4_queue_destroy(phba); + /* Free RAS DMA memory */ if (phba->ras_fwlog.ras_enabled) lpfc_sli4_ras_dma_free(phba); @@ -15057,24 +15059,28 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba) static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "2826 PCI channel disable preparing for reset\n"); + int offline = pci_channel_offline(phba->pcidev); + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2826 PCI channel disable preparing for reset offline" + " %d\n", offline); /* Block any management I/Os to the device */ lpfc_block_mgmt_io(phba, LPFC_MBX_NO_WAIT); - /* Block all SCSI devices' I/Os on the host */ - lpfc_scsi_dev_block(phba); + /* HBA_PCI_ERR was set in io_error_detect */ + lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT); /* Flush all driver's outstanding I/Os as we are to reset */ lpfc_sli_flush_io_rings(phba); + lpfc_offline(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); + lpfc_sli4_queue_destroy(phba); /* Disable interrupt and pci device */ lpfc_sli4_disable_intr(phba); - lpfc_sli4_queue_destroy(phba); pci_disable_device(phba->pcidev); } @@ -15123,6 +15129,7 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) { struct Scsi_Host *shost = pci_get_drvdata(pdev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; + bool hba_pci_err; switch (state) { case pci_channel_io_normal: @@ -15130,17 +15137,24 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) lpfc_sli4_prep_dev_for_recover(phba); return PCI_ERS_RESULT_CAN_RECOVER; case pci_channel_io_frozen: - phba->hba_flag |= HBA_PCI_ERR; + hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags); /* Fatal error, prepare for slot reset */ - lpfc_sli4_prep_dev_for_reset(phba); + if (!hba_pci_err) + lpfc_sli4_prep_dev_for_reset(phba); + else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2832 Already handling PCI error " + "state: x%x\n", state); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: - phba->hba_flag |= HBA_PCI_ERR; + set_bit(HBA_PCI_ERR, &phba->bit_flags); /* Permanent failure, prepare for device down */ lpfc_sli4_prep_dev_for_perm_failure(phba); return PCI_ERS_RESULT_DISCONNECT; default: - phba->hba_flag |= HBA_PCI_ERR; + hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags); + if (!hba_pci_err) + lpfc_sli4_prep_dev_for_reset(phba); /* Unknown state, prepare and request slot reset */ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "2825 Unknown PCI error state: x%x\n", state); @@ -15174,17 +15188,21 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; struct lpfc_sli *psli = &phba->sli; uint32_t intr_mode; + bool hba_pci_err; dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n"); if (pci_enable_device_mem(pdev)) { printk(KERN_ERR "lpfc: Cannot re-enable " - "PCI device after reset.\n"); + "PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } pci_restore_state(pdev); - phba->hba_flag &= ~HBA_PCI_ERR; + hba_pci_err = test_and_clear_bit(HBA_PCI_ERR, &phba->bit_flags); + if (!hba_pci_err) + dev_info(&pdev->dev, + "hba_pci_err was not set, recovering slot reset.\n"); /* * As the new kernel behavior of pci_restore_state() API call clears * device saved_state flag, need to save the restored state again. @@ -15239,8 +15257,6 @@ lpfc_io_resume_s4(struct pci_dev *pdev) */ if (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE)) { /* Perform device reset */ - lpfc_offline_prep(phba, LPFC_MBX_WAIT); - lpfc_offline(phba); lpfc_sli_brdrestart(phba); /* Bring the device back online */ lpfc_online(phba); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 1213a299f9aa..e47205e0d3e2 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2169,8 +2169,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_nvme = 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { qp = &phba->sli4_hba.hdwq[i]; - if (!vport || !vport->localport || - !qp || !qp->io_wq) + if (!vport->localport || !qp || !qp->io_wq) return; pring = qp->io_wq->pring; @@ -2180,8 +2179,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_scsi += qp->abts_scsi_io_bufs; abts_nvme += qp->abts_nvme_io_bufs; } - if (!vport || !vport->localport || - vport->phba->hba_flag & HBA_PCI_ERR) + if (!vport->localport || + test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) || + vport->load_flag & FC_UNLOADING) return; lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, @@ -2541,8 +2541,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * return values is ignored. The upcall is a courtesy to the * transport. */ - if (vport->load_flag & FC_UNLOADING || - unlikely(vport->phba->hba_flag & HBA_PCI_ERR)) + if (vport->load_flag & FC_UNLOADING) (void)nvme_fc_set_remoteport_devloss(remoteport, 0); ret = nvme_fc_unregister_remoteport(remoteport); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 20d40957a385..26f6a147b5ae 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2828,6 +2828,12 @@ __lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_flag &= ~NLP_UNREG_INP; } +void +lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + __lpfc_sli_rpi_release(vport, ndlp); +} + /** * lpfc_sli_def_mbox_cmpl - Default mailbox completion handler * @phba: Pointer to HBA context object. @@ -4624,11 +4630,6 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) struct lpfc_iocbq *piocb, *next_iocb; spin_lock_irq(&phba->hbalock); - if (phba->hba_flag & HBA_IOQ_FLUSH || - !phba->sli4_hba.hdwq) { - spin_unlock_irq(&phba->hbalock); - return; - } /* Indicate the I/O queues are flushed */ phba->hba_flag |= HBA_IOQ_FLUSH; spin_unlock_irq(&phba->hbalock); @@ -10997,6 +10998,10 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, unsigned long iflags; int rc; + /* If the PCI channel is in offline state, do not post iocbs. */ + if (unlikely(pci_channel_offline(phba->pcidev))) + return IOCB_ERROR; + if (phba->sli_rev == LPFC_SLI_REV4) { lpfc_sli_prep_wqe(phba, piocb); From a4691038b4071ff0d9ae486d8822a2c0d41d5796 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:35 -0700 Subject: [PATCH 047/423] scsi: lpfc: Fix unload hang after back to back PCI EEH faults When injecting EEH errors the port is getting hung up waiting on the node list to empty, message number 0233. The driver is stuck at this point and also can't unload. The driver makes transport remoteport delete calls which try to abort I/O's, but the EEH daemon has already called the driver to detach and the detachment has set the global FC_UNLOADING flag. There are several code paths that will avoid I/O cleanup if the FC_UNLOADING flag is set, resulting in transports waiting for I/O while the driver is waiting on transports to clean up. Additionally, during study of the list, a locking issue was found in lpfc_sli_abort_iocb_ring that could corrupt the list. A special case was added to the lpfc_cleanup() routine to call lpfc_sli_flush_rings() if the driver is FC_UNLOADING and if the pci-slot is offline (e.g. EEH). The SLI4 part of lpfc_sli_abort_iocb_ring() is changed to use the ring_lock. Also added code to cancel the I/Os if the pci-slot is offline and added checks and returns for the FC_UNLOADING and HBA_IOQ_FLUSH flags to prevent trying to send an I/O that we cannot handle. Link: https://lore.kernel.org/r/20220317032737.45308-3-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 1 + drivers/scsi/lpfc/lpfc_init.c | 26 +++++++++++++++-- drivers/scsi/lpfc/lpfc_nvme.c | 16 ++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 50 ++++++++++++++++++++++---------- 4 files changed, 72 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 6983c70f2fc6..2b877dff5ed4 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -5422,6 +5422,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_flag &= ~NLP_UNREG_INP; mempool_free(mbox, phba->mbox_mem_pool); acc_plogi = 1; + lpfc_nlp_put(ndlp); } } else { lpfc_printf_vlog(vport, KERN_INFO, diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index b8ab6dcbadc5..cf83bc0e27c0 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -95,6 +95,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *); static int lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *); +static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1985,6 +1986,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) if (pci_channel_offline(phba->pcidev)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "3166 pci channel is offline\n"); + lpfc_sli_flush_io_rings(phba); return; } @@ -2973,6 +2975,22 @@ lpfc_cleanup(struct lpfc_vport *vport) NLP_EVT_DEVICE_RM); } + /* This is a special case flush to return all + * IOs before entering this loop. There are + * two points in the code where a flush is + * avoided if the FC_UNLOADING flag is set. + * one is in the multipool destroy, + * (this prevents a crash) and the other is + * in the nvme abort handler, ( also prevents + * a crash). Both of these exceptions are + * cases where the slot is still accessible. + * The flush here is only when the pci slot + * is offline. + */ + if (vport->load_flag & FC_UNLOADING && + pci_channel_offline(phba->pcidev)) + lpfc_sli_flush_io_rings(vport->phba); + /* At this point, ALL ndlp's should be gone * because of the previous NLP_EVT_DEVICE_RM. * Lets wait for this to happen, if needed. @@ -2985,7 +3003,7 @@ lpfc_cleanup(struct lpfc_vport *vport) list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { lpfc_printf_vlog(ndlp->vport, KERN_ERR, - LOG_TRACE_EVENT, + LOG_DISCOVERY, "0282 did:x%x ndlp:x%px " "refcnt:%d xflags x%x nflag x%x\n", ndlp->nlp_DID, (void *)ndlp, @@ -13359,8 +13377,9 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Abort all iocbs associated with the hba */ lpfc_sli_hba_iocb_abort(phba); - /* Wait for completion of device XRI exchange busy */ - lpfc_sli4_xri_exchange_busy_wait(phba); + if (!pci_channel_offline(phba->pcidev)) + /* Wait for completion of device XRI exchange busy */ + lpfc_sli4_xri_exchange_busy_wait(phba); /* per-phba callback de-registration for hotplug event */ if (phba->pport) @@ -14264,6 +14283,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba) "2711 PCI channel permanent disable for failure\n"); /* Block all SCSI devices' I/Os on the host */ lpfc_scsi_dev_block(phba); + lpfc_sli4_prep_dev_for_reset(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index e47205e0d3e2..8d26f207ebd2 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -93,6 +93,11 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport, lport = (struct lpfc_nvme_lport *)pnvme_lport->private; vport = lport->vport; + + if (!vport || vport->load_flag & FC_UNLOADING || + vport->phba->hba_flag & HBA_IOQ_FLUSH) + return -ENODEV; + qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL); if (qhandle == NULL) return -ENOMEM; @@ -267,7 +272,8 @@ lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, return -EINVAL; remoteport = lpfc_rport->remoteport; - if (!vport->localport) + if (!vport->localport || + vport->phba->hba_flag & HBA_IOQ_FLUSH) return -EINVAL; lport = vport->localport->private; @@ -559,6 +565,8 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_DID, ntype, nstate); return -ENODEV; } + if (vport->phba->hba_flag & HBA_IOQ_FLUSH) + return -ENODEV; if (!vport->phba->sli4_hba.nvmels_wq) return -ENOMEM; @@ -662,7 +670,8 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, return -EINVAL; vport = lport->vport; - if (vport->load_flag & FC_UNLOADING) + if (vport->load_flag & FC_UNLOADING || + vport->phba->hba_flag & HBA_IOQ_FLUSH) return -ENODEV; atomic_inc(&lport->fc4NvmeLsRequests); @@ -1516,7 +1525,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; - if (unlikely(vport->load_flag & FC_UNLOADING)) { + if ((unlikely(vport->load_flag & FC_UNLOADING)) || + phba->hba_flag & HBA_IOQ_FLUSH) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR, "6124 Fail IO, Driver unload\n"); atomic_inc(&lport->xmt_fcp_err); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 26f6a147b5ae..70c3929a9fb4 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4542,42 +4542,62 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, void lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { - LIST_HEAD(completions); + LIST_HEAD(tx_completions); + LIST_HEAD(txcmplq_completions); struct lpfc_iocbq *iocb, *next_iocb; + int offline; if (pring->ringno == LPFC_ELS_RING) { lpfc_fabric_abort_hba(phba); } + offline = pci_channel_offline(phba->pcidev); /* Error everything on txq and txcmplq * First do the txq. */ if (phba->sli_rev >= LPFC_SLI_REV4) { spin_lock_irq(&pring->ring_lock); - list_splice_init(&pring->txq, &completions); + list_splice_init(&pring->txq, &tx_completions); pring->txq_cnt = 0; - spin_unlock_irq(&pring->ring_lock); - spin_lock_irq(&phba->hbalock); - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL); - spin_unlock_irq(&phba->hbalock); + if (offline) { + list_splice_init(&pring->txcmplq, + &txcmplq_completions); + } else { + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, + iocb, NULL); + } + spin_unlock_irq(&pring->ring_lock); } else { spin_lock_irq(&phba->hbalock); - list_splice_init(&pring->txq, &completions); + list_splice_init(&pring->txq, &tx_completions); pring->txq_cnt = 0; - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL); + if (offline) { + list_splice_init(&pring->txcmplq, &txcmplq_completions); + } else { + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, + iocb, NULL); + } spin_unlock_irq(&phba->hbalock); } - /* Make sure HBA is alive */ - lpfc_issue_hb_tmo(phba); + if (offline) { + /* Cancel all the IOCBs from the completions list */ + lpfc_sli_cancel_iocbs(phba, &txcmplq_completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); + } else { + /* Make sure HBA is alive */ + lpfc_issue_hb_tmo(phba); + } /* Cancel all the IOCBs from the completions list */ - lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, + lpfc_sli_cancel_iocbs(phba, &tx_completions, IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); } From df0101197c4d9596682901631f3ee193ed354873 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:36 -0700 Subject: [PATCH 048/423] scsi: lpfc: Fix queue failures when recovering from PCI parity error When recovering from a pci-parity error the driver is failing to re-create queues, causing recovery to fail. Looking deeper, it was found that the interrupt vector count allocated on the recovery was fewer than the vectors originally allocated. This disparity resulted in CPU map entries with stale information. When the driver tries to re-create the queues, it attempts to use the stale information which indicates an eq/interrupt vector that was no longer created. Fix by clearng the cpup map array before enabling and requesting the IRQs in the lpfc_sli_reset_slot_s4 routine(). Link: https://lore.kernel.org/r/20220317032737.45308-4-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index cf83bc0e27c0..461d333b1b3a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -15236,6 +15236,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) psli->sli_flag &= ~LPFC_SLI_ACTIVE; spin_unlock_irq(&phba->hbalock); + /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { From 4f3beb36b1e46f1a04967a84e3a1bd55bf7a9e7a Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:37 -0700 Subject: [PATCH 049/423] scsi: lpfc: Update lpfc version to 14.2.0.1 Update lpfc version to 14.2.0.1 Link: https://lore.kernel.org/r/20220317032737.45308-5-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index e52f37e5d896..a4d3259b8c52 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.2.0.0" +#define LPFC_DRIVER_VERSION "14.2.0.1" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 7294a9bcaa7ee0d3b96aab1a277317315fd46f09 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 23 Mar 2022 13:55:44 -0700 Subject: [PATCH 050/423] scsi: lpfc: Fix broken SLI4 abort path There was a merge error in ther 14.2.0.0 patches that resulted in the SLI4 path using the SLI3 issue_abort_iotag() routine. This resulted in txcmplq corruption. Fix to use the SLI4 routine when SLI4. Link: https://lore.kernel.org/r/20220323205545.81814-2-jsmart2021@gmail.com Fixes: 31a59f75702f ("scsi: lpfc: SLI path split: Refactor Abort paths") Cc: # v5.2+ Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 3c132604fd91..ba9dbb51b75f 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5929,13 +5929,15 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) } lpfc_cmd->waitq = &waitq; - if (phba->sli_rev == LPFC_SLI_REV4) + if (phba->sli_rev == LPFC_SLI_REV4) { spin_unlock(&pring_s4->ring_lock); - else + ret_val = lpfc_sli4_issue_abort_iotag(phba, iocb, + lpfc_sli_abort_fcp_cmpl); + } else { pring = &phba->sli.sli3_ring[LPFC_FCP_RING]; - - ret_val = lpfc_sli_issue_abort_iotag(phba, pring, iocb, - lpfc_sli_abort_fcp_cmpl); + ret_val = lpfc_sli_issue_abort_iotag(phba, pring, iocb, + lpfc_sli_abort_fcp_cmpl); + } /* Make sure HBA is alive */ lpfc_issue_hb_tmo(phba); From c26bd6602e1d348bfa754dc55e5608c922dd2801 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 23 Mar 2022 13:55:45 -0700 Subject: [PATCH 051/423] scsi: lpfc: Fix locking for lpfc_sli_iocbq_lookup() The rules changed for lpfc_sli_iocbq_lookup() vs locking. Prior, the routine properly took out the lock. In newly refactored code, the locks must be held when calling the routine. Fix lpfc_sli_process_sol_iocb() to take the locks before calling the routine. Fix lpfc_sli_handle_fast_ring_event() to not release the locks to call the routine. Link: https://lore.kernel.org/r/20220323205545.81814-3-jsmart2021@gmail.com Fixes: 1b64aa9eae28 ("scsi: lpfc: SLI path split: Refactor fast and slow paths to native SLI4") Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 70c3929a9fb4..bda2a7ba4e77 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3721,7 +3721,15 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, unsigned long iflag; u32 ulp_command, ulp_status, ulp_word4, ulp_context, iotag; + if (phba->sli_rev == LPFC_SLI_REV4) + spin_lock_irqsave(&pring->ring_lock, iflag); + else + spin_lock_irqsave(&phba->hbalock, iflag); cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq); + if (phba->sli_rev == LPFC_SLI_REV4) + spin_unlock_irqrestore(&pring->ring_lock, iflag); + else + spin_unlock_irqrestore(&phba->hbalock, iflag); ulp_command = get_job_cmnd(phba, saveq); ulp_status = get_job_ulpstatus(phba, saveq); @@ -4058,10 +4066,8 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba, break; } - spin_unlock_irqrestore(&phba->hbalock, iflag); cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring, &rspiocbq); - spin_lock_irqsave(&phba->hbalock, iflag); if (unlikely(!cmdiocbq)) break; if (cmdiocbq->cmd_flag & LPFC_DRIVER_ABORTED) From 8ee15ea779c332f94d74d135212403f2d0defb5f Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Thu, 24 Mar 2022 16:01:46 +0900 Subject: [PATCH 052/423] scsi: ufs: core: Remove unused field in struct ufs_hba Remove unused fields 'rpm_lvl_attr' and 'spm_lvl_attr' in struct ufs_hba. Commit cbb6813ee771 ("scsi: ufs: sysfs: attribute group for existing sysfs entries.") removed all code using that field. Link: https://lore.kernel.org/r/413601558.101648105683746.JavaMail.epsvc@epcpadp4 Reviewed-by: Bart Van Assche Acked-by: Avri Altman Signed-off-by: Keoseong Park Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 88c20f3608c2..94f545be183a 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -820,8 +820,6 @@ struct ufs_hba { enum ufs_pm_level rpm_lvl; /* Desired UFS power management level during system PM */ enum ufs_pm_level spm_lvl; - struct device_attribute rpm_lvl_attr; - struct device_attribute spm_lvl_attr; int pm_op_in_progress; /* Auto-Hibernate Idle Timer register value */ From f06aa52cb2723ec67e92df463827b800d6c477d1 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Thu, 24 Mar 2022 14:46:03 +0100 Subject: [PATCH 053/423] scsi: core: scsi_logging: Fix a BUG The request_queue may be NULL in a request, for example when it comes from scsi_ioctl_reset(). Check it before use. Fixes: f3fa33acca9f ("block: remove the ->rq_disk field in struct request") Link: https://lore.kernel.org/r/20220324134603.28463-1-thenzl@redhat.com Reported-by: Changhui Zhong Reviewed-by: Christoph Hellwig Signed-off-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_logging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_logging.c b/drivers/scsi/scsi_logging.c index ff89de86545d..b02af340c2d3 100644 --- a/drivers/scsi/scsi_logging.c +++ b/drivers/scsi/scsi_logging.c @@ -30,7 +30,7 @@ static inline const char *scmd_name(const struct scsi_cmnd *scmd) { struct request *rq = scsi_cmd_to_rq((struct scsi_cmnd *)scmd); - if (!rq->q->disk) + if (!rq->q || !rq->q->disk) return NULL; return rq->q->disk->disk_name; } From f16aa285e6185271bc6812a176f7ae3dbd7fe28d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 26 Mar 2022 13:48:15 +0100 Subject: [PATCH 054/423] scsi: pmcraid: Remove the PMCRAID_PASSTHROUGH_IOCTL ioctl implementation The whole passthrough ioctl path looks completely broken. For example it DMA maps the scatterlist and after that copies data to it, which is prohibited by the DMA API contract. Moreover, in pmcraid_alloc_sglist(), the pointer returned by a sgl_alloc_order() call is not recorded anywhere which is pointless. So remove the PMCRAID_PASSTHROUGH_IOCTL ioctl implementation entirely. Should it be needed, we should reimplement it using the proper block layer request mapping helpers. Link: https://lore.kernel.org/r/7f27a70bec3f3dcaf46a29b1c630edd4792e71c0.1648298857.git.christophe.jaillet@wanadoo.fr Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.c | 491 ----------------------------------------- drivers/scsi/pmcraid.h | 33 --- 2 files changed, 524 deletions(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 928532180d32..fd674ed1febe 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -3181,124 +3181,6 @@ static int pmcraid_build_ioadl( return 0; } -/** - * pmcraid_free_sglist - Frees an allocated SG buffer list - * @sglist: scatter/gather list pointer - * - * Free a DMA'able memory previously allocated with pmcraid_alloc_sglist - * - * Return value: - * none - */ -static void pmcraid_free_sglist(struct pmcraid_sglist *sglist) -{ - sgl_free_order(sglist->scatterlist, sglist->order); - kfree(sglist); -} - -/** - * pmcraid_alloc_sglist - Allocates memory for a SG list - * @buflen: buffer length - * - * Allocates a DMA'able buffer in chunks and assembles a scatter/gather - * list. - * - * Return value - * pointer to sglist / NULL on failure - */ -static struct pmcraid_sglist *pmcraid_alloc_sglist(int buflen) -{ - struct pmcraid_sglist *sglist; - int sg_size; - int order; - - sg_size = buflen / (PMCRAID_MAX_IOADLS - 1); - order = (sg_size > 0) ? get_order(sg_size) : 0; - - /* Allocate a scatter/gather list for the DMA */ - sglist = kzalloc(sizeof(struct pmcraid_sglist), GFP_KERNEL); - if (sglist == NULL) - return NULL; - - sglist->order = order; - sgl_alloc_order(buflen, order, false, GFP_KERNEL | __GFP_ZERO, - &sglist->num_sg); - - return sglist; -} - -/** - * pmcraid_copy_sglist - Copy user buffer to kernel buffer's SG list - * @sglist: scatter/gather list pointer - * @buffer: buffer pointer - * @len: buffer length - * @direction: data transfer direction - * - * Copy a user buffer into a buffer allocated by pmcraid_alloc_sglist - * - * Return value: - * 0 on success / other on failure - */ -static int pmcraid_copy_sglist( - struct pmcraid_sglist *sglist, - void __user *buffer, - u32 len, - int direction -) -{ - struct scatterlist *sg; - void *kaddr; - int bsize_elem; - int i; - int rc = 0; - - /* Determine the actual number of bytes per element */ - bsize_elem = PAGE_SIZE * (1 << sglist->order); - - sg = sglist->scatterlist; - - for (i = 0; i < (len / bsize_elem); i++, sg = sg_next(sg), buffer += bsize_elem) { - struct page *page = sg_page(sg); - - kaddr = kmap(page); - if (direction == DMA_TO_DEVICE) - rc = copy_from_user(kaddr, buffer, bsize_elem); - else - rc = copy_to_user(buffer, kaddr, bsize_elem); - - kunmap(page); - - if (rc) { - pmcraid_err("failed to copy user data into sg list\n"); - return -EFAULT; - } - - sg->length = bsize_elem; - } - - if (len % bsize_elem) { - struct page *page = sg_page(sg); - - kaddr = kmap(page); - - if (direction == DMA_TO_DEVICE) - rc = copy_from_user(kaddr, buffer, len % bsize_elem); - else - rc = copy_to_user(buffer, kaddr, len % bsize_elem); - - kunmap(page); - - sg->length = len % bsize_elem; - } - - if (rc) { - pmcraid_err("failed to copy user data into sg list\n"); - rc = -EFAULT; - } - - return rc; -} - /** * pmcraid_queuecommand_lck - Queue a mid-layer request * @scsi_cmd: scsi command struct @@ -3454,365 +3336,6 @@ static int pmcraid_chr_fasync(int fd, struct file *filep, int mode) return rc; } - -/** - * pmcraid_build_passthrough_ioadls - builds SG elements for passthrough - * commands sent over IOCTL interface - * - * @cmd : pointer to struct pmcraid_cmd - * @buflen : length of the request buffer - * @direction : data transfer direction - * - * Return value - * 0 on success, non-zero error code on failure - */ -static int pmcraid_build_passthrough_ioadls( - struct pmcraid_cmd *cmd, - int buflen, - int direction -) -{ - struct pmcraid_sglist *sglist = NULL; - struct scatterlist *sg = NULL; - struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb; - struct pmcraid_ioadl_desc *ioadl; - int i; - - sglist = pmcraid_alloc_sglist(buflen); - - if (!sglist) { - pmcraid_err("can't allocate memory for passthrough SGls\n"); - return -ENOMEM; - } - - sglist->num_dma_sg = dma_map_sg(&cmd->drv_inst->pdev->dev, - sglist->scatterlist, - sglist->num_sg, direction); - - if (!sglist->num_dma_sg || sglist->num_dma_sg > PMCRAID_MAX_IOADLS) { - dev_err(&cmd->drv_inst->pdev->dev, - "Failed to map passthrough buffer!\n"); - pmcraid_free_sglist(sglist); - return -EIO; - } - - cmd->sglist = sglist; - ioarcb->request_flags0 |= NO_LINK_DESCS; - - ioadl = pmcraid_init_ioadls(cmd, sglist->num_dma_sg); - - /* Initialize IOADL descriptor addresses */ - for_each_sg(sglist->scatterlist, sg, sglist->num_dma_sg, i) { - ioadl[i].data_len = cpu_to_le32(sg_dma_len(sg)); - ioadl[i].address = cpu_to_le64(sg_dma_address(sg)); - ioadl[i].flags = 0; - } - - /* setup the last descriptor */ - ioadl[i - 1].flags = IOADL_FLAGS_LAST_DESC; - - return 0; -} - - -/** - * pmcraid_release_passthrough_ioadls - release passthrough ioadls - * - * @cmd: pointer to struct pmcraid_cmd for which ioadls were allocated - * @buflen: size of the request buffer - * @direction: data transfer direction - * - * Return value - * 0 on success, non-zero error code on failure - */ -static void pmcraid_release_passthrough_ioadls( - struct pmcraid_cmd *cmd, - int buflen, - int direction -) -{ - struct pmcraid_sglist *sglist = cmd->sglist; - - if (buflen > 0) { - dma_unmap_sg(&cmd->drv_inst->pdev->dev, - sglist->scatterlist, - sglist->num_sg, - direction); - pmcraid_free_sglist(sglist); - cmd->sglist = NULL; - } -} - -/** - * pmcraid_ioctl_passthrough - handling passthrough IOCTL commands - * - * @pinstance: pointer to adapter instance structure - * @ioctl_cmd: ioctl code - * @buflen: unused - * @arg: pointer to pmcraid_passthrough_buffer user buffer - * - * Return value - * 0 on success, non-zero error code on failure - */ -static long pmcraid_ioctl_passthrough( - struct pmcraid_instance *pinstance, - unsigned int ioctl_cmd, - unsigned int buflen, - void __user *arg -) -{ - struct pmcraid_passthrough_ioctl_buffer *buffer; - struct pmcraid_ioarcb *ioarcb; - struct pmcraid_cmd *cmd; - struct pmcraid_cmd *cancel_cmd; - void __user *request_buffer; - unsigned long request_offset; - unsigned long lock_flags; - void __user *ioasa; - u32 ioasc; - int request_size; - int buffer_size; - u8 direction; - int rc = 0; - - /* If IOA reset is in progress, wait 10 secs for reset to complete */ - if (pinstance->ioa_reset_in_progress) { - rc = wait_event_interruptible_timeout( - pinstance->reset_wait_q, - !pinstance->ioa_reset_in_progress, - msecs_to_jiffies(10000)); - - if (!rc) - return -ETIMEDOUT; - else if (rc < 0) - return -ERESTARTSYS; - } - - /* If adapter is not in operational state, return error */ - if (pinstance->ioa_state != IOA_STATE_OPERATIONAL) { - pmcraid_err("IOA is not operational\n"); - return -ENOTTY; - } - - buffer_size = sizeof(struct pmcraid_passthrough_ioctl_buffer); - buffer = kmalloc(buffer_size, GFP_KERNEL); - - if (!buffer) { - pmcraid_err("no memory for passthrough buffer\n"); - return -ENOMEM; - } - - request_offset = - offsetof(struct pmcraid_passthrough_ioctl_buffer, request_buffer); - - request_buffer = arg + request_offset; - - rc = copy_from_user(buffer, arg, - sizeof(struct pmcraid_passthrough_ioctl_buffer)); - - ioasa = arg + offsetof(struct pmcraid_passthrough_ioctl_buffer, ioasa); - - if (rc) { - pmcraid_err("ioctl: can't copy passthrough buffer\n"); - rc = -EFAULT; - goto out_free_buffer; - } - - request_size = le32_to_cpu(buffer->ioarcb.data_transfer_length); - - if (buffer->ioarcb.request_flags0 & TRANSFER_DIR_WRITE) { - direction = DMA_TO_DEVICE; - } else { - direction = DMA_FROM_DEVICE; - } - - if (request_size < 0) { - rc = -EINVAL; - goto out_free_buffer; - } - - /* check if we have any additional command parameters */ - if (le16_to_cpu(buffer->ioarcb.add_cmd_param_length) - > PMCRAID_ADD_CMD_PARAM_LEN) { - rc = -EINVAL; - goto out_free_buffer; - } - - cmd = pmcraid_get_free_cmd(pinstance); - - if (!cmd) { - pmcraid_err("free command block is not available\n"); - rc = -ENOMEM; - goto out_free_buffer; - } - - cmd->scsi_cmd = NULL; - ioarcb = &(cmd->ioa_cb->ioarcb); - - /* Copy the user-provided IOARCB stuff field by field */ - ioarcb->resource_handle = buffer->ioarcb.resource_handle; - ioarcb->data_transfer_length = buffer->ioarcb.data_transfer_length; - ioarcb->cmd_timeout = buffer->ioarcb.cmd_timeout; - ioarcb->request_type = buffer->ioarcb.request_type; - ioarcb->request_flags0 = buffer->ioarcb.request_flags0; - ioarcb->request_flags1 = buffer->ioarcb.request_flags1; - memcpy(ioarcb->cdb, buffer->ioarcb.cdb, PMCRAID_MAX_CDB_LEN); - - if (buffer->ioarcb.add_cmd_param_length) { - ioarcb->add_cmd_param_length = - buffer->ioarcb.add_cmd_param_length; - ioarcb->add_cmd_param_offset = - buffer->ioarcb.add_cmd_param_offset; - memcpy(ioarcb->add_data.u.add_cmd_params, - buffer->ioarcb.add_data.u.add_cmd_params, - le16_to_cpu(buffer->ioarcb.add_cmd_param_length)); - } - - /* set hrrq number where the IOA should respond to. Note that all cmds - * generated internally uses hrrq_id 0, exception to this is the cmd - * block of scsi_cmd which is re-used (e.g. cancel/abort), which uses - * hrrq_id assigned here in queuecommand - */ - ioarcb->hrrq_id = atomic_add_return(1, &(pinstance->last_message_id)) % - pinstance->num_hrrq; - - if (request_size) { - rc = pmcraid_build_passthrough_ioadls(cmd, - request_size, - direction); - if (rc) { - pmcraid_err("couldn't build passthrough ioadls\n"); - goto out_free_cmd; - } - } - - /* If data is being written into the device, copy the data from user - * buffers - */ - if (direction == DMA_TO_DEVICE && request_size > 0) { - rc = pmcraid_copy_sglist(cmd->sglist, - request_buffer, - request_size, - direction); - if (rc) { - pmcraid_err("failed to copy user buffer\n"); - goto out_free_sglist; - } - } - - /* passthrough ioctl is a blocking command so, put the user to sleep - * until timeout. Note that a timeout value of 0 means, do timeout. - */ - cmd->cmd_done = pmcraid_internal_done; - init_completion(&cmd->wait_for_completion); - cmd->completion_req = 1; - - pmcraid_info("command(%d) (CDB[0] = %x) for %x\n", - le32_to_cpu(cmd->ioa_cb->ioarcb.response_handle) >> 2, - cmd->ioa_cb->ioarcb.cdb[0], - le32_to_cpu(cmd->ioa_cb->ioarcb.resource_handle)); - - spin_lock_irqsave(pinstance->host->host_lock, lock_flags); - _pmcraid_fire_command(cmd); - spin_unlock_irqrestore(pinstance->host->host_lock, lock_flags); - - /* NOTE ! Remove the below line once abort_task is implemented - * in firmware. This line disables ioctl command timeout handling logic - * similar to IO command timeout handling, making ioctl commands to wait - * until the command completion regardless of timeout value specified in - * ioarcb - */ - buffer->ioarcb.cmd_timeout = 0; - - /* If command timeout is specified put caller to wait till that time, - * otherwise it would be blocking wait. If command gets timed out, it - * will be aborted. - */ - if (buffer->ioarcb.cmd_timeout == 0) { - wait_for_completion(&cmd->wait_for_completion); - } else if (!wait_for_completion_timeout( - &cmd->wait_for_completion, - msecs_to_jiffies(le16_to_cpu(buffer->ioarcb.cmd_timeout) * 1000))) { - - pmcraid_info("aborting cmd %d (CDB[0] = %x) due to timeout\n", - le32_to_cpu(cmd->ioa_cb->ioarcb.response_handle) >> 2, - cmd->ioa_cb->ioarcb.cdb[0]); - - spin_lock_irqsave(pinstance->host->host_lock, lock_flags); - cancel_cmd = pmcraid_abort_cmd(cmd); - spin_unlock_irqrestore(pinstance->host->host_lock, lock_flags); - - if (cancel_cmd) { - wait_for_completion(&cancel_cmd->wait_for_completion); - ioasc = le32_to_cpu(cancel_cmd->ioa_cb->ioasa.ioasc); - pmcraid_return_cmd(cancel_cmd); - - /* if abort task couldn't find the command i.e it got - * completed prior to aborting, return good completion. - * if command got aborted successfully or there was IOA - * reset due to abort task itself getting timedout then - * return -ETIMEDOUT - */ - if (ioasc == PMCRAID_IOASC_IOA_WAS_RESET || - PMCRAID_IOASC_SENSE_KEY(ioasc) == 0x00) { - if (ioasc != PMCRAID_IOASC_GC_IOARCB_NOTFOUND) - rc = -ETIMEDOUT; - goto out_handle_response; - } - } - - /* no command block for abort task or abort task failed to abort - * the IOARCB, then wait for 150 more seconds and initiate reset - * sequence after timeout - */ - if (!wait_for_completion_timeout( - &cmd->wait_for_completion, - msecs_to_jiffies(150 * 1000))) { - pmcraid_reset_bringup(cmd->drv_inst); - rc = -ETIMEDOUT; - } - } - -out_handle_response: - /* copy entire IOASA buffer and return IOCTL success. - * If copying IOASA to user-buffer fails, return - * EFAULT - */ - if (copy_to_user(ioasa, &cmd->ioa_cb->ioasa, - sizeof(struct pmcraid_ioasa))) { - pmcraid_err("failed to copy ioasa buffer to user\n"); - rc = -EFAULT; - } - - /* If the data transfer was from device, copy the data onto user - * buffers - */ - else if (direction == DMA_FROM_DEVICE && request_size > 0) { - rc = pmcraid_copy_sglist(cmd->sglist, - request_buffer, - request_size, - direction); - if (rc) { - pmcraid_err("failed to copy user buffer\n"); - rc = -EFAULT; - } - } - -out_free_sglist: - pmcraid_release_passthrough_ioadls(cmd, request_size, direction); - -out_free_cmd: - pmcraid_return_cmd(cmd); - -out_free_buffer: - kfree(buffer); - - return rc; -} - - - - /** * pmcraid_ioctl_driver - ioctl handler for commands handled by driver itself * @@ -3922,20 +3445,6 @@ static long pmcraid_chr_ioctl( switch (_IOC_TYPE(cmd)) { - case PMCRAID_PASSTHROUGH_IOCTL: - /* If ioctl code is to download microcode, we need to block - * mid-layer requests. - */ - if (cmd == PMCRAID_IOCTL_DOWNLOAD_MICROCODE) - scsi_block_requests(pinstance->host); - - retval = pmcraid_ioctl_passthrough(pinstance, cmd, - hdr->buffer_length, argp); - - if (cmd == PMCRAID_IOCTL_DOWNLOAD_MICROCODE) - scsi_unblock_requests(pinstance->host); - break; - case PMCRAID_DRIVER_IOCTL: arg += sizeof(struct pmcraid_ioctl_header); retval = pmcraid_ioctl_driver(pinstance, cmd, diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index bbb75318f1e7..9f59930e8b4f 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -1022,41 +1022,16 @@ struct pmcraid_ioctl_header { #define PMCRAID_IOCTL_SIGNATURE "PMCRAID" -/* - * pmcraid_passthrough_ioctl_buffer - structure given as argument to - * passthrough(or firmware handled) IOCTL commands. Note that ioarcb requires - * 32-byte alignment so, it is necessary to pack this structure to avoid any - * holes between ioctl_header and passthrough buffer - * - * .ioactl_header : ioctl header - * .ioarcb : filled-up ioarcb buffer, driver always reads this buffer - * .ioasa : buffer for ioasa, driver fills this with IOASA from firmware - * .request_buffer: The I/O buffer (flat), driver reads/writes to this based on - * the transfer directions passed in ioarcb.flags0. Contents - * of this buffer are valid only when ioarcb.data_transfer_len - * is not zero. - */ -struct pmcraid_passthrough_ioctl_buffer { - struct pmcraid_ioctl_header ioctl_header; - struct pmcraid_ioarcb ioarcb; - struct pmcraid_ioasa ioasa; - u8 request_buffer[]; -} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT))); - /* * keys to differentiate between driver handled IOCTLs and passthrough * IOCTLs passed to IOA. driver determines the ioctl type using macro * _IOC_TYPE */ #define PMCRAID_DRIVER_IOCTL 'D' -#define PMCRAID_PASSTHROUGH_IOCTL 'F' #define DRV_IOCTL(n, size) \ _IOC(_IOC_READ|_IOC_WRITE, PMCRAID_DRIVER_IOCTL, (n), (size)) -#define FMW_IOCTL(n, size) \ - _IOC(_IOC_READ|_IOC_WRITE, PMCRAID_PASSTHROUGH_IOCTL, (n), (size)) - /* * _ARGSIZE: macro that gives size of the argument type passed to an IOCTL cmd. * This is to facilitate applications avoiding un-necessary memory allocations. @@ -1069,12 +1044,4 @@ struct pmcraid_passthrough_ioctl_buffer { #define PMCRAID_IOCTL_RESET_ADAPTER \ DRV_IOCTL(5, sizeof(struct pmcraid_ioctl_header)) -/* passthrough/firmware handled commands */ -#define PMCRAID_IOCTL_PASSTHROUGH_COMMAND \ - FMW_IOCTL(1, sizeof(struct pmcraid_passthrough_ioctl_buffer)) - -#define PMCRAID_IOCTL_DOWNLOAD_MICROCODE \ - FMW_IOCTL(2, sizeof(struct pmcraid_passthrough_ioctl_buffer)) - - #endif /* _PMCRAID_H */ From bc5519c18a32ce855bb51b9f5eceb77a9489d080 Mon Sep 17 00:00:00 2001 From: Kevin Groeneveld Date: Tue, 22 Mar 2022 20:22:42 -0400 Subject: [PATCH 055/423] scsi: sr: Fix typo in CDROM(CLOSETRAY|EJECT) handling Commit 2e27f576abc6 ("scsi: scsi_ioctl: Call scsi_cmd_ioctl() from scsi_ioctl()") seems to have a typo as it is checking ret instead of cmd in the if statement checking for CDROMCLOSETRAY and CDROMEJECT. This changes the behaviour of these ioctls as the cdrom_ioctl handling of these is more restrictive than the scsi_ioctl version. Link: https://lore.kernel.org/r/20220323002242.21157-1-kgroeneveld@lenbrook.com Fixes: 2e27f576abc6 ("scsi: scsi_ioctl: Call scsi_cmd_ioctl() from scsi_ioctl()") Reviewed-by: Christoph Hellwig Signed-off-by: Kevin Groeneveld Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index aaa54ad5f035..f232514f5a2b 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -578,7 +578,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, scsi_autopm_get_device(sdev); - if (ret != CDROMCLOSETRAY && ret != CDROMEJECT) { + if (cmd != CDROMCLOSETRAY && cmd != CDROMEJECT) { ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg); if (ret != -ENOSYS) goto put; From 0bade8e53279157c7cc9dd95d573b7e82223d78a Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 22 Mar 2022 12:44:43 -0700 Subject: [PATCH 056/423] scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024 The adapter request_limit is hardcoded to be INITIAL_SRP_LIMIT which is currently an arbitrary value of 800. Increase this value to 1024 which better matches the characteristics of the typical IBMi Initiator that supports 32 LUNs and a queue depth of 32. This change also has the secondary benefit of being a power of two as required by the kfifo API. Since, Commit ab9bb6318b09 ("Partially revert "kfifo: fix kfifo_alloc() and kfifo_init()"") the size of IU pool for each target has been rounded down to 512 when attempting to kfifo_init() those pools with the current request_limit size of 800. Link: https://lore.kernel.org/r/20220322194443.678433-1-tyreld@linux.ibm.com Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 80238e6a3c98..eee1a24f7e15 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -36,7 +36,7 @@ #define IBMVSCSIS_VERSION "v0.2" -#define INITIAL_SRP_LIMIT 800 +#define INITIAL_SRP_LIMIT 1024 #define DEFAULT_MAX_SECTORS 256 #define MAX_TXU 1024 * 1024 From 63221571ef77c71a37d3c604d604168ce1de5cab Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 24 Mar 2022 08:29:37 +1100 Subject: [PATCH 057/423] scsi: aha152x: Stop using struct scsi_pointer Remove aha152x_cmd_priv.scsi_pointer by moving the necessary members into aha152x_cmd_priv proper. Tested with an Adaptec SlimSCSI APA-1460A card. Link: https://lore.kernel.org/r/bdc1264b6dd331150bffb737958cab8c9c068fa1.1648070977.git.fthain@linux-m68k.org Cc: Christoph Hellwig Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/aha152x.c | 235 ++++++++++++++++++++--------------------- 1 file changed, 113 insertions(+), 122 deletions(-) diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index 5f554a3a0f62..caeebfb67149 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -317,14 +317,18 @@ enum { }; struct aha152x_cmd_priv { - struct scsi_pointer scsi_pointer; + char *ptr; + int this_residual; + struct scatterlist *buffer; + int status; + int message; + int sent_command; + int phase; }; -static struct scsi_pointer *aha152x_scsi_pointer(struct scsi_cmnd *cmd) +static struct aha152x_cmd_priv *aha152x_priv(struct scsi_cmnd *cmd) { - struct aha152x_cmd_priv *acmd = scsi_cmd_priv(cmd); - - return &acmd->scsi_pointer; + return scsi_cmd_priv(cmd); } MODULE_AUTHOR("Jürgen Fischer"); @@ -890,17 +894,16 @@ void aha152x_release(struct Scsi_Host *shpnt) static int setup_expected_interrupts(struct Scsi_Host *shpnt) { if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); - scsi_pointer->phase |= 1 << 16; + acp->phase |= 1 << 16; - if (scsi_pointer->phase & selecting) { + if (acp->phase & selecting) { SETPORT(SSTAT1, SELTO); SETPORT(SIMODE0, ENSELDO | (DISCONNECTED_SC ? ENSELDI : 0)); SETPORT(SIMODE1, ENSELTIMO); } else { - SETPORT(SIMODE0, (scsi_pointer->phase & spiordy) ? ENSPIORDY : 0); + SETPORT(SIMODE0, (acp->phase & spiordy) ? ENSPIORDY : 0); SETPORT(SIMODE1, ENPHASEMIS | ENSCSIRST | ENSCSIPERR | ENBUSFREE); } } else if(STATE==seldi) { @@ -924,17 +927,16 @@ static int setup_expected_interrupts(struct Scsi_Host *shpnt) static int aha152x_internal_queue(struct scsi_cmnd *SCpnt, struct completion *complete, int phase) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(SCpnt); + struct aha152x_cmd_priv *acp = aha152x_priv(SCpnt); struct Scsi_Host *shpnt = SCpnt->device->host; unsigned long flags; - scsi_pointer->phase = not_issued | phase; - scsi_pointer->Status = 0x1; /* Ilegal status by SCSI standard */ - scsi_pointer->Message = 0; - scsi_pointer->have_data_in = 0; - scsi_pointer->sent_command = 0; + acp->phase = not_issued | phase; + acp->status = 0x1; /* Illegal status by SCSI standard */ + acp->message = 0; + acp->sent_command = 0; - if (scsi_pointer->phase & (resetting | check_condition)) { + if (acp->phase & (resetting | check_condition)) { if (!SCpnt->host_scribble || SCSEM(SCpnt) || SCNEXT(SCpnt)) { scmd_printk(KERN_ERR, SCpnt, "cannot reuse command\n"); return FAILED; @@ -957,15 +959,15 @@ static int aha152x_internal_queue(struct scsi_cmnd *SCpnt, SCp.phase : current state of the command */ if ((phase & resetting) || !scsi_sglist(SCpnt)) { - scsi_pointer->ptr = NULL; - scsi_pointer->this_residual = 0; + acp->ptr = NULL; + acp->this_residual = 0; scsi_set_resid(SCpnt, 0); - scsi_pointer->buffer = NULL; + acp->buffer = NULL; } else { scsi_set_resid(SCpnt, scsi_bufflen(SCpnt)); - scsi_pointer->buffer = scsi_sglist(SCpnt); - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer); - scsi_pointer->this_residual = scsi_pointer->buffer->length; + acp->buffer = scsi_sglist(SCpnt); + acp->ptr = SG_ADDRESS(acp->buffer); + acp->this_residual = acp->buffer->length; } DO_LOCK(flags); @@ -1015,7 +1017,7 @@ static void reset_done(struct scsi_cmnd *SCpnt) static void aha152x_scsi_done(struct scsi_cmnd *SCpnt) { - if (aha152x_scsi_pointer(SCpnt)->phase & resetting) + if (aha152x_priv(SCpnt)->phase & resetting) reset_done(SCpnt); else scsi_done(SCpnt); @@ -1101,7 +1103,7 @@ static int aha152x_device_reset(struct scsi_cmnd * SCpnt) DO_LOCK(flags); - if (aha152x_scsi_pointer(SCpnt)->phase & resetted) { + if (aha152x_priv(SCpnt)->phase & resetted) { HOSTDATA(shpnt)->commands--; if (!HOSTDATA(shpnt)->commands) SETPORT(PORTA, 0); @@ -1395,31 +1397,30 @@ static void busfree_run(struct Scsi_Host *shpnt) SETPORT(SSTAT1, CLRBUSFREE); if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); #if defined(AHA152X_STAT) action++; #endif - scsi_pointer->phase &= ~syncneg; + acp->phase &= ~syncneg; - if (scsi_pointer->phase & completed) { + if (acp->phase & completed) { /* target sent COMMAND COMPLETE */ - done(shpnt, scsi_pointer->Status, DID_OK); + done(shpnt, acp->status, DID_OK); - } else if (scsi_pointer->phase & aborted) { - done(shpnt, scsi_pointer->Status, DID_ABORT); + } else if (acp->phase & aborted) { + done(shpnt, acp->status, DID_ABORT); - } else if (scsi_pointer->phase & resetted) { - done(shpnt, scsi_pointer->Status, DID_RESET); + } else if (acp->phase & resetted) { + done(shpnt, acp->status, DID_RESET); - } else if (scsi_pointer->phase & disconnected) { + } else if (acp->phase & disconnected) { /* target sent DISCONNECT */ #if defined(AHA152X_STAT) HOSTDATA(shpnt)->disconnections++; #endif append_SC(&DISCONNECTED_SC, CURRENT_SC); - scsi_pointer->phase |= 1 << 16; + acp->phase |= 1 << 16; CURRENT_SC = NULL; } else { @@ -1438,24 +1439,23 @@ static void busfree_run(struct Scsi_Host *shpnt) action++; #endif - if (aha152x_scsi_pointer(DONE_SC)->phase & check_condition) { + if (aha152x_priv(DONE_SC)->phase & check_condition) { struct scsi_cmnd *cmd = HOSTDATA(shpnt)->done_SC; struct aha152x_scdata *sc = SCDATA(cmd); scsi_eh_restore_cmnd(cmd, &sc->ses); - aha152x_scsi_pointer(cmd)->Status = SAM_STAT_CHECK_CONDITION; + aha152x_priv(cmd)->status = SAM_STAT_CHECK_CONDITION; HOSTDATA(shpnt)->commands--; if (!HOSTDATA(shpnt)->commands) SETPORT(PORTA, 0); /* turn led off */ - } else if (aha152x_scsi_pointer(DONE_SC)->Status == - SAM_STAT_CHECK_CONDITION) { + } else if (aha152x_priv(DONE_SC)->status == SAM_STAT_CHECK_CONDITION) { #if defined(AHA152X_STAT) HOSTDATA(shpnt)->busfree_with_check_condition++; #endif - if(!(aha152x_scsi_pointer(DONE_SC)->phase & not_issued)) { + if (!(aha152x_priv(DONE_SC)->phase & not_issued)) { struct aha152x_scdata *sc; struct scsi_cmnd *ptr = DONE_SC; DONE_SC=NULL; @@ -1480,7 +1480,7 @@ static void busfree_run(struct Scsi_Host *shpnt) if (!HOSTDATA(shpnt)->commands) SETPORT(PORTA, 0); /* turn led off */ - if (!(aha152x_scsi_pointer(ptr)->phase & resetting)) { + if (!(aha152x_priv(ptr)->phase & resetting)) { kfree(ptr->host_scribble); ptr->host_scribble=NULL; } @@ -1503,13 +1503,12 @@ static void busfree_run(struct Scsi_Host *shpnt) DO_UNLOCK(flags); if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); #if defined(AHA152X_STAT) action++; #endif - scsi_pointer->phase |= selecting; + acp->phase |= selecting; /* clear selection timeout */ SETPORT(SSTAT1, SELTO); @@ -1537,13 +1536,13 @@ static void busfree_run(struct Scsi_Host *shpnt) */ static void seldo_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); SETPORT(SCSISIG, 0); SETPORT(SSTAT1, CLRBUSFREE); SETPORT(SSTAT1, CLRPHASECHG); - scsi_pointer->phase &= ~(selecting | not_issued); + acp->phase &= ~(selecting | not_issued); SETPORT(SCSISEQ, 0); @@ -1558,12 +1557,12 @@ static void seldo_run(struct Scsi_Host *shpnt) ADDMSGO(IDENTIFY(RECONNECT, CURRENT_SC->device->lun)); - if (scsi_pointer->phase & aborting) { + if (acp->phase & aborting) { ADDMSGO(ABORT); - } else if (scsi_pointer->phase & resetting) { + } else if (acp->phase & resetting) { ADDMSGO(BUS_DEVICE_RESET); } else if (SYNCNEG==0 && SYNCHRONOUS) { - scsi_pointer->phase |= syncneg; + acp->phase |= syncneg; MSGOLEN += spi_populate_sync_msg(&MSGO(MSGOLEN), 50, 8); SYNCNEG=1; /* negotiation in progress */ } @@ -1578,7 +1577,7 @@ static void seldo_run(struct Scsi_Host *shpnt) */ static void selto_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp; SETPORT(SCSISEQ, 0); SETPORT(SSTAT1, CLRSELTIMO); @@ -1586,9 +1585,10 @@ static void selto_run(struct Scsi_Host *shpnt) if (!CURRENT_SC) return; - scsi_pointer->phase &= ~selecting; + acp = aha152x_priv(CURRENT_SC); + acp->phase &= ~selecting; - if (scsi_pointer->phase & aborted) + if (acp->phase & aborted) done(shpnt, SAM_STAT_GOOD, DID_ABORT); else if (TESTLO(SSTAT0, SELINGO)) done(shpnt, SAM_STAT_GOOD, DID_BUS_BUSY); @@ -1616,10 +1616,9 @@ static void seldi_run(struct Scsi_Host *shpnt) SETPORT(SSTAT1, CLRPHASECHG); if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); - if (!(scsi_pointer->phase & not_issued)) + if (!(acp->phase & not_issued)) scmd_printk(KERN_ERR, CURRENT_SC, "command should not have been issued yet\n"); @@ -1676,7 +1675,7 @@ static void seldi_run(struct Scsi_Host *shpnt) static void msgi_run(struct Scsi_Host *shpnt) { for(;;) { - struct scsi_pointer *scsi_pointer; + struct aha152x_cmd_priv *acp; int sstat1 = GETPORT(SSTAT1); if(sstat1 & (PHASECHG|PHASEMIS|BUSFREE) || !(sstat1 & REQINIT)) @@ -1714,9 +1713,9 @@ static void msgi_run(struct Scsi_Host *shpnt) continue; } - scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - scsi_pointer->Message = MSGI(0); - scsi_pointer->phase &= ~disconnected; + acp = aha152x_priv(CURRENT_SC); + acp->message = MSGI(0); + acp->phase &= ~disconnected; MSGILEN=0; @@ -1724,8 +1723,8 @@ static void msgi_run(struct Scsi_Host *shpnt) continue; } - scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - scsi_pointer->Message = MSGI(0); + acp = aha152x_priv(CURRENT_SC); + acp->message = MSGI(0); switch (MSGI(0)) { case DISCONNECT: @@ -1733,11 +1732,11 @@ static void msgi_run(struct Scsi_Host *shpnt) scmd_printk(KERN_WARNING, CURRENT_SC, "target was not allowed to disconnect\n"); - scsi_pointer->phase |= disconnected; + acp->phase |= disconnected; break; case COMMAND_COMPLETE: - scsi_pointer->phase |= completed; + acp->phase |= completed; break; case MESSAGE_REJECT: @@ -1867,11 +1866,9 @@ static void msgi_end(struct Scsi_Host *shpnt) */ static void msgo_init(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - if(MSGOLEN==0) { - if ((scsi_pointer->phase & syncneg) && SYNCNEG==2 && - SYNCRATE==0) { + if ((aha152x_priv(CURRENT_SC)->phase & syncneg) && + SYNCNEG == 2 && SYNCRATE == 0) { ADDMSGO(IDENTIFY(RECONNECT, CURRENT_SC->device->lun)); } else { scmd_printk(KERN_INFO, CURRENT_SC, @@ -1888,7 +1885,7 @@ static void msgo_init(struct Scsi_Host *shpnt) */ static void msgo_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); while(MSGO_Iphase |= identified; + acp->phase |= identified; if (MSGO(MSGO_I)==ABORT) - scsi_pointer->phase |= aborted; + acp->phase |= aborted; if (MSGO(MSGO_I)==BUS_DEVICE_RESET) - scsi_pointer->phase |= resetted; + acp->phase |= resetted; SETPORT(SCSIDAT, MSGO(MSGO_I++)); } @@ -1936,7 +1933,7 @@ static void msgo_end(struct Scsi_Host *shpnt) */ static void cmd_init(struct Scsi_Host *shpnt) { - if (aha152x_scsi_pointer(CURRENT_SC)->sent_command) { + if (aha152x_priv(CURRENT_SC)->sent_command) { scmd_printk(KERN_ERR, CURRENT_SC, "command already sent\n"); done(shpnt, SAM_STAT_GOOD, DID_ERROR); @@ -1967,7 +1964,7 @@ static void cmd_end(struct Scsi_Host *shpnt) "command sent incompletely (%d/%d)\n", CMD_I, CURRENT_SC->cmd_len); else - aha152x_scsi_pointer(CURRENT_SC)->sent_command++; + aha152x_priv(CURRENT_SC)->sent_command++; } /* @@ -1979,7 +1976,7 @@ static void status_run(struct Scsi_Host *shpnt) if (TESTLO(SSTAT0, SPIORDY)) return; - aha152x_scsi_pointer(CURRENT_SC)->Status = GETPORT(SCSIDAT); + aha152x_priv(CURRENT_SC)->status = GETPORT(SCSIDAT); } @@ -2003,7 +2000,7 @@ static void datai_init(struct Scsi_Host *shpnt) static void datai_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer; + struct aha152x_cmd_priv *acp; unsigned long the_time; int fifodata, data_count; @@ -2041,36 +2038,35 @@ static void datai_run(struct Scsi_Host *shpnt) fifodata = GETPORT(FIFOSTAT); } - scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - if (scsi_pointer->this_residual > 0) { - while (fifodata > 0 && scsi_pointer->this_residual > 0) { - data_count = fifodata > scsi_pointer->this_residual ? - scsi_pointer->this_residual : - fifodata; + acp = aha152x_priv(CURRENT_SC); + if (acp->this_residual > 0) { + while (fifodata > 0 && acp->this_residual > 0) { + data_count = fifodata > acp->this_residual ? + acp->this_residual : fifodata; fifodata -= data_count; if (data_count & 1) { SETPORT(DMACNTRL0, ENDMA|_8BIT); - *scsi_pointer->ptr++ = GETPORT(DATAPORT); - scsi_pointer->this_residual--; + *acp->ptr++ = GETPORT(DATAPORT); + acp->this_residual--; DATA_LEN++; SETPORT(DMACNTRL0, ENDMA); } if (data_count > 1) { data_count >>= 1; - insw(DATAPORT, scsi_pointer->ptr, data_count); - scsi_pointer->ptr += 2 * data_count; - scsi_pointer->this_residual -= 2 * data_count; + insw(DATAPORT, acp->ptr, data_count); + acp->ptr += 2 * data_count; + acp->this_residual -= 2 * data_count; DATA_LEN += 2 * data_count; } - if (scsi_pointer->this_residual == 0 && - !sg_is_last(scsi_pointer->buffer)) { + if (acp->this_residual == 0 && + !sg_is_last(acp->buffer)) { /* advance to next buffer */ - scsi_pointer->buffer = sg_next(scsi_pointer->buffer); - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer); - scsi_pointer->this_residual = scsi_pointer->buffer->length; + acp->buffer = sg_next(acp->buffer); + acp->ptr = SG_ADDRESS(acp->buffer); + acp->this_residual = acp->buffer->length; } } } else if (fifodata > 0) { @@ -2138,15 +2134,15 @@ static void datao_init(struct Scsi_Host *shpnt) static void datao_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); unsigned long the_time; int data_count; /* until phase changes or all data sent */ - while (TESTLO(DMASTAT, INTSTAT) && scsi_pointer->this_residual > 0) { + while (TESTLO(DMASTAT, INTSTAT) && acp->this_residual > 0) { data_count = 128; - if (data_count > scsi_pointer->this_residual) - data_count = scsi_pointer->this_residual; + if (data_count > acp->this_residual) + data_count = acp->this_residual; if(TESTLO(DMASTAT, DFIFOEMP)) { scmd_printk(KERN_ERR, CURRENT_SC, @@ -2157,26 +2153,25 @@ static void datao_run(struct Scsi_Host *shpnt) if(data_count & 1) { SETPORT(DMACNTRL0,WRITE_READ|ENDMA|_8BIT); - SETPORT(DATAPORT, *scsi_pointer->ptr++); - scsi_pointer->this_residual--; + SETPORT(DATAPORT, *acp->ptr++); + acp->this_residual--; CMD_INC_RESID(CURRENT_SC, -1); SETPORT(DMACNTRL0,WRITE_READ|ENDMA); } if(data_count > 1) { data_count >>= 1; - outsw(DATAPORT, scsi_pointer->ptr, data_count); - scsi_pointer->ptr += 2 * data_count; - scsi_pointer->this_residual -= 2 * data_count; + outsw(DATAPORT, acp->ptr, data_count); + acp->ptr += 2 * data_count; + acp->this_residual -= 2 * data_count; CMD_INC_RESID(CURRENT_SC, -2 * data_count); } - if (scsi_pointer->this_residual == 0 && - !sg_is_last(scsi_pointer->buffer)) { + if (acp->this_residual == 0 && !sg_is_last(acp->buffer)) { /* advance to next buffer */ - scsi_pointer->buffer = sg_next(scsi_pointer->buffer); - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer); - scsi_pointer->this_residual = scsi_pointer->buffer->length; + acp->buffer = sg_next(acp->buffer); + acp->ptr = SG_ADDRESS(acp->buffer); + acp->this_residual = acp->buffer->length; } the_time=jiffies + 100*HZ; @@ -2192,7 +2187,7 @@ static void datao_run(struct Scsi_Host *shpnt) static void datao_end(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); if(TESTLO(DMASTAT, DFIFOEMP)) { u32 datao_cnt = GETSTCNT(); @@ -2211,10 +2206,9 @@ static void datao_end(struct Scsi_Host *shpnt) sg = sg_next(sg); } - scsi_pointer->buffer = sg; - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer) + done; - scsi_pointer->this_residual = scsi_pointer->buffer->length - - done; + acp->buffer = sg; + acp->ptr = SG_ADDRESS(acp->buffer) + done; + acp->this_residual = acp->buffer->length - done; } SETPORT(SXFRCTL0, CH1|CLRCH1|CLRSTCNT); @@ -2229,7 +2223,6 @@ static void datao_end(struct Scsi_Host *shpnt) */ static int update_state(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); int dataphase=0; unsigned int stat0 = GETPORT(SSTAT0); unsigned int stat1 = GETPORT(SSTAT1); @@ -2244,7 +2237,7 @@ static int update_state(struct Scsi_Host *shpnt) } else if (stat0 & SELDI && PREVSTATE == busfree) { STATE=seldi; } else if (stat0 & SELDO && CURRENT_SC && - (scsi_pointer->phase & selecting)) { + (aha152x_priv(CURRENT_SC)->phase & selecting)) { STATE=seldo; } else if(stat1 & SELTO) { STATE=selto; @@ -2376,8 +2369,7 @@ static void is_complete(struct Scsi_Host *shpnt) SETPORT(SXFRCTL0, CH1); SETPORT(DMACNTRL0, 0); if(CURRENT_SC) - aha152x_scsi_pointer(CURRENT_SC)->phase &= - ~spiordy; + aha152x_priv(CURRENT_SC)->phase &= ~spiordy; } /* @@ -2399,8 +2391,7 @@ static void is_complete(struct Scsi_Host *shpnt) SETPORT(DMACNTRL0, 0); SETPORT(SXFRCTL0, CH1|SPIOEN); if(CURRENT_SC) - aha152x_scsi_pointer(CURRENT_SC)->phase |= - spiordy; + aha152x_priv(CURRENT_SC)->phase |= spiordy; } /* @@ -2490,7 +2481,7 @@ static void disp_enintr(struct Scsi_Host *shpnt) */ static void show_command(struct scsi_cmnd *ptr) { - const int phase = aha152x_scsi_pointer(ptr)->phase; + const int phase = aha152x_priv(ptr)->phase; scsi_print_command(ptr); scmd_printk(KERN_DEBUG, ptr, @@ -2538,8 +2529,8 @@ static void show_queues(struct Scsi_Host *shpnt) static void get_command(struct seq_file *m, struct scsi_cmnd * ptr) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(ptr); - const int phase = scsi_pointer->phase; + struct aha152x_cmd_priv *acp = aha152x_priv(ptr); + const int phase = acp->phase; int i; seq_printf(m, "%p: target=%d; lun=%d; cmnd=( ", @@ -2549,8 +2540,8 @@ static void get_command(struct seq_file *m, struct scsi_cmnd * ptr) seq_printf(m, "0x%02x ", ptr->cmnd[i]); seq_printf(m, "); resid=%d; residual=%d; buffers=%d; phase |", - scsi_get_resid(ptr), scsi_pointer->this_residual, - sg_nents(scsi_pointer->buffer) - 1); + scsi_get_resid(ptr), acp->this_residual, + sg_nents(acp->buffer) - 1); if (phase & not_issued) seq_puts(m, "not issued|"); From eaba83b5b8506bbc9ee7ca2f10aeab3fff3719e7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 16 Mar 2022 17:44:30 +0800 Subject: [PATCH 058/423] scsi: core: Fix sbitmap depth in scsi_realloc_sdev_budget_map() In commit edb854a3680b ("scsi: core: Reallocate device's budget map on queue depth change"), the sbitmap for the device budget map may be reallocated after the slave device depth is configured. When the sbitmap is reallocated we use the result from scsi_device_max_queue_depth() for the sbitmap size, but don't resize to match the actual device queue depth. Fix by resizing the sbitmap after reallocating the budget sbitmap. We do this instead of init'ing the sbitmap to the device queue depth as the user may want to change the queue depth later via sysfs or other. Link: https://lore.kernel.org/r/1647423870-143867-1-git-send-email-john.garry@huawei.com Fixes: edb854a3680b ("scsi: core: Reallocate device's budget map on queue depth change") Tested-by: Damien Le Moal Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f4e6c68ac99e..2ef78083f1ef 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -223,6 +223,8 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, int ret; struct sbitmap sb_backup; + depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev)); + /* * realloc if new shift is calculated, which is caused by setting * up one new default queue depth after calling ->slave_configure @@ -245,6 +247,9 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, scsi_device_max_queue_depth(sdev), new_shift, GFP_KERNEL, sdev->request_queue->node, false, true); + if (!ret) + sbitmap_resize(&sdev->budget_map, depth); + if (need_free) { if (ret) sdev->budget_map = sb_backup; From fac952bb546a9f103a769d7105194175e11abc99 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Mar 2022 23:56:15 +0000 Subject: [PATCH 059/423] scsi: isci: Fix spelling mistake "doesnt" -> "doesn't" There are a few spelling mistakes in dev_warn and dev_err messages. Fix these. Link: https://lore.kernel.org/r/20220316235615.56683-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/host.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index d690d9cf7eb1..35589b6af90d 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -413,7 +413,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent) dev_warn(&ihost->pdev->dev, "%s: SCIC Controller 0x%p received " "event 0x%x for io request object " - "that doesnt exist.\n", + "that doesn't exist.\n", __func__, ihost, ent); @@ -428,7 +428,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent) dev_warn(&ihost->pdev->dev, "%s: SCIC Controller 0x%p received " "event 0x%x for remote device object " - "that doesnt exist.\n", + "that doesn't exist.\n", __func__, ihost, ent); @@ -462,7 +462,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent) } else dev_err(&ihost->pdev->dev, "%s: SCIC Controller 0x%p received event 0x%x " - "for remote device object 0x%0x that doesnt " + "for remote device object 0x%0x that doesn't " "exist.\n", __func__, ihost, From 41b8c2a31472a97349fe54c3a6b3176d9cdc31be Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 16 Mar 2022 12:20:06 -0700 Subject: [PATCH 060/423] scsi: virtio-scsi: Eliminate anonymous module_init & module_exit Eliminate anonymous module_init() and module_exit(), which can lead to confusion or ambiguity when reading System.map, crashes/oops/bugs, or an initcall_debug log. Give each of these init and exit functions unique driver-specific names to eliminate the anonymous names. Example 1: (System.map) ffffffff832fc78c t init ffffffff832fc79e t init ffffffff832fc8f8 t init Example 2: (initcall_debug log) calling init+0x0/0x12 @ 1 initcall init+0x0/0x12 returned 0 after 15 usecs calling init+0x0/0x60 @ 1 initcall init+0x0/0x60 returned 0 after 2 usecs calling init+0x0/0x9a @ 1 initcall init+0x0/0x9a returned 0 after 74 usecs Link: https://lore.kernel.org/r/20220316192010.19001-6-rdunlap@infradead.org Fixes: 4fe74b1cb051 ("[SCSI] virtio-scsi: SCSI driver for QEMU based virtual machines") Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Paolo Bonzini Cc: Stefan Hajnoczi Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: linux-scsi@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Reviewed-by: Stefan Hajnoczi Reviewed-by: Ira Weiny Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 0e6110da69e7..578c4b6d0f7d 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -988,7 +988,7 @@ static struct virtio_driver virtio_scsi_driver = { .remove = virtscsi_remove, }; -static int __init init(void) +static int __init virtio_scsi_init(void) { int ret = -ENOMEM; @@ -1020,14 +1020,14 @@ error: return ret; } -static void __exit fini(void) +static void __exit virtio_scsi_fini(void) { unregister_virtio_driver(&virtio_scsi_driver); mempool_destroy(virtscsi_cmd_pool); kmem_cache_destroy(virtscsi_cmd_cache); } -module_init(init); -module_exit(fini); +module_init(virtio_scsi_init); +module_exit(virtio_scsi_fini); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio SCSI HBA driver"); From 066f4c31945ce2cb30e840794ee01713dec73b74 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Mar 2022 10:52:14 +0300 Subject: [PATCH 061/423] scsi: hisi_sas: Remove stray fallthrough annotation This case statement doesn't fall through any more so remove the fallthrough annotation. Link: https://lore.kernel.org/r/20220317075214.GC25237@kili Acked-by: John Garry Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 461ef8a76c4c..4bda2f6cb352 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -442,7 +442,6 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba, case SAS_PROTOCOL_INTERNAL_ABORT: hisi_sas_task_prep_abort(hisi_hba, slot); break; - fallthrough; default: return; } From 99241e119f4a2077383f994a174b38ced21d6fca Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Tue, 29 Mar 2022 10:12:51 +0800 Subject: [PATCH 062/423] scsi: core: sysfs: Remove comments that conflict with the actual logic Christoph Hellwig Says: ======================= I think we should just handle the error properly and remove the comment. There's no good reason to ignore bsg registration errors. In fact, after commit 92c4b58b15c5 ("scsi: core: Register sysfs attributes earlier"), we are already forced to return errno. We discuss this issue in [1]. [1] https://lore.kernel.org/all/20211022010201.426746-1-liu.yun@linux.dev/ Link: https://lore.kernel.org/r/20220329021251.123805-1-liu.yun@linux.dev Suggested-by: Christoph Hellwig Reviewed-by: Guenter Roeck Reviewed-by: Christoph Hellwig Signed-off-by: Jackie Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 226a50944c00..dc6872e352bd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1384,10 +1384,6 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) { sdev->bsg_dev = scsi_bsg_register_queue(sdev); if (IS_ERR(sdev->bsg_dev)) { - /* - * We're treating error on bsg register as non-fatal, so - * pretend nothing went wrong. - */ error = PTR_ERR(sdev->bsg_dev); sdev_printk(KERN_INFO, sdev, "Failed to register bsg queue, errno=%d\n", From 5ca0faf9c292029ec6f9edbfe7b42e97dcb87dca Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 21 Mar 2022 16:18:53 +0100 Subject: [PATCH 063/423] scsi: ufs: qcom: Drop custom Android boot parameters The QCOM UFS driver requires an androidboot.bootdevice command line argument matching the UFS device name. If the name is different, it refuses to probe. This androidboot.bootdevice is provided by stock/vendor (from an Android-based device) bootloader. This does not make sense from Linux point of view. Driver should be able to boot regardless of bootloader. Driver should not depend on some Android custom environment data. Link: https://lore.kernel.org/r/20220321151853.24138-1-krzk@kernel.org Tested-by: Amit Pundir Tested-by: Luca Weiss Reviewed-by: Brian Masney Reviewed-by: Bjorn Andersson Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-qcom.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 0d2e950d0865..586c0e567ff9 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -957,18 +957,6 @@ static const struct reset_control_ops ufs_qcom_reset_ops = { .deassert = ufs_qcom_reset_deassert, }; -#define ANDROID_BOOT_DEV_MAX 30 -static char android_boot_dev[ANDROID_BOOT_DEV_MAX]; - -#ifndef MODULE -static int __init get_android_boot_dev(char *str) -{ - strlcpy(android_boot_dev, str, ANDROID_BOOT_DEV_MAX); - return 1; -} -__setup("androidboot.bootdevice=", get_android_boot_dev); -#endif - /** * ufs_qcom_init - bind phy with controller * @hba: host controller instance @@ -988,9 +976,6 @@ static int ufs_qcom_init(struct ufs_hba *hba) struct resource *res; struct ufs_clk_info *clki; - if (strlen(android_boot_dev) && strcmp(android_boot_dev, dev_name(dev))) - return -ENODEV; - host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL); if (!host) { err = -ENOMEM; From 37a9bd7090cdf6657ced3f693897819a66ee8d52 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 22 Mar 2022 07:46:48 -0700 Subject: [PATCH 064/423] scsi: aic7xxx: Use standard PCI subsystem, subdevice defines Common defines should be used over custom defines. Change and remove these defines: - PCIR_SUBVEND_0 to PCI_SUBSYSTEM_VENDOR_ID - PCIR_SUBDEV_0 to PCI_SUBSYSTEM_ID Link: https://lore.kernel.org/r/20220322144648.2467777-1-trix@redhat.com Signed-off-by: Tom Rix Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic79xx_osm.h | 2 -- drivers/scsi/aic7xxx/aic79xx_pci.c | 6 +++--- drivers/scsi/aic7xxx/aic7xxx_osm.h | 2 -- drivers/scsi/aic7xxx/aic7xxx_pci.c | 4 ++-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.h b/drivers/scsi/aic7xxx/aic79xx_osm.h index 679a4fd13874..793fe19993a9 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.h +++ b/drivers/scsi/aic7xxx/aic79xx_osm.h @@ -420,8 +420,6 @@ ahd_unlock(struct ahd_softc *ahd, unsigned long *flags) /* config registers for header type 0 devices */ #define PCIR_MAPS 0x10 -#define PCIR_SUBVEND_0 0x2c -#define PCIR_SUBDEV_0 0x2e /****************************** PCI-X definitions *****************************/ #define PCIXR_COMMAND 0x96 diff --git a/drivers/scsi/aic7xxx/aic79xx_pci.c b/drivers/scsi/aic7xxx/aic79xx_pci.c index 2f0bdb9225a4..5fad41b1ab58 100644 --- a/drivers/scsi/aic7xxx/aic79xx_pci.c +++ b/drivers/scsi/aic7xxx/aic79xx_pci.c @@ -260,8 +260,8 @@ ahd_find_pci_device(ahd_dev_softc_t pci) vendor = ahd_pci_read_config(pci, PCIR_DEVVENDOR, /*bytes*/2); device = ahd_pci_read_config(pci, PCIR_DEVICE, /*bytes*/2); - subvendor = ahd_pci_read_config(pci, PCIR_SUBVEND_0, /*bytes*/2); - subdevice = ahd_pci_read_config(pci, PCIR_SUBDEV_0, /*bytes*/2); + subvendor = ahd_pci_read_config(pci, PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2); + subdevice = ahd_pci_read_config(pci, PCI_SUBSYSTEM_ID, /*bytes*/2); full_id = ahd_compose_id(device, vendor, subdevice, @@ -298,7 +298,7 @@ ahd_pci_config(struct ahd_softc *ahd, const struct ahd_pci_identity *entry) * Record if this is an HP board. */ subvendor = ahd_pci_read_config(ahd->dev_softc, - PCIR_SUBVEND_0, /*bytes*/2); + PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2); if (subvendor == SUBID_HP) ahd->flags |= AHD_HP_BOARD; diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.h b/drivers/scsi/aic7xxx/aic7xxx_osm.h index 4782a304e93c..51d9f4de0734 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -433,8 +433,6 @@ ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) /* config registers for header type 0 devices */ #define PCIR_MAPS 0x10 -#define PCIR_SUBVEND_0 0x2c -#define PCIR_SUBDEV_0 0x2e typedef enum { diff --git a/drivers/scsi/aic7xxx/aic7xxx_pci.c b/drivers/scsi/aic7xxx/aic7xxx_pci.c index dab3a6d12c4d..2d4c85426dc3 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_pci.c @@ -673,8 +673,8 @@ ahc_find_pci_device(ahc_dev_softc_t pci) vendor = ahc_pci_read_config(pci, PCIR_DEVVENDOR, /*bytes*/2); device = ahc_pci_read_config(pci, PCIR_DEVICE, /*bytes*/2); - subvendor = ahc_pci_read_config(pci, PCIR_SUBVEND_0, /*bytes*/2); - subdevice = ahc_pci_read_config(pci, PCIR_SUBDEV_0, /*bytes*/2); + subvendor = ahc_pci_read_config(pci, PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2); + subdevice = ahc_pci_read_config(pci, PCI_SUBSYSTEM_ID, /*bytes*/2); full_id = ahc_compose_id(device, vendor, subdevice, subvendor); /* From 16ed828b872d12ccba8f07bcc446ae89ba662f9c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Mar 2022 08:01:24 +0100 Subject: [PATCH 065/423] scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one() The error handling path of the probe releases a resource that is not freed in the remove function. In some cases, a ioremap() must be undone. Add the missing iounmap() call in the remove function. Link: https://lore.kernel.org/r/247066a3104d25f9a05de8b3270fc3c848763bcc.1647673264.git.christophe.jaillet@wanadoo.fr Fixes: 45804fbb00ee ("[SCSI] 53c700: Amiga Zorro NCR53c710 SCSI") Reviewed-by: Geert Uytterhoeven Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/zorro7xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c index 27b9e2baab1a..7acf9193a9e8 100644 --- a/drivers/scsi/zorro7xx.c +++ b/drivers/scsi/zorro7xx.c @@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z) scsi_remove_host(host); NCR_700_release(host); + if (host->base > 0x01000000) + iounmap(hostdata->base); kfree(hostdata); free_irq(host->irq, host); zorro_release_device(z); From 7ff897b2a59558d919b72f1aef58b1aa63d19c61 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 19 Mar 2022 23:11:22 +0000 Subject: [PATCH 066/423] scsi: bnx2fc: Fix spelling mistake "mis-match" -> "mismatch" There are a few spelling mistakes in some error messages. Fix them. Link: https://lore.kernel.org/r/20220319231122.21476-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 0103f811cc25..776544385598 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -1169,7 +1169,7 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, ofld_kcqe->fcoe_conn_context_id); interface = tgt->port->priv; if (hba != interface->hba) { - printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mis-match\n"); + printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mismatch\n"); goto ofld_cmpl_err; } /* @@ -1226,12 +1226,12 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, * and enable */ if (tgt->context_id != context_id) { - printk(KERN_ERR PFX "context id mis-match\n"); + printk(KERN_ERR PFX "context id mismatch\n"); return; } interface = tgt->port->priv; if (hba != interface->hba) { - printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); + printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mismatch\n"); goto enbl_cmpl_err; } if (!ofld_kcqe->completion_status) From a6b758b0420bda28995ea6939ca0808fcec68be4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 19 Mar 2022 23:14:45 +0000 Subject: [PATCH 067/423] scsi: bnx2i: Fix spelling mistake "mis-match" -> "mismatch" There are a few spelling mistakes in some error messages. Fix them. Link: https://lore.kernel.org/r/20220319231445.21696-1-colin.i.king@gmail.com Acked-by: Manish Rangankar Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_hwi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 5521469ce678..7fe7f53a41c0 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -2398,7 +2398,7 @@ static void bnx2i_process_conn_destroy_cmpl(struct bnx2i_hba *hba, } if (hba != ep->hba) { - printk(KERN_ALERT "conn destroy- error hba mis-match\n"); + printk(KERN_ALERT "conn destroy- error hba mismatch\n"); return; } @@ -2432,7 +2432,7 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba, } if (hba != ep->hba) { - printk(KERN_ALERT "ofld_cmpl: error hba mis-match\n"); + printk(KERN_ALERT "ofld_cmpl: error hba mismatch\n"); return; } From 67bae5f28c895f8737a1974c3f31cf12b9170b14 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 29 Mar 2022 15:27:32 +0200 Subject: [PATCH 068/423] drm: of: Properly try all possible cases for bridge/panel detection While bridge/panel detection was initially relying on the usual port/ports-based of graph detection, it was recently changed to perform the lookup on any child node that is not port/ports instead when such a node is available, with no fallback on the usual way. This results in breaking detection when a child node is present but does not contain any panel or bridge node, even when the usual port/ports-based of graph is there. In order to support both situations properly, this commit reworks the logic to try both options and not just one of the two: it will only return -EPROBE_DEFER when both have failed. Signed-off-by: Paul Kocialkowski Fixes: 80253168dbfd ("drm: of: Lookup if child node has panel or bridge") Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220329132732.628474-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/drm_of.c | 101 ++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index 9d90cd75c457..8716da6369a6 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -219,6 +219,29 @@ int drm_of_encoder_active_endpoint(struct device_node *node, } EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint); +static int find_panel_or_bridge(struct device_node *node, + struct drm_panel **panel, + struct drm_bridge **bridge) +{ + if (panel) { + *panel = of_drm_find_panel(node); + if (!IS_ERR(*panel)) + return 0; + + /* Clear the panel pointer in case of error. */ + *panel = NULL; + } + + /* No panel found yet, check for a bridge next. */ + if (bridge) { + *bridge = of_drm_find_bridge(node); + if (*bridge) + return 0; + } + + return -EPROBE_DEFER; +} + /** * drm_of_find_panel_or_bridge - return connected panel or bridge device * @np: device tree node containing encoder output ports @@ -241,66 +264,44 @@ int drm_of_find_panel_or_bridge(const struct device_node *np, struct drm_panel **panel, struct drm_bridge **bridge) { - int ret = -EPROBE_DEFER; - struct device_node *remote; + struct device_node *node; + int ret; if (!panel && !bridge) return -EINVAL; + if (panel) *panel = NULL; + if (bridge) + *bridge = NULL; - /** - * Devices can also be child nodes when we also control that device - * through the upstream device (ie, MIPI-DCS for a MIPI-DSI device). - * - * Lookup for a child node of the given parent that isn't either port - * or ports. - */ - for_each_available_child_of_node(np, remote) { - if (of_node_name_eq(remote, "port") || - of_node_name_eq(remote, "ports")) + /* Check for a graph on the device node first. */ + if (of_graph_is_present(np)) { + node = of_graph_get_remote_node(np, port, endpoint); + if (node) { + ret = find_panel_or_bridge(node, panel, bridge); + of_node_put(node); + + if (!ret) + return 0; + } + } + + /* Otherwise check for any child node other than port/ports. */ + for_each_available_child_of_node(np, node) { + if (of_node_name_eq(node, "port") || + of_node_name_eq(node, "ports")) continue; - goto of_find_panel_or_bridge; + ret = find_panel_or_bridge(node, panel, bridge); + of_node_put(node); + + /* Stop at the first found occurrence. */ + if (!ret) + return 0; } - /* - * of_graph_get_remote_node() produces a noisy error message if port - * node isn't found and the absence of the port is a legit case here, - * so at first we silently check whether graph presents in the - * device-tree node. - */ - if (!of_graph_is_present(np)) - return -ENODEV; - - remote = of_graph_get_remote_node(np, port, endpoint); - -of_find_panel_or_bridge: - if (!remote) - return -ENODEV; - - if (panel) { - *panel = of_drm_find_panel(remote); - if (!IS_ERR(*panel)) - ret = 0; - else - *panel = NULL; - } - - /* No panel found yet, check for a bridge next. */ - if (bridge) { - if (ret) { - *bridge = of_drm_find_bridge(remote); - if (*bridge) - ret = 0; - } else { - *bridge = NULL; - } - - } - - of_node_put(remote); - return ret; + return -EPROBE_DEFER; } EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge); From e4ff77598a109bd36789ad5e80aba66fc53d0ffb Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 30 Mar 2022 19:21:23 +0530 Subject: [PATCH 069/423] powerpc/numa: Handle partially initialized numa nodes With commit 09f49dca570a ("mm: handle uninitialized numa nodes gracefully") NODE_DATA for even a memoryless/cpuless node is partially initialized at boot time. Before onlining the node, current Powerpc code checks for NODE_DATA to be NULL. However since NODE_DATA is partially initialized, this check will end up always being false. This causes hotplugging a CPU to a memoryless/cpuless node to fail. Before adding CPUs: $ numactl -H available: 1 nodes (4) node 4 cpus: 0 1 2 3 4 5 6 7 node 4 size: 97372 MB node 4 free: 95545 MB node distances: node 4 4: 10 $ lparstat System Configuration type=Dedicated mode=Capped smt=8 lcpu=1 mem=99709440 kB cpus=0 ent=1.00 %user %sys %wait %idle physc %entc lbusy app vcsw phint ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- 2.66 2.67 0.16 94.51 0.00 0.00 5.33 0.00 67749 0 After hotplugging 32 cores: $ numactl -H node 4 cpus: 0 1 2 3 4 5 6 7 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 node 4 size: 97372 MB node 4 free: 93636 MB node distances: node 4 4: 10 $ lparstat System Configuration type=Dedicated mode=Capped smt=8 lcpu=33 mem=99709440 kB cpus=0 ent=33.00 %user %sys %wait %idle physc %entc lbusy app vcsw phint ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- 0.04 0.02 0.00 99.94 0.00 0.00 0.06 0.00 1128751 3 As we can see numactl is listing only 8 cores while lparstat is showing 33 cores. Also dmesg is showing messages like: [ 2261.318350 ] BUG: arch topology borken [ 2261.318357 ] the DIE domain not a subset of the NODE domain Fixes: 09f49dca570a ("mm: handle uninitialized numa nodes gracefully") Reported-by: Geetika Moolchandani Signed-off-by: Srikar Dronamraju Acked-by: Michal Hocko Acked-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220330135123.1868197-1-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9b7fefbb64b..13022d734951 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1436,7 +1436,7 @@ int find_and_online_cpu_nid(int cpu) if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; - if (NODE_DATA(new_nid) == NULL) { + if (!node_online(new_nid)) { #ifdef CONFIG_MEMORY_HOTPLUG /* * Need to ensure that NODE_DATA is initialized for a node from From 6b8a94332ee4f7d9a8ae0cbac7609f79c212f06c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Mar 2022 09:54:01 -0400 Subject: [PATCH 070/423] nfsd: Fix a write performance regression The call to filemap_flush() in nfsd_file_put() is there to ensure that we clear out any writes belonging to a NFSv3 client relatively quickly and avoid situations where the file can't be evicted by the garbage collector. It also ensures that we detect write errors quickly. The problem is this causes a regression in performance for some workloads. So try to improve matters by deferring writeback until we're ready to close the file, and need to detect errors so that we can force the client to resend. Tested-by: Jan Kara Fixes: b6669305d35a ("nfsd: Reduce the number of calls to nfsd_file_gc()") Signed-off-by: Trond Myklebust Link: https://lore.kernel.org/all/20220330103457.r4xrhy2d6nhtouzk@quack3.lan Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index cc2831cec669..496f7b3f7523 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -235,6 +235,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } +static void +nfsd_file_flush(struct nfsd_file *nf) +{ + if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} + static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -302,11 +309,14 @@ nfsd_file_put(struct nfsd_file *nf) return; } - filemap_flush(nf->nf_file->f_mapping); is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - nfsd_file_put_noref(nf); - if (is_hashed) + if (!is_hashed) { + nfsd_file_flush(nf); + nfsd_file_put_noref(nf); + } else { + nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); + } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); } @@ -327,6 +337,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); nfsd_file_put_noref(nf); } } @@ -340,6 +351,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) From 999397926ab3f78c7d1235cc4ca6e3c89d2769bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Mar 2022 09:54:02 -0400 Subject: [PATCH 071/423] nfsd: Clean up nfsd_file_put() Make it a little less racy, by removing the refcount_read() test. Then remove the redundant 'is_hashed' variable. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 496f7b3f7523..8f7ed5dbb003 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -301,21 +301,14 @@ nfsd_file_put_noref(struct nfsd_file *nf) void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed; - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { - nfsd_file_put_noref(nf); - return; - } - - is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - if (!is_hashed) { + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); } else { nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); + if (nf->nf_file) + nfsd_file_schedule_laundrette(); } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From 55037ed7bdc62151a726f5685f88afa6a82959b1 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 29 Mar 2022 10:12:52 -0700 Subject: [PATCH 072/423] uapi/linux/stddef.h: Add include guards Add include guard wrapper define to uapi/linux/stddef.h to prevent macro redefinition errors when stddef.h is included more than once. This was not needed before since the only contents already used a redefinition test. Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 3021ea25a284..7837ba4fe728 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include #ifndef __always_inline @@ -41,3 +44,4 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif From 037250f0a45cf9ecf5b52d4b9ff8eadeb609c800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 30 Mar 2022 18:44:09 +0200 Subject: [PATCH 073/423] ath9k: Properly clear TX status area before reporting to mac80211 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ath9k driver was not properly clearing the status area in the ieee80211_tx_info struct before reporting TX status to mac80211. Instead, it was manually filling in fields, which meant that fields introduced later were left as-is. Conveniently, mac80211 actually provides a helper to zero out the status area, so use that to make sure we zero everything. The last commit touching the driver function writing the status information seems to have actually been fixing an issue that was also caused by the area being uninitialised; but it only added clearing of a single field instead of the whole struct. That is now redundant, though, so revert that commit and use it as a convenient Fixes tag. Fixes: cc591d77aba1 ("ath9k: Make sure to zero status.tx_time before reporting TX status") Reported-by: Bagas Sanjaya Cc: Signed-off-by: Toke Høiland-Jørgensen Tested-by: Bagas Sanjaya Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220330164409.16645-1-toke@toke.dk --- drivers/net/wireless/ath/ath9k/xmit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index d0caf1de2bde..cbcf96ac303e 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2553,6 +2553,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; + ieee80211_tx_info_clear_status(tx_info); + if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2595,9 +2597,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, } tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; - - /* we report airtime in ath_tx_count_airtime(), don't report twice */ - tx_info->status.tx_time = 0; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) From 598be865ee00905f16ef3d0355fefe319cac981a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Mar 2022 11:40:30 +0200 Subject: [PATCH 074/423] MAINTAINERS: claim include/uapi/linux/wireless.h As much as I don't really want to maintain this legacy cruft that we started replacing 15+ years ago, for now it still falls on me to take care of it. Add a missing file to the list. Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220328114029.526fbb42784d.If7c79b4ca827dfe82a545689f2d31fcedabd8387@changeid --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e406a6db67d0..ca19e2d9a074 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -201,6 +201,7 @@ F: include/net/ieee80211_radiotap.h F: include/net/iw_handler.h F: include/net/wext.h F: include/uapi/linux/nl80211.h +F: include/uapi/linux/wireless.h F: net/wireless/ 8169 10/100/1000 GIGABIT ETHERNET DRIVER From 61a891efbb1099bb7bdcedfc50f802fabbe46a0e Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 30 Mar 2022 17:40:46 +0300 Subject: [PATCH 075/423] MAINTAINERS: mark wil6210 as orphan Maya is not working on wil6210 anymore so mark it as orphan. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220330144046.11229-1-kvalo@kernel.org --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca19e2d9a074..cc7a6a65eecb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20965,10 +20965,8 @@ S: Maintained F: drivers/hid/hid-wiimote* WILOCITY WIL6210 WIRELESS DRIVER -M: Maya Erez L: linux-wireless@vger.kernel.org -L: wil6210@qti.qualcomm.com -S: Supported +S: Orphan W: https://wireless.wiki.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ From 7f921a2d6c93051b6002dbb7c1781f1fa5b88cce Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 3 Apr 2022 22:12:52 +1000 Subject: [PATCH 076/423] KVM: PPC: Move kvmhv_on_pseries() into kvm_ppc.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We recently introduced a usage of kvmhv_on_pseries() in powerpc.c, which causes a build error for ppc64_book3e_allmodconfig: arch/powerpc/kvm/powerpc.c:716:8: error: implicit declaration of function ‘kvmhv_on_pseries’ 716 | if (kvmhv_on_pseries()) { | ^~~~~~~~~~~~~~~~ Fix it by moving kvmhv_on_pseries() into kvm_ppc.h so that the stub version is available for book3e builds. Fixes: f771b55731fc ("KVM: PPC: Use KVM_CAP_PPC_AIL_MODE_3") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kvm_book3s_64.h | 12 ------------ arch/powerpc/include/asm/kvm_ppc.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 827038a33064..4def2bd17b9b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -16,18 +16,6 @@ #include #include -#ifdef CONFIG_PPC_PSERIES -static inline bool kvmhv_on_pseries(void) -{ - return !cpu_has_feature(CPU_FTR_HVMODE); -} -#else -static inline bool kvmhv_on_pseries(void) -{ - return false; -} -#endif - /* * Structure for a nested guest, that is, for a guest that is managed by * one of our guests. diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c583d0c37f31..838d4cb460b7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -586,6 +586,18 @@ static inline bool kvm_hv_mode_active(void) { return false; } #endif +#ifdef CONFIG_PPC_PSERIES +static inline bool kvmhv_on_pseries(void) +{ + return !cpu_has_feature(CPU_FTR_HVMODE); +} +#else +static inline bool kvmhv_on_pseries(void) +{ + return false; +} +#endif + #ifdef CONFIG_KVM_XICS static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { From ec858afda857e361182ceafc3d2ba2b164b8e889 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Mar 2022 11:06:02 -0600 Subject: [PATCH 077/423] io_uring: don't check req->file in io_fsync_prep() This is a leftover from the really old days where we weren't able to track and error early if we need a file and it wasn't assigned. Kill the check. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a8413f006417..9108c56bff5b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4513,9 +4513,6 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - if (!req->file) - return -EBADF; - if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || From 8027a9ad9b3568c5eb49c968ad6c97f279d76730 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 5 Jan 2022 15:47:29 +0800 Subject: [PATCH 078/423] drm/imx: imx-ldb: Check for null pointer after calling kmemdup As the possible failure of the allocation, kmemdup() may return NULL pointer. Therefore, it should be better to check the return value of kmemdup() and return error if fails. Fixes: dc80d7038883 ("drm/imx-ldb: Add support to drm-bridge") Signed-off-by: Jiasheng Jiang Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220105074729.2363657-1-jiasheng@iscas.ac.cn --- drivers/gpu/drm/imx/imx-ldb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index e5078d03020d..fb0e951248f6 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -572,6 +572,8 @@ static int imx_ldb_panel_ddc(struct device *dev, edidp = of_get_property(child, "edid", &edid_len); if (edidp) { channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL); + if (!channel->edid) + return -ENOMEM; } else if (!channel->panel) { /* fallback to display-timings node */ ret = of_get_drm_display_mode(child, From bce81feb03a20fca7bbdd1c4af16b4e9d5c0e1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sat, 8 Jan 2022 17:52:30 +0100 Subject: [PATCH 079/423] drm/imx: Fix memory leak in imx_pd_connector_get_modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid leaking the display mode variable if of_get_drm_display_mode fails. Fixes: 76ecd9c9fb24 ("drm/imx: parallel-display: check return code from of_get_drm_display_mode()") Addresses-Coverity-ID: 1443943 ("Resource leak") Signed-off-by: José Expósito Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220108165230.44610-1-jose.exposito89@gmail.com --- drivers/gpu/drm/imx/parallel-display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 06cb1a59b9bc..63ba2ad84679 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -75,8 +75,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + drm_mode_destroy(connector->dev, mode); return ret; + } drm_mode_copy(mode, &imxpd->mode); mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; From e8083acc3f8cc2097917018e947fd4c857f60454 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 28 Jan 2022 17:19:44 +0800 Subject: [PATCH 080/423] drm/imx: dw_hdmi-imx: Fix bailout in error cases of probe In dw_hdmi_imx_probe(), if error happens after dw_hdmi_probe() returns successfully, dw_hdmi_remove() should be called where necessary as bailout. Fixes: c805ec7eb210 ("drm/imx: dw_hdmi-imx: move initialization into probe") Cc: Philipp Zabel Cc: David Airlie Cc: Daniel Vetter Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220128091944.3831256-1-victor.liu@nxp.com --- drivers/gpu/drm/imx/dw_hdmi-imx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c index 87428fb23d9f..a2277a0d6d06 100644 --- a/drivers/gpu/drm/imx/dw_hdmi-imx.c +++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c @@ -222,6 +222,7 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; const struct of_device_id *match = of_match_node(dw_hdmi_imx_dt_ids, np); struct imx_hdmi *hdmi; + int ret; hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); if (!hdmi) @@ -243,10 +244,15 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev) hdmi->bridge = of_drm_find_bridge(np); if (!hdmi->bridge) { dev_err(hdmi->dev, "Unable to find bridge\n"); + dw_hdmi_remove(hdmi->hdmi); return -ENODEV; } - return component_add(&pdev->dev, &dw_hdmi_imx_ops); + ret = component_add(&pdev->dev, &dw_hdmi_imx_ops); + if (ret) + dw_hdmi_remove(hdmi->hdmi); + + return ret; } static int dw_hdmi_imx_remove(struct platform_device *pdev) From 070a88fd4a03f921b73a2059e97d55faaa447dab Mon Sep 17 00:00:00 2001 From: Leo Ruan Date: Mon, 7 Feb 2022 16:14:11 +0100 Subject: [PATCH 081/423] gpu: ipu-v3: Fix dev_dbg frequency output This commit corrects the printing of the IPU clock error percentage if it is between -0.1% to -0.9%. For example, if the pixel clock requested is 27.2 MHz but only 27.0 MHz can be achieved the deviation is -0.8%. But the fixed point math had a flaw and calculated error of 0.2%. Before: Clocks: IPU 270000000Hz DI 24716667Hz Needed 27200000Hz IPU clock can give 27000000 with divider 10, error 0.2% Want 27200000Hz IPU 270000000Hz DI 24716667Hz using IPU, 27000000Hz After: Clocks: IPU 270000000Hz DI 24716667Hz Needed 27200000Hz IPU clock can give 27000000 with divider 10, error -0.8% Want 27200000Hz IPU 270000000Hz DI 24716667Hz using IPU, 27000000Hz Signed-off-by: Leo Ruan Signed-off-by: Mark Jonas Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220207151411.5009-1-mark.jonas@de.bosch.com --- drivers/gpu/ipu-v3/ipu-di.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index 666223c6bec4..0a34e0ab4fe6 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -447,8 +447,9 @@ static void ipu_di_config_clock(struct ipu_di *di, error = rate / (sig->mode.pixelclock / 1000); - dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %d.%u%%\n", - rate, div, (signed)(error - 1000) / 10, error % 10); + dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %c%d.%d%%\n", + rate, div, error < 1000 ? '-' : '+', + abs(error - 1000) / 10, abs(error - 1000) % 10); /* Allow a 1% error */ if (error < 1010 && error >= 990) { From 92d96b603738ec4f35cde7198c303ae264dd47cb Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Sun, 27 Mar 2022 18:01:54 -0700 Subject: [PATCH 082/423] regulator: wm8994: Add an off-on delay for WM8994 variant As per Table 130 of the wm8994 datasheet at [1], there is an off-on delay for LDO1 and LDO2. In the wm8958 datasheet [2], I could not find any reference to it. I could not find a wm1811 datasheet to double-check there, but as no one has complained presumably it works without it. This solves the issue on Samsung Aries boards with a wm8994 where register writes fail when the device is powered off and back-on quickly. [1] https://statics.cirrus.com/pubs/proDatasheet/WM8994_Rev4.6.pdf [2] https://statics.cirrus.com/pubs/proDatasheet/WM8958_v3.5.pdf Signed-off-by: Jonathan Bakker Acked-by: Charles Keepax Link: https://lore.kernel.org/r/CY4PR04MB056771CFB80DC447C30D5A31CB1D9@CY4PR04MB0567.namprd04.prod.outlook.com Signed-off-by: Mark Brown --- drivers/regulator/wm8994-regulator.c | 42 ++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index cadea0344486..40befdd9dfa9 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = { }; static const struct regulator_desc wm8994_ldo_desc[] = { + { + .name = "LDO1", + .id = 1, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_1, + .vsel_mask = WM8994_LDO1_VSEL_MASK, + .ops = &wm8994_ldo1_ops, + .min_uV = 2400000, + .uV_step = 100000, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = 2, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_2, + .vsel_mask = WM8994_LDO2_VSEL_MASK, + .ops = &wm8994_ldo2_ops, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, +}; + +static const struct regulator_desc wm8958_ldo_desc[] = { { .name = "LDO1", .id = 1, @@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev) * regulator core and we need not worry about it on the * error path. */ - ldo->regulator = devm_regulator_register(&pdev->dev, - &wm8994_ldo_desc[id], - &config); + if (ldo->wm8994->type == WM8994) { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8994_ldo_desc[id], + &config); + } else { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8958_ldo_desc[id], + &config); + } + if (IS_ERR(ldo->regulator)) { ret = PTR_ERR(ldo->regulator); dev_err(wm8994->dev, "Failed to register LDO%d: %d\n", From 17049bf9de55a42ee96fd34520aff8a484677675 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 4 Apr 2022 10:25:14 +0800 Subject: [PATCH 083/423] regulator: rtq2134: Fix missing active_discharge_on setting The active_discharge_on setting was missed, so output discharge resistor is always disabled. Fix it. Fixes: 0555d41497de ("regulator: rtq2134: Add support for Richtek RTQ2134 SubPMIC") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20220404022514.449231-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/rtq2134-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/rtq2134-regulator.c b/drivers/regulator/rtq2134-regulator.c index f21e3f8b21f2..8e13dea354a2 100644 --- a/drivers/regulator/rtq2134-regulator.c +++ b/drivers/regulator/rtq2134-regulator.c @@ -285,6 +285,7 @@ static const unsigned int rtq2134_buck_ramp_delay_table[] = { .enable_mask = RTQ2134_VOUTEN_MASK, \ .active_discharge_reg = RTQ2134_REG_BUCK##_id##_CFG0, \ .active_discharge_mask = RTQ2134_ACTDISCHG_MASK, \ + .active_discharge_on = RTQ2134_ACTDISCHG_MASK, \ .ramp_reg = RTQ2134_REG_BUCK##_id##_RSPCFG, \ .ramp_mask = RTQ2134_RSPUP_MASK, \ .ramp_delay_table = rtq2134_buck_ramp_delay_table, \ From 2f8cf5f642e80f8b6b0e660a9c86924a1f41cd80 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 Mar 2022 16:00:39 +0200 Subject: [PATCH 084/423] spi: rpc-if: Fix RPM imbalance in probe error path If rpcif_hw_init() fails, Runtime PM is left enabled. Fixes: b04cc0d912eb80d3 ("memory: renesas-rpc-if: Add support for RZ/G2L") Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/1c78a1f447d019bb66b6e7787f520ae78821e2ae.1648562287.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/spi-rpc-if.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c index fe82f3575df4..24ec1c83f379 100644 --- a/drivers/spi/spi-rpc-if.c +++ b/drivers/spi/spi-rpc-if.c @@ -158,14 +158,18 @@ static int rpcif_spi_probe(struct platform_device *pdev) error = rpcif_hw_init(rpc, false); if (error) - return error; + goto out_disable_rpm; error = spi_register_controller(ctlr); if (error) { dev_err(&pdev->dev, "spi_register_controller failed\n"); - rpcif_disable_rpm(rpc); + goto out_disable_rpm; } + return 0; + +out_disable_rpm: + rpcif_disable_rpm(rpc); return error; } From 35d516bdcd92fde46202d06b68df1166760208fd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Apr 2022 12:11:13 +0200 Subject: [PATCH 085/423] spi: mxic: Fix an error handling path in mxic_spi_probe() If spi_register_master() fails, we must undo a previous mxic_spi_mem_ecc_probe() call, as already done in the remove function. Fixes: 00360ebae483 ("spi: mxic: Add support for pipelined ECC operations") Signed-off-by: Christophe JAILLET Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/09c81f751241f6ec0bac7a48d4ec814a742e0d17.1648980664.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-mxic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index 55c092069301..65be8e085ab8 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -813,6 +813,7 @@ static int mxic_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "spi_register_master failed\n"); pm_runtime_disable(&pdev->dev); + mxic_spi_mem_ecc_remove(mxic); } return ret; From 2316f0fc0ad2aa87a568ceaf3d76be983ee555c3 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Apr 2022 21:22:35 +0800 Subject: [PATCH 086/423] regulator: atc260x: Fix missing active_discharge_on setting Without active_discharge_on setting, the SWITCH1 discharge enable control is always disabled. Fix it. Fixes: 3b15ccac161a ("regulator: Add regulator driver for ATC260x PMICs") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20220403132235.123727-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/atc260x-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c index 05147d2c3842..485e58b264c0 100644 --- a/drivers/regulator/atc260x-regulator.c +++ b/drivers/regulator/atc260x-regulator.c @@ -292,6 +292,7 @@ enum atc2603c_reg_ids { .bypass_mask = BIT(5), \ .active_discharge_reg = ATC2603C_PMU_SWITCH_CTL, \ .active_discharge_mask = BIT(1), \ + .active_discharge_on = BIT(1), \ .owner = THIS_MODULE, \ } From dbf3f09322141b6f04a33949453b7626f62d9e0b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Apr 2022 07:51:22 +0200 Subject: [PATCH 087/423] tty: serial: mpc52xx_uart: make rx/tx hooks return unsigned, part II. The below commit changed types of some hooks in struct psc_ops. It also changed the types of the functions which are referenced in the instances of the above struct. However the commit did so only for CONFIG_PPC_MPC52xx, but not for CONFIG_PPC_MPC512x. This results in build errors like: mpc52xx_uart.c:static unsigned int mpc52xx_psc_raw_tx_rdy(struct uart_port *port) mpc52xx_uart.c:static int mpc512x_psc_raw_tx_rdy(struct uart_port *port) ^^^ mpc52xx_uart.c:static int mpc5125_psc_raw_tx_rdy(struct uart_port *port) ^^^ Therefore, fix the latter case now too. Fixes: 18662a1d8f35 (tty: serial: mpc52xx_uart: make rx/tx hooks return unsigned) Cc: Linus Torvalds Reported-by: Guenter Roeck Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220404055122.31194-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mpc52xx_uart.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index 8a6958377764..3acc0f185762 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -436,31 +436,31 @@ static void mpc512x_psc_fifo_init(struct uart_port *port) out_be32(&FIFO_512x(port)->rximr, MPC512x_PSC_FIFO_ALARM); } -static int mpc512x_psc_raw_rx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_raw_rx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_512x(port)->rxsr) & MPC512x_PSC_FIFO_EMPTY); } -static int mpc512x_psc_raw_tx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_raw_tx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_512x(port)->txsr) & MPC512x_PSC_FIFO_FULL); } -static int mpc512x_psc_rx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_rx_rdy(struct uart_port *port) { return in_be32(&FIFO_512x(port)->rxsr) & in_be32(&FIFO_512x(port)->rximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc512x_psc_tx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_tx_rdy(struct uart_port *port) { return in_be32(&FIFO_512x(port)->txsr) & in_be32(&FIFO_512x(port)->tximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc512x_psc_tx_empty(struct uart_port *port) +static unsigned int mpc512x_psc_tx_empty(struct uart_port *port) { return in_be32(&FIFO_512x(port)->txsr) & MPC512x_PSC_FIFO_EMPTY; @@ -780,29 +780,29 @@ static void mpc5125_psc_fifo_init(struct uart_port *port) out_be32(&FIFO_5125(port)->rximr, MPC512x_PSC_FIFO_ALARM); } -static int mpc5125_psc_raw_rx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_raw_rx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_5125(port)->rxsr) & MPC512x_PSC_FIFO_EMPTY); } -static int mpc5125_psc_raw_tx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_raw_tx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_5125(port)->txsr) & MPC512x_PSC_FIFO_FULL); } -static int mpc5125_psc_rx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_rx_rdy(struct uart_port *port) { return in_be32(&FIFO_5125(port)->rxsr) & in_be32(&FIFO_5125(port)->rximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc5125_psc_tx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_tx_rdy(struct uart_port *port) { return in_be32(&FIFO_5125(port)->txsr) & in_be32(&FIFO_5125(port)->tximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc5125_psc_tx_empty(struct uart_port *port) +static unsigned int mpc5125_psc_tx_empty(struct uart_port *port) { return in_be32(&FIFO_5125(port)->txsr) & MPC512x_PSC_FIFO_EMPTY; } From a0ab7e5bc9651d65637f50ee9c09e083919677ed Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 4 Apr 2022 09:44:32 +0100 Subject: [PATCH 088/423] Revert "arm64: Change elfcore for_each_mte_vma() to use VMA iterator" This reverts commit 3a4f7ef4bed5bdc77a1ac8132f9f0650bbcb3eae. Revert this temporary bodge. It only existed to ease integration with the maple tree work for the 5.18 merge window and that doesn't appear to have landed in any case. Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3ed39c61a510..3455ee4acc04 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,16 +8,9 @@ #include #include -#ifndef VMA_ITERATOR -#define VMA_ITERATOR(name, mm, addr) \ - struct mm_struct *name = mm -#define for_each_vma(vmi, vma) \ - for (vma = vmi->mmap; vma; vma = vma->vm_next) -#endif - -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(tsk, vma) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -72,9 +65,8 @@ Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) vma_count++; return vma_count; @@ -83,9 +75,8 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { struct elf_phdr phdr; phdr.p_type = PT_ARM_MEMTAG_MTE; @@ -109,9 +100,8 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -120,9 +110,8 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; From 16decce22efa0813beafbc9084181e299b69a1a1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Apr 2022 16:13:56 +0100 Subject: [PATCH 089/423] arm64: mte: Fix the stack frame size warning in mte_dump_tag_range() With 64K page configurations, the tags array stored on the stack of the mte_dump_tag_range() function is 2048 bytes, triggering a compiler warning when CONFIG_FRAME_WARN is enabled. Switch to a kmalloc() allocation via mte_allocate_tag_storage(). Signed-off-by: Catalin Marinas Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Reported-by: kernel test robot Cc: Will Deacon Link: https://lore.kernel.org/r/20220401151356.1674232-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3455ee4acc04..2b3f3d0544b9 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -25,10 +25,11 @@ static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) static int mte_dump_tag_range(struct coredump_params *cprm, unsigned long start, unsigned long end) { + int ret = 1; unsigned long addr; + void *tags = NULL; for (addr = start; addr < end; addr += PAGE_SIZE) { - char tags[MTE_PAGE_TAG_STORAGE]; struct page *page = get_dump_page(addr); /* @@ -52,13 +53,28 @@ static int mte_dump_tag_range(struct coredump_params *cprm, continue; } + if (!tags) { + tags = mte_allocate_tag_storage(); + if (!tags) { + put_page(page); + ret = 0; + break; + } + } + mte_save_page_tags(page_address(page), tags); put_page(page); - if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) - return 0; + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { + mte_free_tag_storage(tags); + ret = 0; + break; + } } - return 1; + if (tags) + mte_free_tag_storage(tags); + + return ret; } Elf_Half elf_core_extra_phdrs(void) From 5524cbb1bfcdff0cad0aaa9f94e6092002a07259 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Thu, 31 Mar 2022 11:39:26 -0400 Subject: [PATCH 090/423] arch/arm64: Fix topology initialization for core scheduling Arm64 systems rely on store_cpu_topology() to call update_siblings_masks() to transfer the toplogy to the various cpu masks. This needs to be done before the call to notify_cpu_starting() which tells the scheduler about each cpu found, otherwise the core scheduling data structures are setup in a way that does not match the actual topology. With smt_mask not setup correctly we bail on `cpumask_weight(smt_mask) == 1` for !leaders in: notify_cpu_starting() cpuhp_invoke_callback_range() sched_cpu_starting() sched_core_cpu_starting() which leads to rq->core not being correctly set for !leader-rq's. Without this change stress-ng (which enables core scheduling in its prctl tests in newer versions -- i.e. with PR_SCHED_CORE support) causes a warning and then a crash (trimmed for legibility): [ 1853.805168] ------------[ cut here ]------------ [ 1853.809784] task_rq(b)->core != rq->core [ 1853.809792] WARNING: CPU: 117 PID: 0 at kernel/sched/fair.c:11102 cfs_prio_less+0x1b4/0x1c4 ... [ 1854.015210] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 ... [ 1854.231256] Call trace: [ 1854.233689] pick_next_task+0x3dc/0x81c [ 1854.237512] __schedule+0x10c/0x4cc [ 1854.240988] schedule_idle+0x34/0x54 Fixes: 9edeaea1bc45 ("sched: Core-wide rq->lock") Signed-off-by: Phil Auld Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Link: https://lore.kernel.org/r/20220331153926.25742-1-pauld@redhat.com Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 27df5c1e6baa..3b46041f2b97 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void) * Log the CPU info before it is marked online and might get read. */ cpuinfo_store_cpu(); + store_cpu_topology(cpu); /* * Enable GIC and timers. @@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void) ipi_setup(cpu); - store_cpu_topology(cpu); numa_add_cpu(cpu); /* From dd671f16b1cdb188aa64d740a408f7d00e281444 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 18 Mar 2022 11:37:05 +0100 Subject: [PATCH 091/423] arm64: fix typos in comments Various spelling mistakes in comments. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20220318103729.157574-10-Julia.Lawall@inria.fr [will: Squashed in 20220318103729.157574-28-Julia.Lawall@inria.fr] Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 2 +- arch/arm64/kernel/module-plts.c | 2 +- arch/arm64/kernel/suspend.c | 2 +- arch/arm64/mm/init.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 712e97c03e54..cd868084e724 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler); * addresses. There is no straight-forward way, short of disassembling the * offending instruction, to map that address back to the watchpoint. This * function computes the distance of the memory access from the watchpoint as a - * heuristic for the likelyhood that a given access triggered the watchpoint. + * heuristic for the likelihood that a given access triggered the watchpoint. * * See Section D2.10.5 "Determining the memory location that caused a Watchpoint * exception" of ARMv8 Architecture Reference Manual for details. diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index e53493d8b208..a3d0494f25a9 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -220,7 +220,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, * increasing the section's alignment so that the * resulting address of this instruction is guaranteed * to equal the offset in that particular bit (as well - * as all less signficant bits). This ensures that the + * as all less significant bits). This ensures that the * address modulo 4 KB != 0xfff8 or 0xfffc (which would * have all ones in bits [11:3]) */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 19ee7c33769d..2b0887e58a7c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -140,7 +140,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully - * renabled if it was enabled when core started shutdown. + * reenabled if it was enabled when core started shutdown. */ local_daif_restore(flags); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8ac25f19084e..1e7b1550e2fc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(memstart_addr); * In this scheme a comparatively quicker boot is observed. * * If ZONE_DMA configs are defined, crash kernel memory reservation - * is delayed until DMA zone memory range size initilazation performed in + * is delayed until DMA zone memory range size initialization performed in * zone_sizes_init(). The defer is necessary to steer clear of DMA zone * memory range to avoid overlap allocation. So crash kernel memory boundaries * are not known when mapping all bank memory ranges, which otherwise means @@ -81,7 +81,7 @@ EXPORT_SYMBOL(memstart_addr); * so page-granularity mappings are created for the entire memory range. * Hence a slightly slower boot is observed. * - * Note: Page-granularity mapppings are necessary for crash kernel memory + * Note: Page-granularity mappings are necessary for crash kernel memory * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) From 4dfa1f3657a0d4fb556d4440322d35bdcf5e4970 Mon Sep 17 00:00:00 2001 From: Zhiyuan Dai Date: Mon, 21 Mar 2022 10:56:27 +0800 Subject: [PATCH 092/423] arm64: Fix comments in macro __init_el2_gicv3 Fix typo in comment. Signed-off-by: Zhiyuan Dai Link: https://lore.kernel.org/r/1647831387-3686-1-git-send-email-daizhiyuan@phytium.com.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 7f3c87f7a0ce..c31be7eda9df 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -107,7 +107,7 @@ isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm From 2012a9e279013933885983cbe0a5fe828052563b Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Sun, 27 Mar 2022 13:57:33 +0800 Subject: [PATCH 093/423] perf: qcom_l2_pmu: fix an incorrect NULL check on list iterator The bug is here: return cluster; The list iterator value 'cluster' will *always* be set and non-NULL by list_for_each_entry(), so it is incorrect to assume that the iterator value will be NULL if the list is empty or no element is found. To fix the bug, return 'cluster' when found, otherwise return NULL. Cc: stable@vger.kernel.org Fixes: 21bdbb7102ed ("perf: add qcom l2 cache perf events driver") Signed-off-by: Xiaomeng Tong Link: https://lore.kernel.org/r/20220327055733.4070-1-xiam0nd.tong@gmail.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l2_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 7640491aab12..30234c261b05 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -736,7 +736,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( { u64 mpidr; int cpu_cluster_id; - struct cluster_pmu *cluster = NULL; + struct cluster_pmu *cluster; /* * This assumes that the cluster_id is in MPIDR[aff1] for @@ -758,10 +758,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( cluster->cluster_id); cpumask_set_cpu(cpu, &cluster->cluster_cpus); *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; - break; + return cluster; } - return cluster; + return NULL; } static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) From 1d8e926a04b948f03b3c98aabf7e0033ac12ffbc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 Mar 2022 15:10:10 +0200 Subject: [PATCH 094/423] perf: MARVELL_CN10K_DDR_PMU should depend on ARCH_THUNDER The Marvell CN10K DRAM Subsystem (DSS) performance monitor is only present on Marvell CN10K SoCs. Hence add a dependency on ARCH_THUNDER, to prevent asking the user about this driver when configuring a kernel without Cavium Thunder (incl. Marvell CN10K) SoC support, Fixes: 68fa55f0e05c ("perf/marvell: cn10k DDR perf event core ownership") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/18bfd6e1bcf67db7ea656d684a8bbb68261eeb54.1648559364.git.geert+renesas@glider.be Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index afdcb91601d2..1e2d69453771 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -187,7 +187,7 @@ source "drivers/perf/hisilicon/Kconfig" config MARVELL_CN10K_DDR_PMU tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Enable perf support for Marvell DDR Performance monitoring event on CN10K platform. From 7e2646ed47542123168d43916b84b954532e5386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 18 Mar 2022 15:14:41 +0100 Subject: [PATCH 095/423] Revert "mmc: sdhci-xenon: fix annoying 1.8V regulator warning" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bb32e1987bc55ce1db400faf47d85891da3c9b9f. Commit 1a3ed0dc3594 ("mmc: sdhci-xenon: fix 1.8v regulator stabilization") contains proper fix for the issue described in commit bb32e1987bc5 ("mmc: sdhci-xenon: fix annoying 1.8V regulator warning"). Fixes: 8d876bf472db ("mmc: sdhci-xenon: wait 5ms after set 1.8V signal enable") Cc: stable@vger.kernel.org # 1a3ed0dc3594 ("mmc: sdhci-xenon: fix 1.8v regulator stabilization") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Reviewed-by: Marcin Wojtas Link: https://lore.kernel.org/r/20220318141441.32329-1-pali@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 666cee4c7f7c..08e838400b52 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -241,16 +241,6 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); - - /* - * For some reason the controller's Host Control2 register reports - * the bit representing 1.8V signaling as 0 when read after it was - * written as 1. Subsequent read reports 1. - * - * Since this may cause some issues, do an empty read of the Host - * Control2 register here to circumvent this. - */ - sdhci_readw(host, SDHCI_HOST_CONTROL2); } static unsigned int xenon_get_max_clock(struct sdhci_host *host) From 0d319dd5a27183b75d984e3dc495248e59f99334 Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Thu, 17 Mar 2022 12:19:43 +0100 Subject: [PATCH 096/423] mmc: mmci: stm32: correctly check all elements of sg list Use sg and not data->sg when checking sg list elements. Else only the first element alignment is checked. The last element should be checked the same way, for_each_sg already set sg to sg_next(sg). Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Signed-off-by: Yann Gautier Link: https://lore.kernel.org/r/20220317111944.116148-2-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 9c13f2c31365..4566d7fc9055 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, * excepted the last element which has no constraint on idmasize */ for_each_sg(data->sg, sg, data->sg_len - 1, i) { - if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) || - !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) { dev_err(mmc_dev(host->mmc), "unaligned scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); @@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, } } - if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { dev_err(mmc_dev(host->mmc), "unaligned last scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); From 5d435933376962b107bd76970912e7e80247dcc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=B6hle?= Date: Thu, 24 Mar 2022 14:18:41 +0000 Subject: [PATCH 097/423] mmc: block: Check for errors after write on SPI Introduce a SEND_STATUS check for writes through SPI to not mark an unsuccessful write as successful. Since SPI SD/MMC does not have states, after a write, the card will just hold the line LOW until it is ready again. The driver marks the write therefore as completed as soon as it reads something other than all zeroes. The driver does not distinguish from a card no longer signalling busy and it being disconnected (and the line being pulled-up by the host). This lead to writes being marked as successful when disconnecting a busy card. Now the card is ensured to be still connected by an additional CMD13, just like non-SPI is ensured to go back to TRAN state. While at it and since we already poll for the post-write status anyway, we might as well check for SPIs error bits (any of them). The disconnecting card problem is reproducable for me after continuous write activity and randomly disconnecting, around every 20-50 tries on SPI DS for some card. Fixes: 7213d175e3b6f ("MMC/SD card driver learns SPI") Cc: stable@vger.kernel.org Signed-off-by: Christian Loehle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/76f6f5d2b35543bab3dfe438f268609c@hyperstone.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4e67c1403cc9..be2078684417 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1880,6 +1880,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } +static int mmc_spi_err_check(struct mmc_card *card) +{ + u32 status = 0; + int err; + + /* + * SPI does not have a TRAN state we have to wait on, instead the + * card is ready again when it no longer holds the line LOW. + * We still have to ensure two things here before we know the write + * was successful: + * 1. The card has not disconnected during busy and we actually read our + * own pull-up, thinking it was still connected, so ensure it + * still responds. + * 2. Check for any error bits, in particular R1_SPI_IDLE to catch a + * just reconnected card after being disconnected during busy. + */ + err = __mmc_send_status(card, &status, 0); + if (err) + return err; + /* All R1 and R2 bits of SPI are errors in our case */ + if (status) + return -EIO; + return 0; +} + static int mmc_blk_busy_cb(void *cb_data, bool *busy) { struct mmc_blk_busy_data *data = cb_data; @@ -1903,9 +1928,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) struct mmc_blk_busy_data cb_data; int err; - if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) + if (rq_data_dir(req) == READ) return 0; + if (mmc_host_is_spi(card->host)) { + err = mmc_spi_err_check(card); + if (err) + mqrq->brq.data.bytes_xfered = 0; + return err; + } + cb_data.card = card; cb_data.status = 0; err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS, From 08ebf903af57cda6d773f3dd1671b64f73b432b8 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 31 Mar 2022 15:32:23 +0800 Subject: [PATCH 098/423] mmc: core: Fixup support for writeback-cache for eMMC and SD During the card initialization process, the mmc core checks whether the eMMC/SD card supports an internal writeback-cache and then enables it inside the card. Unfortunately, this isn't according to what the mmc core reports to the upper block layer. Instead, the writeback-cache support with REQ_FLUSH and REQ_FUA, are being enabled depending on whether the host supports the CMD23 (MMC_CAP_CMD23) and whether an eMMC supports the reliable-write command. This is wrong and it may also sound awkward. In fact, it's a remnant from when both eMMC/SD cards didn't have dedicated commands/support to control the internal writeback-cache. In other words, it was the best we could do at that point in time. To fix the problem, but also without breaking backwards compatibility, let's align the REQ_FLUSH support with whether the writeback-cache became successfully enabled - for both eMMC and SD cards. Cc: stable@kernel.org Fixes: 881d1c25f765 ("mmc: core: Add cache control for eMMC4.5 device") Fixes: 130206a615a9 ("mmc: core: Add support for cache ctrl for SD cards") Depends-on: 97fce126e279 ("mmc: block: Issue a cache flush only when it's enabled") Reviewed-by: Avri Altman Signed-off-by: Michael Wu Link: https://lore.kernel.org/r/20220331073223.106415-1-michael@allwinnertech.com [Ulf: Re-wrote the commit message] Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index be2078684417..db99882c95d8 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2382,6 +2382,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct mmc_blk_data *md; int devidx, ret; char cap_str[10]; + bool cache_enabled = false; + bool fua_enabled = false; devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL); if (devidx < 0) { @@ -2461,13 +2463,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->flags |= MMC_BLK_CMD23; } - if (mmc_card_mmc(card) && - md->flags & MMC_BLK_CMD23 && + if (md->flags & MMC_BLK_CMD23 && ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || card->ext_csd.rel_sectors)) { md->flags |= MMC_BLK_REL_WR; - blk_queue_write_cache(md->queue.queue, true, true); + fua_enabled = true; + cache_enabled = true; } + if (mmc_cache_enabled(card->host)) + cache_enabled = true; + + blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled); string_get_size((u64)size, 512, STRING_UNITS_2, cap_str, sizeof(cap_str)); From 46d4820f949a3030b19ee482c68a50b06dd27590 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 4 Apr 2022 12:05:08 +0200 Subject: [PATCH 099/423] mmc: renesas_sdhi: special 4tap settings only apply to HS400 Previous documentation was vague, so we included SDR104 for slow SDnH clock settings. It turns out now, that it is only needed for HS400. Fixes: bb6d3fa98a41 ("clk: renesas: rcar-gen3: Switch to new SD clock handling") Cc: stable@vger.kernel.org Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20220404100508.3209-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 2797a9c0f17d..2a4d314aa027 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -144,9 +144,9 @@ static unsigned int renesas_sdhi_clk_update(struct tmio_mmc_host *host, return clk_get_rate(priv->clk); if (priv->clkh) { + /* HS400 with 4TAP needs different clock settings */ bool use_4tap = priv->quirks && priv->quirks->hs400_4taps; - bool need_slow_clkh = (host->mmc->ios.timing == MMC_TIMING_UHS_SDR104) || - (host->mmc->ios.timing == MMC_TIMING_MMC_HS400); + bool need_slow_clkh = host->mmc->ios.timing == MMC_TIMING_MMC_HS400; clkh_shift = use_4tap && need_slow_clkh ? 1 : 2; ref_clk = priv->clkh; } From 5467801f1fcbdc46bc7298a84dbf3ca1ff2a7320 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Mon, 21 Mar 2022 19:02:41 +0530 Subject: [PATCH 100/423] gpio: Restrict usage of GPIO chip irq members before initialization GPIO chip irq members are exposed before they could be completely initialized and this leads to race conditions. One such issue was observed for the gc->irq.domain variable which was accessed through the I2C interface in gpiochip_to_irq() before it could be initialized by gpiochip_add_irqchip(). This resulted in Kernel NULL pointer dereference. Following are the logs for reference :- kernel: Call Trace: kernel: gpiod_to_irq+0x53/0x70 kernel: acpi_dev_gpio_irq_get_by+0x113/0x1f0 kernel: i2c_acpi_get_irq+0xc0/0xd0 kernel: i2c_device_probe+0x28a/0x2a0 kernel: really_probe+0xf2/0x460 kernel: RIP: 0010:gpiochip_to_irq+0x47/0xc0 To avoid such scenarios, restrict usage of GPIO chip irq members before they are completely initialized. Signed-off-by: Shreeya Patel Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 19 +++++++++++++++++++ include/linux/gpio/driver.h | 9 +++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e59884cc12a7..085348e08986 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1404,6 +1404,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; +#ifdef CONFIG_GPIOLIB_IRQCHIP + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + if (!gc->irq.initialized) + return -EPROBE_DEFER; +#endif + if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; @@ -1593,6 +1603,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, acpi_gpiochip_request_interrupts(gc); + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before initialization of above + * GPIO chip irq members. + */ + barrier(); + + gc->irq.initialized = true; + return 0; } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 98c93510640e..874aabd270c9 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -221,6 +221,15 @@ struct gpio_irq_chip { */ bool per_parent_data; + /** + * @initialized: + * + * Flag to track GPIO chip irq member's initialization. + * This flag will make sure GPIO chip irq members are not used + * before they are initialized. + */ + bool initialized; + /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when From 36560efeab3232aa18d1190f7202eb42ff29e0f4 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 21 Mar 2022 14:06:24 -0400 Subject: [PATCH 101/423] platform/x86: think-lmi: certificate support clean ups Complete some clean-ups as reqested from the last review as follow-ups - Remove certificate from structure as no need to store it any more - Clean up return code handling - Moved freeing of signature to before admin object released (issue seen in testing when unloading module) - Minor code flow improvements Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220321180624.4761-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 44 +++++++++++--------------------- drivers/platform/x86/think-lmi.h | 1 - 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index bce17ca97947..a01a92769c1a 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -740,16 +740,8 @@ static ssize_t certificate_store(struct kobject *kobj, if (!tlmi_priv.certificate_support) return -EOPNOTSUPP; - new_cert = kstrdup(buf, GFP_KERNEL); - if (!new_cert) - return -ENOMEM; - /* Strip out CR if one is present */ - strip_cr(new_cert); - /* If empty then clear installed certificate */ - if (new_cert[0] == '\0') { /* Clear installed certificate */ - kfree(new_cert); - + if ((buf[0] == '\0') || (buf[0] == '\n')) { /* Clear installed certificate */ /* Check that signature is set */ if (!setting->signature || !setting->signature[0]) return -EACCES; @@ -763,14 +755,16 @@ static ssize_t certificate_store(struct kobject *kobj, ret = tlmi_simple_call(LENOVO_CLEAR_BIOS_CERT_GUID, auth_str); kfree(auth_str); - if (ret) - return ret; - kfree(setting->certificate); - setting->certificate = NULL; - return count; + return ret ?: count; } + new_cert = kstrdup(buf, GFP_KERNEL); + if (!new_cert) + return -ENOMEM; + /* Strip out CR if one is present */ + strip_cr(new_cert); + if (setting->cert_installed) { /* Certificate is installed so this is an update */ if (!setting->signature || !setting->signature[0]) { @@ -792,21 +786,14 @@ static ssize_t certificate_store(struct kobject *kobj, auth_str = kasprintf(GFP_KERNEL, "%s,%s", new_cert, setting->password); } - if (!auth_str) { - kfree(new_cert); + kfree(new_cert); + if (!auth_str) return -ENOMEM; - } ret = tlmi_simple_call(guid, auth_str); kfree(auth_str); - if (ret) { - kfree(new_cert); - return ret; - } - kfree(setting->certificate); - setting->certificate = new_cert; - return count; + return ret ?: count; } static struct kobj_attribute auth_certificate = __ATTR_WO(certificate); @@ -1194,6 +1181,10 @@ static void tlmi_release_attr(void) kset_unregister(tlmi_priv.attribute_kset); + /* Free up any saved signatures */ + kfree(tlmi_priv.pwd_admin->signature); + kfree(tlmi_priv.pwd_admin->save_signature); + /* Authentication structures */ sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_admin->kobj); @@ -1210,11 +1201,6 @@ static void tlmi_release_attr(void) } kset_unregister(tlmi_priv.authentication_kset); - - /* Free up any saved certificates/signatures */ - kfree(tlmi_priv.pwd_admin->certificate); - kfree(tlmi_priv.pwd_admin->signature); - kfree(tlmi_priv.pwd_admin->save_signature); } static int tlmi_sysfs_init(void) diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index 4f69df6eed07..4daba6151cd6 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -63,7 +63,6 @@ struct tlmi_pwd_setting { int index; /*Used for HDD and NVME auth */ enum level_option level; bool cert_installed; - char *certificate; char *signature; char *save_signature; }; From 442b8b250c41050d26353eb158514f3d91df3455 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Mar 2022 14:53:19 -0300 Subject: [PATCH 102/423] MAINTAINERS: Add Leon Romanovsky to RDMA maintainers Welcome Leon to the maintainer list so we continue to have two people on a medium sized subsystem. Link: https://lore.kernel.org/r/0-v1-64175bea3d24+13436-leon_maint_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..7f308fb9faa9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9598,6 +9598,7 @@ F: drivers/iio/pressure/dps310.c INFINIBAND SUBSYSTEM M: Jason Gunthorpe +M: Leon Romanovsky L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/linux-rdma/rdma-core From abcc160e4c2bc320f0d94e1b77f272a12fe90a0e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 29 Mar 2022 14:42:21 -0400 Subject: [PATCH 103/423] MAINTAINERS: Update qib and hfi1 related drivers Remove Mike's contact from maintainers file. Link: https://lore.kernel.org/r/20220329184221.182061.69846.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7f308fb9faa9..9b1c91d9fb8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8675,7 +8675,6 @@ F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h HFI1 DRIVER -M: Mike Marciniszyn M: Dennis Dalessandro L: linux-rdma@vger.kernel.org S: Supported @@ -14657,7 +14656,6 @@ F: drivers/rtc/rtc-optee.c OPA-VNIC DRIVER M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/ulp/opa_vnic @@ -16099,7 +16097,6 @@ F: include/uapi/linux/qemu_fw_cfg.h QIB DRIVER M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qib/ @@ -16617,7 +16614,6 @@ F: drivers/net/ethernet/rdc/r6040.c RDMAVT - RDMA verbs software M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rdmavt From 84c2362fb65d69c721fec0974556378cbb36a62b Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 4 Apr 2022 11:58:03 +0300 Subject: [PATCH 104/423] RDMA/mlx5: Don't remove cache MRs when a delay is needed Don't remove MRs from the cache if need to delay the removal. Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/c3087a90ff362c8796c7eaa2715128743ce36722.1649062436.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 956f8e875daa..45b0680377ec 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -574,8 +574,10 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) spin_lock_irq(&ent->lock); if (ent->disabled) goto out; - if (need_delay) + if (need_delay) { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); + goto out; + } remove_cache_mr_locked(ent); queue_adjust_cache_locked(ent); } From 1d735eeee63a0beb65180ca0224f239cc0c9f804 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 4 Apr 2022 11:58:04 +0300 Subject: [PATCH 105/423] RDMA/mlx5: Add a missing update of cache->last_add Update cache->last_add when returning an MR to the cache so that the cache work won't remove it. Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/c99f076fce4b44829d434936bbcd3b5fc4c95020.1649062436.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45b0680377ec..32ef67e9a6a7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -627,6 +627,7 @@ static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; + WRITE_ONCE(dev->cache.last_add, jiffies); spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; From 107dd7beba403a363adfeb3ffe3734fe38a05cce Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 Apr 2022 11:58:05 +0300 Subject: [PATCH 106/423] IB/cm: Cancel mad on the DREQ event when the state is MRA_REP_RCVD On the passive side when the disconnectReq event comes, if the current state is MRA_REP_RCVD, it needs to cancel the MAD before entering the DREQ_RCVD and TIMEWAIT states, otherwise the destroy_id may block until this mad will reach timeout. Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Link: https://lore.kernel.org/r/75261c00c1d82128b1d981af9ff46e994186e621.1649062436.git.leonro@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 35f0d5e7533d..1c107d6d03b9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: + case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: @@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->msg); break; - case IB_CM_MRA_REP_RCVD: - break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_DREQ_COUNTER]); From 0284d4d1be753f648f28b77bdfbe6a959212af5c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 22 Mar 2022 14:18:30 +0800 Subject: [PATCH 107/423] platform/x86: samsung-laptop: Fix an unsigned comparison which can never be negative Eliminate the follow smatch warnings: drivers/platform/x86/samsung-laptop.c:1124 kbd_led_set() warn: unsigned 'value' is never less than zero. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Link: https://lore.kernel.org/r/20220322061830.105579-1-jiapeng.chong@linux.alibaba.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/samsung-laptop.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index c1d9ed9b7b67..19f6b456234f 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev, if (value > samsung->kbd_led.max_brightness) value = samsung->kbd_led.max_brightness; - else if (value < 0) - value = 0; samsung->kbd_led_wk = value; queue_work(samsung->led_workqueue, &samsung->kbd_led_work); From 3f2a3c79a4536fba6a8eee8a4f49218467216300 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Wed, 23 Mar 2022 15:50:25 +0800 Subject: [PATCH 108/423] platform/x86: barco-p50-gpio: Fix duplicate included linux/io.h Clean up the following includecheck warning: drivers/platform/x86/barco-p50-gpio.c: linux/io.h is included more than once. No functional change. Signed-off-by: Haowen Bai Acked-by: Peter Korsgaard Link: https://lore.kernel.org/r/1648021825-6182-1-git-send-email-baihaowen@meizu.com Signed-off-by: Hans de Goede --- drivers/platform/x86/barco-p50-gpio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/x86/barco-p50-gpio.c b/drivers/platform/x86/barco-p50-gpio.c index f5c72e33f9ae..05534287bc26 100644 --- a/drivers/platform/x86/barco-p50-gpio.c +++ b/drivers/platform/x86/barco-p50-gpio.c @@ -10,7 +10,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include From c5547574797b254ba9c98c7da417bc5de71cd198 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Mar 2022 17:47:34 +0100 Subject: [PATCH 109/423] Documentation/ABI: sysfs-driver-intel_sdsi: Fix sphinx warnings Fix the following warnings from "make htmldocs": Documentation/ABI/testing/sysfs-driver-intel_sdsi:2: WARNING: Unexpected indentation. WARNING: Block quote ends without a blank line; unexpected unindent. WARNING: Definition list ends without a blank line; unexpected unindent. By turning the error-code table into a proper ReST table. While at it also fix the error-code table mixing tab and spaces for indentation (switch to all tabs). Reported-by: Stephen Rothwell Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220324164737.21765-2-hdegoede@redhat.com --- .../ABI/testing/sysfs-driver-intel_sdsi | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel_sdsi b/Documentation/ABI/testing/sysfs-driver-intel_sdsi index ab122125ff9a..96b92c105ec4 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel_sdsi +++ b/Documentation/ABI/testing/sysfs-driver-intel_sdsi @@ -13,17 +13,19 @@ Description: Should the operation fail, one of the following error codes may be returned: + ========== ===== Error Code Cause - ---------- ----- - EIO General mailbox failure. Log may indicate cause. - EBUSY Mailbox is owned by another agent. - EPERM SDSI capability is not enabled in hardware. - EPROTO Failure in mailbox protocol detected by driver. + ========== ===== + EIO General mailbox failure. Log may indicate cause. + EBUSY Mailbox is owned by another agent. + EPERM SDSI capability is not enabled in hardware. + EPROTO Failure in mailbox protocol detected by driver. See log for details. - EOVERFLOW For provision commands, the size of the data + EOVERFLOW For provision commands, the size of the data exceeds what may be written. - ESPIPE Seeking is not allowed. - ETIMEDOUT Failure to complete mailbox transaction in time. + ESPIPE Seeking is not allowed. + ETIMEDOUT Failure to complete mailbox transaction in time. + ========== ===== What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid Date: Feb 2022 From 45440a1d79eed68bcb8db236a6967a1d5c37a8ce Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Mar 2022 17:47:35 +0100 Subject: [PATCH 110/423] Documentation/ABI: sysfs-class-firmware-attributes: Fix Sphinx errors Fix the following warnings from "make htmldocs": Documentation/ABI/testing/sysfs-class-firmware-attributes:130: ERROR: Unexpected indentation. ERROR: Unexpected indentation. ERROR: Unexpected indentation. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220324164737.21765-3-hdegoede@redhat.com --- .../testing/sysfs-class-firmware-attributes | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes index 05820365f1ec..5356ff2ed6c8 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware-attributes +++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes @@ -246,9 +246,7 @@ Description: that is being referenced (e.g hdd0, hdd1 etc) This attribute defaults to device 0. - certificate: - signature: - save_signature: + certificate, signature, save_signature: These attributes are used for certificate based authentication. This is used in conjunction with a signing server as an alternative to password based authentication. @@ -257,22 +255,27 @@ Description: The attributes can be displayed to check the stored value. Some usage examples: - Installing a certificate to enable feature: - echo authentication/Admin/current_password - echo > authentication/Admin/certificate - Updating the installed certificate: - echo > authentication/Admin/signature - echo > authentication/Admin/certificate + Installing a certificate to enable feature:: - Removing the installed certificate: - echo > authentication/Admin/signature - echo '' > authentication/Admin/certificate + echo "supervisor password" > authentication/Admin/current_password + echo "signed certificate" > authentication/Admin/certificate - Changing a BIOS setting: - echo > authentication/Admin/signature - echo > authentication/Admin/save_signature - echo Enable > attribute/PasswordBeep/current_value + Updating the installed certificate:: + + echo "signature" > authentication/Admin/signature + echo "signed certificate" > authentication/Admin/certificate + + Removing the installed certificate:: + + echo "signature" > authentication/Admin/signature + echo "" > authentication/Admin/certificate + + Changing a BIOS setting:: + + echo "signature" > authentication/Admin/signature + echo "save signature" > authentication/Admin/save_signature + echo Enable > attribute/PasswordBeep/current_value You cannot enable certificate authentication if a supervisor password has not been set. @@ -288,9 +291,10 @@ Description: certificate_to_password: Write only attribute used to switch from certificate based authentication back to password based. - Usage: - echo > authentication/Admin/signature - echo > authentication/Admin/certificate_to_password + Usage:: + + echo "signature" > authentication/Admin/signature + echo "password" > authentication/Admin/certificate_to_password What: /sys/class/firmware-attributes/*/attributes/pending_reboot From 9aa6471419dc904c4f182295dbe00edfe4c92a29 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Mar 2022 17:47:36 +0100 Subject: [PATCH 111/423] Documentation/ABI: sysfs-class-firmware-attributes: Misc. cleanups Cleanup / fix some minor issues. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220324164737.21765-4-hdegoede@redhat.com --- Documentation/ABI/testing/sysfs-class-firmware-attributes | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes index 5356ff2ed6c8..4cdba3477176 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware-attributes +++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes @@ -116,7 +116,7 @@ Description: [ForceIf:=] [ForceIfNot:=] - For example: + For example:: LegacyOrom/dell_value_modifier has value: Disabled[ForceIf:SecureBoot=Enabled] @@ -212,7 +212,7 @@ Description: the next boot. Lenovo specific class extensions - ------------------------------ + -------------------------------- On Lenovo systems the following additional settings are available: @@ -349,7 +349,7 @@ Description: # echo "factory" > /sys/class/firmware-attributes/*/device/attributes/reset_bios # cat /sys/class/firmware-attributes/*/device/attributes/reset_bios - # builtinsafe lastknowngood [factory] custom + builtinsafe lastknowngood [factory] custom Note that any changes to this attribute requires a reboot for changes to take effect. From 487532ec20c1a0b9fc85c1265fa81f04a151f007 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Sat, 26 Mar 2022 10:02:49 +0800 Subject: [PATCH 112/423] platform/x86: acerhdf: Cleanup str_starts_with() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since there is already a generic function strstarts() that check if a string starts with a given prefix, cleanup str_starts_with(). Signed-off-by: Wei Li Acked-by: Peter Kästle Link: https://lore.kernel.org/r/20220326020249.3266561-1-liwei391@huawei.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/acerhdf.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 6b8b3ab8db48..3463629f8764 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -584,21 +584,6 @@ static struct platform_driver acerhdf_driver = { .remove = acerhdf_remove, }; -/* checks if str begins with start */ -static int str_starts_with(const char *str, const char *start) -{ - unsigned long str_len = 0, start_len = 0; - - str_len = strlen(str); - start_len = strlen(start); - - if (str_len >= start_len && - !strncmp(str, start, start_len)) - return 1; - - return 0; -} - /* check hardware */ static int __init acerhdf_check_hardware(void) { @@ -651,9 +636,9 @@ static int __init acerhdf_check_hardware(void) * check if actual hardware BIOS vendor, product and version * IDs start with the strings of BIOS table entry */ - if (str_starts_with(vendor, bt->vendor) && - str_starts_with(product, bt->product) && - str_starts_with(version, bt->version)) { + if (strstarts(vendor, bt->vendor) && + strstarts(product, bt->product) && + strstarts(version, bt->version)) { found = 1; break; } From 753ee989f7cf0c0a76a7f56956827a8863a60f97 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 2 Apr 2022 18:11:22 -0500 Subject: [PATCH 113/423] platform/x86: amd-pmc: Fix compilation without CONFIG_SUSPEND Since commit b1f66033cd4e ("platform/x86: amd-pmc: Move to later in the suspend process") amd-pmc doesn't use traditional suspend resume callback anymore but relies on functions only created declared when CONFIG_SUSPEND is set. Check for CONFIG_SUSPEND and only use those functions in those circumstances. Fixes: commit b1f66033cd4e ("platform/x86: amd-pmc: Move to later in the suspend process") Reported-by: Randy Dunlap Signed-off-by: Mario Limonciello Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20220402231122.3877-1-mario.limonciello@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index e9d0dbbb2887..fa4123dbdf7f 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -160,8 +160,10 @@ MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism"); static struct amd_pmc_dev pmc; static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret); -static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf); +#ifdef CONFIG_SUSPEND +static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); +#endif static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) { @@ -325,6 +327,7 @@ static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table return 0; } +#ifdef CONFIG_SUSPEND static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) { struct smu_metrics table; @@ -338,6 +341,7 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n", table.timein_s0i3_lastcapture); } +#endif #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) @@ -569,6 +573,7 @@ out_unlock: return rc; } +#ifdef CONFIG_SUSPEND static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) { switch (dev->cpu_id) { @@ -694,6 +699,7 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = { .prepare = amd_pmc_s2idle_prepare, .restore = amd_pmc_s2idle_restore, }; +#endif static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, @@ -733,6 +739,7 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) return 0; } +#ifdef CONFIG_SUSPEND static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) { int err; @@ -753,6 +760,7 @@ static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) return 0; } +#endif static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf) { @@ -859,9 +867,11 @@ static int amd_pmc_probe(struct platform_device *pdev) amd_pmc_get_smu_version(dev); platform_set_drvdata(pdev, dev); +#ifdef CONFIG_SUSPEND err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops); if (err) dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n"); +#endif amd_pmc_dbgfs_register(dev); return 0; @@ -875,7 +885,9 @@ static int amd_pmc_remove(struct platform_device *pdev) { struct amd_pmc_dev *dev = platform_get_drvdata(pdev); +#ifdef CONFIG_SUSPEND acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops); +#endif amd_pmc_dbgfs_unregister(dev); pci_dev_put(dev->rdev); mutex_destroy(&dev->lock); From 20314bacd2f9b1b8fc10895417e6db0dc85f8248 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 4 Apr 2022 06:43:38 -0700 Subject: [PATCH 114/423] staging: r8188eu: Fix PPPoE tag insertion on little endian systems In __nat25_add_pppoe_tag(), the tag length is read from the tag data structure. The value is kept in network format, but read as raw value. With -Warray-bounds, this results in the following gcc error/warning when building the driver on alpha. In function '__nat25_add_pppoe_tag', inlined from 'nat25_db_handle' at drivers/staging/r8188eu/core/rtw_br_ext.c:479:11: arch/alpha/include/asm/string.h:22:16: error: '__builtin_memcpy' forming offset [40, 2051] is out of the bounds [0, 40] of object 'tag_buf' with type 'unsigned char[40]' Add the missing be16_to_cpu() to fix the compile error. It should be noted, however, that this fix means that the code did probably not work on any little endian systems and/or that the driver has other endiannes related issues. A build with C=1 suggests that this is indeed the case. This patch does not attempt to fix any of those other issues. Fixes: 15865124feed ("staging: r8188eu: introduce new core dir for RTL8188eu driver") Cc: Phillip Potter Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20220404134338.3276991-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index d68611ef22f8..f056204c0fdb 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -70,7 +70,7 @@ static int __nat25_add_pppoe_tag(struct sk_buff *skb, struct pppoe_tag *tag) struct pppoe_hdr *ph = (struct pppoe_hdr *)(skb->data + ETH_HLEN); int data_len; - data_len = tag->tag_len + TAG_HDR_LEN; + data_len = be16_to_cpu(tag->tag_len) + TAG_HDR_LEN; if (skb_tailroom(skb) < data_len) return -1; From 94865e2dcb46c1c852c881cfa769cec4947d8f28 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 4 Apr 2022 06:48:59 -0700 Subject: [PATCH 115/423] habanalabs: Fix test build failures allmodconfig builds on 32-bit architectures fail with the following error. drivers/misc/habanalabs/common/memory.c: In function 'alloc_device_memory': drivers/misc/habanalabs/common/memory.c:153:49: error: cast from pointer to integer of different size Fix the typecast. While at it, drop other unnecessary typecasts associated with the same commit. Fixes: e8458e20e0a3c ("habanalabs: make sure device mem alloc is page aligned") Cc: Ohad Sharabi Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20220404134859.3278599-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/misc/habanalabs/common/memory.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index e008d82e4ba3..a13506dd8119 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -111,10 +111,10 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, if (contiguous) { if (is_power_of_2(page_size)) - paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, - total_size, NULL, page_size); + paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, + total_size, NULL, page_size); else - paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size); + paddr = gen_pool_alloc(vm->dram_pg_pool, total_size); if (!paddr) { dev_err(hdev->dev, "failed to allocate %llu contiguous pages with total size of %llu\n", @@ -150,12 +150,12 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, for (i = 0 ; i < num_pgs ; i++) { if (is_power_of_2(page_size)) phys_pg_pack->pages[i] = - (u64) gen_pool_dma_alloc_align(vm->dram_pg_pool, - page_size, NULL, - page_size); + (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool, + page_size, NULL, + page_size); else - phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool, - page_size); + phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool, + page_size); if (!phys_pg_pack->pages[i]) { dev_err(hdev->dev, "Failed to allocate device memory (out of memory)\n"); From 687127c81ad32c8900a3fedbc7ed8f686ca95855 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Mar 2022 15:01:50 -0300 Subject: [PATCH 116/423] cifs: fix potential race with cifsd thread To avoid racing with demultiplex thread while it is handling data on socket, use cifs_signal_cifsd_for_reconnect() helper for marking current server to reconnect and let the demultiplex thread handle the rest. Fixes: dca65818c80c ("cifs: use a different reconnect helper for non-cifsd threads") Reviewed-by: Enzo Matsumiya Reviewed-by: Shyam Prasad N Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- fs/cifs/netmisc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ee3b7c15e884..3ca06bd88b6e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4465,7 +4465,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco */ if (rc && server->current_fullpath != server->origin_fullpath) { server->current_fullpath = server->origin_fullpath; - cifs_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(server, true); } dfs_cache_free_tgts(tl); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index ebe236b9d9f5..235aa1b395eb 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -896,7 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) if (class == ERRSRV && code == ERRbaduid) { cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n", code); - cifs_reconnect(mid->server, false); + cifs_signal_cifsd_for_reconnect(mid->server, false); } } From 00c796eecba4898194ea549679797ee28f89a92f Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Thu, 31 Mar 2022 23:55:41 +0200 Subject: [PATCH 117/423] cifs: remove check of list iterator against head past the loop body When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Jakob Koschel Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index c653beb735b8..3fe47a88f47d 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -150,16 +150,18 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) struct smb2_transform_hdr *thdr = (struct smb2_transform_hdr *)buf; struct cifs_ses *ses = NULL; + struct cifs_ses *iter; /* decrypt frame now that it is completely read in */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &srvr->smb_ses_list, smb_ses_list) { - if (ses->Suid == le64_to_cpu(thdr->SessionId)) + list_for_each_entry(iter, &srvr->smb_ses_list, smb_ses_list) { + if (iter->Suid == le64_to_cpu(thdr->SessionId)) { + ses = iter; break; + } } spin_unlock(&cifs_tcp_ses_lock); - if (list_entry_is_head(ses, &srvr->smb_ses_list, - smb_ses_list)) { + if (!ses) { cifs_dbg(VFS, "no decryption - session id not found\n"); return 1; } From 10cb21f4ff3f9cb36d1e1c39bf80426f02f4986a Mon Sep 17 00:00:00 2001 From: Anilkumar Kolli Date: Thu, 31 Mar 2022 10:07:57 +0530 Subject: [PATCH 118/423] Revert "ath11k: mesh: add support for 256 bitmap in blockack frames in 11ax" This reverts commit 743b9065fe6348a5f8f5ce04869ce2d701e5e1bc. The original commit breaks the 256 bitmap in blockack frames in AP mode. After reverting the commit the feature works again in both AP and mesh modes Tested-on: IPQ8074 hw2.0 PCI WLAN.HK.2.6.0.1-00786-QCAHKSWPL_SILICONZ-1 Fixes: 743b9065fe63 ("ath11k: mesh: add support for 256 bitmap in blockack frames in 11ax") Signed-off-by: Anilkumar Kolli Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1648701477-16367-1-git-send-email-quic_akolli@quicinc.com --- drivers/net/wireless/ath/ath11k/mac.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index d5b83f90d27a..e6b34b0d61bd 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3136,6 +3136,20 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, arvif->do_not_send_tmpl = true; else arvif->do_not_send_tmpl = false; + + if (vif->bss_conf.he_support) { + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, + WMI_VDEV_PARAM_BA_MODE, + WMI_BA_MODE_BUFFER_SIZE_256); + if (ret) + ath11k_warn(ar->ab, + "failed to set BA BUFFER SIZE 256 for vdev: %d\n", + arvif->vdev_id); + else + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "Set BA BUFFER SIZE 256 for VDEV: %d\n", + arvif->vdev_id); + } } if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { @@ -3171,14 +3185,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, if (arvif->is_up && vif->bss_conf.he_support && vif->bss_conf.he_oper.params) { - ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - WMI_VDEV_PARAM_BA_MODE, - WMI_BA_MODE_BUFFER_SIZE_256); - if (ret) - ath11k_warn(ar->ab, - "failed to set BA BUFFER SIZE 256 for vdev: %d\n", - arvif->vdev_id); - param_id = WMI_VDEV_PARAM_HEOPS_0_31; param_value = vif->bss_conf.he_oper.params; ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, From d39268ad24c0fd0665d0c5cf55a7c1a0ebf94766 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 18 Mar 2022 06:52:59 -0700 Subject: [PATCH 119/423] x86/mm/tlb: Revert retpoline avoidance approach 0day reported a regression on a microbenchmark which is intended to stress the TLB flushing path: https://lore.kernel.org/all/20220317090415.GE735@xsang-OptiPlex-9020/ It pointed at a commit from Nadav which intended to remove retpoline overhead in the TLB flushing path by taking the 'cond'-ition in on_each_cpu_cond_mask(), pre-calculating it, and incorporating it into 'cpumask'. That allowed the code to use a bunch of earlier direct calls instead of later indirect calls that need a retpoline. But, in practice, threads can go idle (and into lazy TLB mode where they don't need to flush their TLB) between the early and late calls. It works in this direction and not in the other because TLB-flushing threads tend to hold mmap_lock for write. Contention on that lock causes threads to _go_ idle right in this early/late window. There was not any performance data in the original commit specific to the retpoline overhead. I did a few tests on a system with retpolines: https://lore.kernel.org/all/dd8be93c-ded6-b962-50d4-96b1c3afb2b7@intel.com/ which showed a possible small win. But, that small win pales in comparison with the bigger loss induced on non-retpoline systems. Revert the patch that removed the retpolines. This was not a clean revert, but it was self-contained enough not to be too painful. Fixes: 6035152d8eeb ("x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy()") Reported-by: kernel test robot Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Acked-by: Nadav Amit Cc: Link: https://lkml.kernel.org/r/164874672286.389.7021457716635788197.tip-bot2@tip-bot2 --- arch/x86/mm/tlb.c | 37 +++++-------------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6eb4d91d5365..d400b6d9d246 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -855,13 +855,11 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu) +static bool tlb_is_not_lazy(int cpu, void *data) { return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); } -static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared); @@ -890,36 +888,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. */ - if (info->freed_tables) { + if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); - } else { - /* - * Although we could have used on_each_cpu_cond_mask(), - * open-coding it has performance advantages, as it eliminates - * the need for indirect calls or retpolines. In addition, it - * allows to use a designated cpumask for evaluating the - * condition, instead of allocating one. - * - * This code works under the assumption that there are no nested - * TLB flushes, an assumption that is already made in - * flush_tlb_mm_range(). - * - * cond_cpumask is logically a stack-local variable, but it is - * more efficient to have it off the stack and not to allocate - * it on demand. Preemption is disabled and this code is - * non-reentrant. - */ - struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask); - int cpu; - - cpumask_clear(cond_cpumask); - - for_each_cpu(cpu, cpumask) { - if (tlb_is_not_lazy(cpu)) - __cpumask_set_cpu(cpu, cond_cpumask); - } - on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true); - } + else + on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + (void *)info, 1, cpumask); } void flush_tlb_multi(const struct cpumask *cpumask, From 4d809f69695d4e7d1378b3a072fa9aef23123018 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 28 Feb 2022 17:53:30 +0100 Subject: [PATCH 120/423] IB/rdmavt: add lock to call to rvt_error_qp to prevent a race condition The documentation of the function rvt_error_qp says both r_lock and s_lock need to be held when calling that function. It also asserts using lockdep that both of those locks are held. However, the commit I referenced in Fixes accidentally makes the call to rvt_error_qp in rvt_ruc_loopback no longer covered by r_lock. This results in the lockdep assertion failing and also possibly in a race condition. Fixes: d757c60eca9b ("IB/rdmavt: Fix concurrency panics in QP post_send and modify to error") Link: https://lore.kernel.org/r/20220228165330.41546-1-dossche.niels@gmail.com Signed-off-by: Niels Dossche Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ae50b56e8913..8ef112f883a7 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -3190,7 +3190,11 @@ serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); rvt_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); From 02c7efa43627163e489a8db87882445a0ff381f7 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 31 Jan 2022 13:23:41 -0800 Subject: [PATCH 121/423] Documentation: kunit: fix path to .kunitconfig in start.rst Commit ddbd60c779b4 ("kunit: use --build_dir=.kunit as default") changed the default --build_dir, which had the side effect of making `.kunitconfig` move to `.kunit/.kunitconfig`. However, the first few lines of kunit/start.rst never got updated, oops. Fix this by telling people to run kunit.py first, which will automatically generate the .kunit directory and .kunitconfig file, and then edit the file manually as desired. Reported-by: Yifan Yuan Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/start.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst index ad168d16968f..867a4bba6bf6 100644 --- a/Documentation/dev-tools/kunit/start.rst +++ b/Documentation/dev-tools/kunit/start.rst @@ -41,13 +41,18 @@ or ``VFAT_FS``. To run ``FAT_KUNIT_TEST``, the ``.kunitconfig`` has: CONFIG_MSDOS_FS=y CONFIG_FAT_KUNIT_TEST=y -1. A good starting point for the ``.kunitconfig``, is the KUnit default - config. Run the command: +1. A good starting point for the ``.kunitconfig`` is the KUnit default config. + You can generate it by running: .. code-block:: bash cd $PATH_TO_LINUX_REPO - cp tools/testing/kunit/configs/default.config .kunitconfig + tools/testing/kunit/kunit.py config + cat .kunit/.kunitconfig + +.. note :: + ``.kunitconfig`` lives in the ``--build_dir`` used by kunit.py, which is + ``.kunit`` by default. .. note :: You may want to remove CONFIG_KUNIT_ALL_TESTS from the ``.kunitconfig`` as From dfbba2518aac4204203b0697a894d3b2f80134d3 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sun, 3 Apr 2022 15:23:22 +0900 Subject: [PATCH 122/423] Revert "ACPI: processor: idle: Only flush cache on entering C3" Revert commit 87ebbb8c612b ("ACPI: processor: idle: Only flush cache on entering C3") that broke the assumptions of the acpi_idle_play_dead() callers. Namely, the CPU cache must always be flushed in acpi_idle_play_dead(), regardless of the target C-state that is going to be requested, because this is likely to be part of a CPU offline procedure or preparation for entering a system-wide sleep state and the lack of synchronization between the CPU cache and RAM may lead to problems going forward, for example when the CPU is brought back online. In particular, it breaks resume from suspend-to-RAM on Lenovo ThinkPad C13 which fails occasionally until the problematic commit is reverted. Signed-off-by: Akihiko Odaki [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 32b20efff5f8..4556c86c3465 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -570,8 +570,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); - if (cx->type == ACPI_STATE_C3) - ACPI_FLUSH_CPU_CACHE(); + ACPI_FLUSH_CPU_CACHE(); while (1) { From 8ff88bec6f6186c406aa71312b2919e56f7b8084 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Mon, 21 Mar 2022 13:27:42 +0800 Subject: [PATCH 123/423] selftests/vDSO: fix array_size.cocci warning Fix the following coccicheck warning: tools/testing/selftests/vDSO/vdso_test_correctness.c:309:46-47: WARNING: Use ARRAY_SIZE tools/testing/selftests/vDSO/vdso_test_correctness.c:373:46-47: WARNING: Use ARRAY_SIZE It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64. Signed-off-by: Guo Zhengkui Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_correctness.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index c4aea794725a..e691a3cf1491 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -20,6 +20,7 @@ #include #include "vdso_config.h" +#include "../kselftest.h" static const char **name; @@ -306,10 +307,8 @@ static void test_clock_gettime(void) return; } - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { + for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++) test_one_clock_gettime(clock, clocknames[clock]); - } /* Also test some invalid clock ids */ test_one_clock_gettime(-1, "invalid"); @@ -370,10 +369,8 @@ static void test_clock_gettime64(void) return; } - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { + for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++) test_one_clock_gettime64(clock, clocknames[clock]); - } /* Also test some invalid clock ids */ test_one_clock_gettime64(-1, "invalid"); From 1585b1b55a2b9086823a6b30031eb63f965f8d44 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Mon, 21 Mar 2022 18:25:17 +0800 Subject: [PATCH 124/423] selftests/proc: fix array_size.cocci warning Fix the following coccicheck warning: tools/testing/selftests/proc/proc-pid-vm.c:371:26-27: WARNING: Use ARRAY_SIZE tools/testing/selftests/proc/proc-pid-vm.c:420:26-27: WARNING: Use ARRAY_SIZE It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64. Signed-off-by: Guo Zhengkui Signed-off-by: Shuah Khan --- tools/testing/selftests/proc/proc-pid-vm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 18a3bde8bc96..28604c9f805c 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -46,6 +46,8 @@ #include #include +#include "../kselftest.h" + static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) { return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); @@ -368,7 +370,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } @@ -417,7 +419,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } } From aa8ce29931d6a37aa80f10ae7aa30045108f276d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Mar 2022 17:55:54 +0800 Subject: [PATCH 125/423] selftests: x86: add 32bit build warnings for SUSE In order to successfully build all these 32bit tests, these 32bit gcc and glibc packages, named gcc-32bit and glibc-devel-static-32bit on SUSE, need to be installed. This patch added this information in warn_32bit_failure. Signed-off-by: Geliang Tang Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 53df7d3893d3..0388c4d60af0 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -92,6 +92,10 @@ warn_32bit_failure: echo "If you are using a Fedora-like distribution, try:"; \ echo ""; \ echo " yum install glibc-devel.*i686"; \ + echo ""; \ + echo "If you are using a SUSE-like distribution, try:"; \ + echo ""; \ + echo " zypper install gcc-32bit glibc-devel-static-32bit"; \ exit 0; endif From 52035628fae646269b1379417926fa3d60ef87d0 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 24 Mar 2022 15:39:28 -0700 Subject: [PATCH 126/423] selftests: fix header dependency for pid_namespace selftests The way the test target was defined before, when building with clang we get a command line like this: clang -Wall -Werror -g -I../../../../usr/include/ \ regression_enomem.c ../pidfd/pidfd.h -o regression_enomem This yields an error, because clang thinks we want to produce both a *.o file, as well as a precompiled header: clang: error: cannot specify -o when generating multiple output files gcc, for whatever reason, doesn't exhibit the same behavior which I suspect is why the problem wasn't noticed before. This can be fixed simply by using the LOCAL_HDRS infrastructure the selftests lib.mk provides. It does the right think and marks the target as depending on the header (so if the header changes, we rebuild), but it filters the header out of the compiler command line, so we don't get the error described above. Signed-off-by: Axel Rasmussen Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pid_namespace/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index dcaefa224ca0..edafaca1aeb3 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g -I../../../../usr/include/ -TEST_GEN_PROGS := regression_enomem +TEST_GEN_PROGS = regression_enomem + +LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h include ../lib.mk - -$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h From 187816d07729ff88e75d84efbc668642106cebf3 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 24 Mar 2022 15:39:29 -0700 Subject: [PATCH 127/423] selftests: fix an unused variable warning in pidfd selftest I fixed a few warnings like this in commit e2aa5e650b07 ("selftests: fixup build warnings in pidfd / clone3 tests"), but I missed this one by mistake. Since this variable is unused, remove it. Signed-off-by: Axel Rasmussen Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_wait.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 17999e082aa7..070c1c876df1 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -95,7 +95,6 @@ TEST(wait_states) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; pid_t pid; siginfo_t info = { .si_signo = 0, From 63e6b2a42342c3297cce286fb124c99be9e0f3fd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 24 Mar 2022 16:19:06 -0700 Subject: [PATCH 128/423] selftests/harness: Run TEARDOWN for ASSERT failures The kselftest test harness has traditionally not run the registered TEARDOWN handler when a test encountered an ASSERT. This creates unexpected situations and tests need to be very careful about using ASSERT, which seems a needless hurdle for test writers. Because of the harness's design for optional failure handlers, the original implementation of ASSERT used an abort() to immediately stop execution, but that meant the context for running teardown was lost. Instead, use setjmp/longjmp so that teardown can be done. Failed SETUP routines continue to not be followed by TEARDOWN, though. Cc: Andy Lutomirski Cc: Will Drewry Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 49 ++++++++++++++------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 11779405dc80..696eab05f995 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -64,6 +64,7 @@ #include #include #include +#include #include "kselftest.h" @@ -183,7 +184,10 @@ struct __test_metadata *_metadata, \ struct __fixture_variant_metadata *variant) \ { \ - test_name(_metadata); \ + _metadata->setup_completed = true; \ + if (setjmp(_metadata->env) == 0) \ + test_name(_metadata); \ + __test_check_assert(_metadata); \ } \ static struct __test_metadata _##test_name##_object = \ { .name = #test_name, \ @@ -356,10 +360,7 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. - * - * Warning: use of ASSERT_* here will skip TEARDOWN. */ -/* TODO(wad) register fixtures on dedicated test lists. */ #define TEST_F(fixture_name, test_name) \ __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) @@ -381,12 +382,17 @@ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ - fixture_name##_setup(_metadata, &self, variant->data); \ - /* Let setup failure terminate early. */ \ - if (!_metadata->passed) \ - return; \ - fixture_name##_##test_name(_metadata, &self, variant->data); \ - fixture_name##_teardown(_metadata, &self); \ + if (setjmp(_metadata->env) == 0) { \ + fixture_name##_setup(_metadata, &self, variant->data); \ + /* Let setup failure terminate early. */ \ + if (!_metadata->passed) \ + return; \ + _metadata->setup_completed = true; \ + fixture_name##_##test_name(_metadata, &self, variant->data); \ + } \ + if (_metadata->setup_completed) \ + fixture_name##_teardown(_metadata, &self); \ + __test_check_assert(_metadata); \ } \ static struct __test_metadata \ _##fixture_name##_##test_name##_object = { \ @@ -683,7 +689,7 @@ */ #define OPTIONAL_HANDLER(_assert) \ for (; _metadata->trigger; _metadata->trigger = \ - __bail(_assert, _metadata->no_print, _metadata->step)) + __bail(_assert, _metadata)) #define __INC_STEP(_metadata) \ /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \ @@ -830,6 +836,9 @@ struct __test_metadata { bool timed_out; /* did this test timeout instead of exiting? */ __u8 step; bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ + bool aborted; /* stopped test due to failed ASSERT */ + bool setup_completed; /* did setup finish? */ + jmp_buf env; /* for exiting out of test early */ struct __test_results *results; struct __test_metadata *prev, *next; }; @@ -848,14 +857,24 @@ static inline void __register_test(struct __test_metadata *t) __LIST_APPEND(t->fixture->tests, t); } -static inline int __bail(int for_realz, bool no_print, __u8 step) +static inline int __bail(int for_realz, struct __test_metadata *t) { + /* if this is ASSERT, return immediately. */ if (for_realz) { - if (no_print) - _exit(step); + t->aborted = true; + longjmp(t->env, 1); + } + /* otherwise, end the for loop and continue. */ + return 0; +} + +static inline void __test_check_assert(struct __test_metadata *t) +{ + if (t->aborted) { + if (t->no_print) + _exit(t->step); abort(); } - return 0; } struct __test_metadata *__active_test; From 79ee8aa31d518c1fd5f3b1b1ac39dd1fb4dc7039 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Mar 2022 16:19:07 -0700 Subject: [PATCH 129/423] selftests/harness: Pass variant to teardown FIXTURE_VARIANT data is passed to FIXTURE_SETUP and TEST_F as "variant". In some cases, the variant will change the setup, such that expectations also change on teardown. Also pass variant to FIXTURE_TEARDOWN. The new FIXTURE_TEARDOWN logic is identical to that in FIXTURE_SETUP, right above. Signed-off-by: Willem de Bruijn Reviewed-by: Jakub Kicinski Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201210231010.420298-1-willemdebruijn.kernel@gmail.com Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 696eab05f995..25f4d54067c0 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -291,7 +291,9 @@ #define FIXTURE_TEARDOWN(fixture_name) \ void fixture_name##_teardown( \ struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) /** * FIXTURE_VARIANT() - Optionally called once per fixture @@ -306,9 +308,9 @@ * ... * }; * - * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F() - * as *variant*. Variants allow the same tests to be run with different - * arguments. + * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and + * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with + * different arguments. */ #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name @@ -391,7 +393,7 @@ fixture_name##_##test_name(_metadata, &self, variant->data); \ } \ if (_metadata->setup_completed) \ - fixture_name##_teardown(_metadata, &self); \ + fixture_name##_teardown(_metadata, &self, variant->data); \ __test_check_assert(_metadata); \ } \ static struct __test_metadata \ From 38d4e5cf5b08798f093374e53c2f4609d5382dd5 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 22 Mar 2022 13:48:00 +0100 Subject: [PATCH 130/423] drm/nouveau/pmu: Add missing callbacks for Tegra devices Fixes a crash booting on those platforms with nouveau. Fixes: 4cdd2450bf73 ("drm/nouveau/pmu/gm200-: use alternate falcon reset sequence") Cc: Ben Skeggs Cc: Karol Herbst Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: # v5.17+ Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220322124800.2605463-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c index e1772211b0a4..612310d5d481 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -216,6 +216,7 @@ gm20b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gf100_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c index 6bf7fc1bd1e3..1a6f9c3af5ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c @@ -23,7 +23,7 @@ */ #include "priv.h" -static void +void gp102_pmu_reset(struct nvkm_pmu *pmu) { struct nvkm_device *device = pmu->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index ba1583bb618b..94cfb1791af6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -83,6 +83,7 @@ gp10b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gp102_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index bcaade758ff7..21abf31f4442 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -41,6 +41,7 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); bool gf100_pmu_enabled(struct nvkm_pmu *); void gf100_pmu_reset(struct nvkm_pmu *); +void gp102_pmu_reset(struct nvkm_pmu *pmu); void gk110_pmu_pgob(struct nvkm_pmu *, bool); From a3e4bc23d5470b2beb7cc42a86b6a3e75b704c15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Mar 2022 10:59:20 -0600 Subject: [PATCH 131/423] io_uring: defer splice/tee file validity check until command issue In preparation for not using the file at prep time, defer checking if this file refers to a valid io_uring instance until issue time. This also means we can get rid of the cleanup flag for splice and tee. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9108c56bff5b..0152ef49cf46 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -654,10 +654,10 @@ struct io_epoll { struct io_splice { struct file *file_out; - struct file *file_in; loff_t off_out; loff_t off_in; u64 len; + int splice_fd_in; unsigned int flags; }; @@ -1687,14 +1687,6 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - - switch (req->opcode) { - case IORING_OP_SPLICE: - case IORING_OP_TEE: - if (!S_ISREG(file_inode(req->splice.file_in)->i_mode)) - req->work.flags |= IO_WQ_WORK_UNBOUND; - break; - } } static void io_prep_async_link(struct io_kiocb *req) @@ -4369,18 +4361,11 @@ static int __io_splice_prep(struct io_kiocb *req, if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - sp->file_in = NULL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); - if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; - - sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in), - (sp->flags & SPLICE_F_FD_IN_FIXED)); - if (!sp->file_in) - return -EBADF; - req->flags |= REQ_F_NEED_CLEANUP; + sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); return 0; } @@ -4395,20 +4380,27 @@ static int io_tee_prep(struct io_kiocb *req, static int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; + struct file *in; long ret = 0; if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + + in = io_file_get(req->ctx, req, sp->splice_fd_in, + (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (!in) { + ret = -EBADF; + goto done; + } + if (sp->len) ret = do_tee(in, out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) io_put_file(in); - req->flags &= ~REQ_F_NEED_CLEANUP; - +done: if (ret != sp->len) req_set_fail(req); io_req_complete(req, ret); @@ -4427,15 +4419,22 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; + struct file *in; long ret = 0; if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + in = io_file_get(req->ctx, req, sp->splice_fd_in, + (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (!in) { + ret = -EBADF; + goto done; + } + poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; @@ -4444,8 +4443,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) io_put_file(in); - req->flags &= ~REQ_F_NEED_CLEANUP; - +done: if (ret != sp->len) req_set_fail(req); io_req_complete(req, ret); @@ -7176,11 +7174,6 @@ static void io_clean_op(struct io_kiocb *req) kfree(io->free_iov); break; } - case IORING_OP_SPLICE: - case IORING_OP_TEE: - if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(req->splice.file_in); - break; case IORING_OP_OPENAT: case IORING_OP_OPENAT2: if (req->open.filename) From 584b0180f0f4d67d7145950fe68c625f06c88b10 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Mar 2022 10:48:05 -0600 Subject: [PATCH 132/423] io_uring: move read/write file prep state into actual opcode handler In preparation for not necessarily having a file assigned at prep time, defer any initialization associated with the file to when the opcode handler is run. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 101 ++++++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0152ef49cf46..969f65de9972 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -592,7 +592,8 @@ struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; u64 addr; - u64 len; + u32 len; + u32 flags; }; struct io_connect { @@ -3178,42 +3179,11 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req) static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; - struct file *file = req->file; unsigned ioprio; int ret; - if (!io_req_ffs_set(req)) - req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; - kiocb->ki_pos = READ_ONCE(sqe->off); - kiocb->ki_flags = iocb_flags(file); - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); - if (unlikely(ret)) - return ret; - - /* - * If the file is marked O_NONBLOCK, still allow retry for it if it - * supports async. Otherwise it's impossible to use O_NONBLOCK files - * reliably. If not, or it IOCB_NOWAIT is set, don't retry. - */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || - ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) - req->flags |= REQ_F_NOWAIT; - - if (ctx->flags & IORING_SETUP_IOPOLL) { - if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) - return -EOPNOTSUPP; - - kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; - kiocb->ki_complete = io_complete_rw_iopoll; - req->iopoll_completed = 0; - } else { - if (kiocb->ki_flags & IOCB_HIPRI) - return -EINVAL; - kiocb->ki_complete = io_complete_rw; - } ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { @@ -3229,6 +3199,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = NULL; req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); + req->rw.flags = READ_ONCE(sqe->rw_flags); req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -3732,13 +3703,6 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) return 0; } -static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(!(req->file->f_mode & FMODE_READ))) - return -EBADF; - return io_prep_rw(req, sqe); -} - /* * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. @@ -3826,6 +3790,49 @@ static bool need_read_all(struct io_kiocb *req) S_ISBLK(file_inode(req->file)->i_mode); } +static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) +{ + struct kiocb *kiocb = &req->rw.kiocb; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = req->file; + int ret; + + if (unlikely(!file || !(file->f_mode & mode))) + return -EBADF; + + if (!io_req_ffs_set(req)) + req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; + + kiocb->ki_flags = iocb_flags(file); + ret = kiocb_set_rw_flags(kiocb, req->rw.flags); + if (unlikely(ret)) + return ret; + + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. + */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) + req->flags |= REQ_F_NOWAIT; + + if (ctx->flags & IORING_SETUP_IOPOLL) { + if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) + return -EOPNOTSUPP; + + kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; + kiocb->ki_complete = io_complete_rw_iopoll; + req->iopoll_completed = 0; + } else { + if (kiocb->ki_flags & IOCB_HIPRI) + return -EINVAL; + kiocb->ki_complete = io_complete_rw; + } + + return 0; +} + static int io_read(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw_state __s, *s = &__s; @@ -3861,6 +3868,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) iov_iter_restore(&s->iter, &s->iter_state); iovec = NULL; } + ret = io_rw_init_file(req, FMODE_READ); + if (unlikely(ret)) + return ret; req->result = iov_iter_count(&s->iter); if (force_nonblock) { @@ -3964,13 +3974,6 @@ out_free: return 0; } -static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(!(req->file->f_mode & FMODE_WRITE))) - return -EBADF; - return io_prep_rw(req, sqe); -} - static int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw_state __s, *s = &__s; @@ -3991,6 +3994,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) iov_iter_restore(&s->iter, &s->iter_state); iovec = NULL; } + ret = io_rw_init_file(req, FMODE_WRITE); + if (unlikely(ret)) + return ret; req->result = iov_iter_count(&s->iter); if (force_nonblock) { @@ -6987,11 +6993,10 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_READV: case IORING_OP_READ_FIXED: case IORING_OP_READ: - return io_read_prep(req, sqe); case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: - return io_write_prep(req, sqe); + return io_prep_rw(req, sqe); case IORING_OP_POLL_ADD: return io_poll_add_prep(req, sqe); case IORING_OP_POLL_REMOVE: From fb39d30e227233498c8debe6a9fe3e7cf575c85f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 1 Apr 2022 13:51:34 -0300 Subject: [PATCH 133/423] cifs: force new session setup and tcon for dfs Do not reuse existing sessions and tcons in DFS failover as it might connect to different servers and shares. Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/connect.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3ca06bd88b6e..54155eb4faac 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -453,9 +453,7 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_ return rc; } -static int -reconnect_dfs_server(struct TCP_Server_Info *server, - bool mark_smb_session) +static int reconnect_dfs_server(struct TCP_Server_Info *server) { int rc = 0; const char *refpath = server->current_fullpath + 1; @@ -479,7 +477,12 @@ reconnect_dfs_server(struct TCP_Server_Info *server, if (!cifs_tcp_ses_needs_reconnect(server, num_targets)) return 0; - cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); + /* + * Unconditionally mark all sessions & tcons for reconnect as we might be connecting to a + * different server or share during failover. It could be improved by adding some logic to + * only do that in case it connects to a different server or share, though. + */ + cifs_mark_tcp_ses_conns_for_reconnect(server, true); cifs_abort_connection(server); @@ -537,7 +540,7 @@ int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) } spin_unlock(&cifs_tcp_ses_lock); - return reconnect_dfs_server(server, mark_smb_session); + return reconnect_dfs_server(server); } #else int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) From 7cd1cc415dd8d0dca7244c9eafb9a0adc8036805 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 Apr 2022 17:50:59 -0500 Subject: [PATCH 134/423] cifs: update internal module number To 2.36 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 15a5c5db038b..c0542bdcd06b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -153,5 +153,5 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ #define SMB3_PRODUCT_BUILD 35 -#define CIFS_VERSION "2.35" +#define CIFS_VERSION "2.36" #endif /* _CIFSFS_H */ From 8047f98c8958d0f0c29882298ec293ff09ffea92 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 23 Mar 2022 16:48:23 +0100 Subject: [PATCH 135/423] dt-bindings: display: bridge: Drop requirement on input port for DSI devices MIPI-DSI devices, if they are controlled through the bus itself, have to be described as a child node of the controller they are attached to. Thus, there's no requirement on the controller having an OF-Graph output port to model the data stream: it's assumed that it would go from the parent to the child. However, some bridges controlled through the DSI bus still require an input OF-Graph port, thus requiring a controller with an OF-Graph output port. This prevents those bridges from being used with the controllers that do not have one without any particular reason to. Let's drop that requirement. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20220323154823.839469-1-maxime@cerno.tech --- .../devicetree/bindings/display/bridge/chipone,icn6211.yaml | 1 - .../devicetree/bindings/display/bridge/toshiba,tc358762.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml index 62c3bd4cb28d..7257fd0ae4da 100644 --- a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml +++ b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml @@ -51,7 +51,6 @@ properties: Video port for MIPI DPI output (panel or connector). required: - - port@0 - port@1 required: diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml index 5216c27fc0ad..a412a1da950f 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml @@ -39,7 +39,6 @@ properties: Video port for MIPI DPI output (panel or connector). required: - - port@0 - port@1 required: From 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Mar 2022 22:03:41 +0100 Subject: [PATCH 136/423] sched/core: Fix forceidle balancing Steve reported that ChromeOS encounters the forceidle balancer being ran from rt_mutex_setprio()'s balance_callback() invocation and explodes. Now, the forceidle balancer gets queued every time the idle task gets selected, set_next_task(), which is strictly too often. rt_mutex_setprio() also uses set_next_task() in the 'change' pattern: queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */ running = task_current(rq, p); /* rq->curr == p */ if (queued) dequeue_task(...); if (running) put_prev_task(...); /* change task properties */ if (queued) enqueue_task(...); if (running) set_next_task(...); However, rt_mutex_setprio() will explicitly not run this pattern on the idle task (since priority boosting the idle task is quite insane). Most other 'change' pattern users are pidhash based and would also not apply to idle. Also, the change pattern doesn't contain a __balance_callback() invocation and hence we could have an out-of-band balance-callback, which *should* trigger the WARN in rq_pin_lock() (which guards against this exact anti-pattern). So while none of that explains how this happens, it does indicate that having it in set_next_task() might not be the most robust option. Instead, explicitly queue the forceidle balancer from pick_next_task() when it does indeed result in forceidle selection. Having it here, ensures it can only be triggered under the __schedule() rq->lock instance, and hence must be ran from that context. This also happens to clean up the code a little, so win-win. Fixes: d2dfa17bc7de ("sched: Trivial forced-newidle balancer") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Tested-by: T.J. Alumbaugh Link: https://lkml.kernel.org/r/20220330160535.GN8939@worktop.programming.kicks-ass.net --- kernel/sched/core.c | 14 ++++++++++---- kernel/sched/idle.c | 1 - kernel/sched/sched.h | 6 ------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d575b4914925..017ee7807930 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5752,6 +5752,8 @@ static inline struct task_struct *pick_task(struct rq *rq) extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi); +static void queue_core_balance(struct rq *rq); + static struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { @@ -5801,7 +5803,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } rq->core_pick = NULL; - return next; + goto out; } put_prev_task_balance(rq, prev, rf); @@ -5851,7 +5853,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) */ WARN_ON_ONCE(fi_before); task_vruntime_update(rq, next, false); - goto done; + goto out_set_next; } } @@ -5970,8 +5972,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) resched_curr(rq_i); } -done: +out_set_next: set_next_task(rq, next); +out: + if (rq->core->core_forceidle_count && next == rq->idle) + queue_core_balance(rq); + return next; } @@ -6066,7 +6072,7 @@ static void sched_core_balance(struct rq *rq) static DEFINE_PER_CPU(struct callback_head, core_balance_head); -void queue_core_balance(struct rq *rq) +static void queue_core_balance(struct rq *rq) { if (!sched_core_enabled(rq)) return; diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8f8b5020e76a..ecb0d7052877 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -434,7 +434,6 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir { update_idle_core(rq); schedstat_inc(rq->sched_goidle); - queue_core_balance(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 58263f90c559..8dccb34eb190 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1232,8 +1232,6 @@ static inline bool sched_group_cookie_match(struct rq *rq, return false; } -extern void queue_core_balance(struct rq *rq); - static inline bool sched_core_enqueued(struct task_struct *p) { return !RB_EMPTY_NODE(&p->core_node); @@ -1267,10 +1265,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) return &rq->__lock; } -static inline void queue_core_balance(struct rq *rq) -{ -} - static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p) { return true; From 386ef214c3c6ab111d05e1790e79475363abaa05 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Mar 2022 15:51:32 +0100 Subject: [PATCH 137/423] sched: Teach the forced-newidle balancer about CPU affinity limitation. try_steal_cookie() looks at task_struct::cpus_mask to decide if the task could be moved to `this' CPU. It ignores that the task might be in a migration disabled section while not on the CPU. In this case the task must not be moved otherwise per-CPU assumption are broken. Use is_cpu_allowed(), as suggested by Peter Zijlstra, to decide if the a task can be moved. Fixes: d2dfa17bc7de6 ("sched: Trivial forced-newidle balancer") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YjNK9El+3fzGmswf@linutronix.de --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 017ee7807930..51efaabac3e4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6006,7 +6006,7 @@ static bool try_steal_cookie(int this, int that) if (p == src->core_pick || p == src->curr) goto next; - if (!cpumask_test_cpu(this, &p->cpus_mask)) + if (!is_cpu_allowed(p, this)) goto next; if (p->core_occupation > dst->idle->core_occupation) From 0a70045ed8516dfcff4b5728557e1ef3fd017c53 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 30 Mar 2022 10:43:28 +0200 Subject: [PATCH 138/423] entry: Fix compile error in dynamic_irqentry_exit_cond_resched() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel/entry/common.c: In function ‘dynamic_irqentry_exit_cond_resched’: kernel/entry/common.c:409:14: error: implicit declaration of function ‘static_key_unlikely’; did you mean ‘static_key_enable’? [-Werror=implicit-function-declaration] 409 | if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) | ^~~~~~~~~~~~~~~~~~~ | static_key_enable static_key_unlikely() should be static_branch_unlikely(). Fixes: 99cf983cc8bca ("sched/preempt: Add PREEMPT_DYNAMIC using static keys") Signed-off-by: Sven Schnelle Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220330084328.1805665-1-svens@linux.ibm.com --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e57a224d6b79..93c3b86e781c 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -392,7 +392,7 @@ DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); void dynamic_irqentry_exit_cond_resched(void) { - if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) + if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) return; raw_irqentry_exit_cond_resched(); } From 1cd5f059d956e6f614ba6666ecdbcf95db05d5f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Mar 2022 21:24:38 +0100 Subject: [PATCH 139/423] x86,static_call: Fix __static_call_return0 for i386 Paolo reported that the instruction sequence that is used to replace: call __static_call_return0 namely: 66 66 48 31 c0 data16 data16 xor %rax,%rax decodes to something else on i386, namely: 66 66 48 data16 dec %ax 31 c0 xor %eax,%eax Which is a nonsensical sequence that happens to have the same outcome. *However* an important distinction is that it consists of 2 instructions which is a problem when the thing needs to be overwriten with a regular call instruction again. As such, replace the instruction with something that decodes the same on both i386 and x86_64. Fixes: 3f2a8fc4b15d ("static_call/x86: Add __static_call_return0()") Reported-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220318204419.GT8939@worktop.programming.kicks-ass.net --- arch/x86/kernel/static_call.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 531fb4cbb63f..aa72cefdd5be 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -12,10 +12,9 @@ enum insn_type { }; /* - * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax - * The REX.W cancels the effect of any data16. + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ -static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; +static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; From 8fd4ddda2f49a66bf5dd3d0c01966c4b1971308b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Mar 2022 12:49:36 +0100 Subject: [PATCH 140/423] static_call: Don't make __static_call_return0 static System.map shows that vmlinux contains several instances of __static_call_return0(): c0004fc0 t __static_call_return0 c0011518 t __static_call_return0 c00d8160 t __static_call_return0 arch_static_call_transform() uses the middle one to check whether we are setting a call to __static_call_return0 or not: c0011520 : c0011520: 3d 20 c0 01 lis r9,-16383 <== r9 = 0xc001 << 16 c0011524: 39 29 15 18 addi r9,r9,5400 <== r9 += 0x1518 c0011528: 7c 05 48 00 cmpw r5,r9 <== r9 has value 0xc0011518 here So if static_call_update() is called with one of the other instances of __static_call_return0(), arch_static_call_transform() won't recognise it. In order to work properly, global single instance of __static_call_return0() is required. Fixes: 3f2a8fc4b15d ("static_call/x86: Add __static_call_return0()") Signed-off-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/30821468a0e7d28251954b578e5051dc09300d04.1647258493.git.christophe.leroy@csgroup.eu --- include/linux/static_call.h | 5 +- kernel/Makefile | 3 +- kernel/static_call.c | 541 ----------------------------------- kernel/static_call_inline.c | 543 ++++++++++++++++++++++++++++++++++++ 4 files changed, 546 insertions(+), 546 deletions(-) create mode 100644 kernel/static_call_inline.c diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 3e56a9751c06..fcc5b48989b3 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -248,10 +248,7 @@ static inline int static_call_text_reserved(void *start, void *end) return 0; } -static inline long __static_call_return0(void) -{ - return 0; -} +extern long __static_call_return0(void); #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ diff --git a/kernel/Makefile b/kernel/Makefile index 471d71935e90..847a82bfe0e3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -114,7 +114,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o -obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o +obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o +obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/static_call.c b/kernel/static_call.c index f2b8baea35d2..e9c3e69f3837 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -1,549 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct static_call_site __start_static_call_sites[], - __stop_static_call_sites[]; -extern struct static_call_tramp_key __start_static_call_tramp_key[], - __stop_static_call_tramp_key[]; - -static bool static_call_initialized; - -/* mutex to protect key modules/sites */ -static DEFINE_MUTEX(static_call_mutex); - -static void static_call_lock(void) -{ - mutex_lock(&static_call_mutex); -} - -static void static_call_unlock(void) -{ - mutex_unlock(&static_call_mutex); -} - -static inline void *static_call_addr(struct static_call_site *site) -{ - return (void *)((long)site->addr + (long)&site->addr); -} - -static inline unsigned long __static_call_key(const struct static_call_site *site) -{ - return (long)site->key + (long)&site->key; -} - -static inline struct static_call_key *static_call_key(const struct static_call_site *site) -{ - return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); -} - -/* These assume the key is word-aligned. */ -static inline bool static_call_is_init(struct static_call_site *site) -{ - return __static_call_key(site) & STATIC_CALL_SITE_INIT; -} - -static inline bool static_call_is_tail(struct static_call_site *site) -{ - return __static_call_key(site) & STATIC_CALL_SITE_TAIL; -} - -static inline void static_call_set_init(struct static_call_site *site) -{ - site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - - (long)&site->key; -} - -static int static_call_site_cmp(const void *_a, const void *_b) -{ - const struct static_call_site *a = _a; - const struct static_call_site *b = _b; - const struct static_call_key *key_a = static_call_key(a); - const struct static_call_key *key_b = static_call_key(b); - - if (key_a < key_b) - return -1; - - if (key_a > key_b) - return 1; - - return 0; -} - -static void static_call_site_swap(void *_a, void *_b, int size) -{ - long delta = (unsigned long)_a - (unsigned long)_b; - struct static_call_site *a = _a; - struct static_call_site *b = _b; - struct static_call_site tmp = *a; - - a->addr = b->addr - delta; - a->key = b->key - delta; - - b->addr = tmp.addr + delta; - b->key = tmp.key + delta; -} - -static inline void static_call_sort_entries(struct static_call_site *start, - struct static_call_site *stop) -{ - sort(start, stop - start, sizeof(struct static_call_site), - static_call_site_cmp, static_call_site_swap); -} - -static inline bool static_call_key_has_mods(struct static_call_key *key) -{ - return !(key->type & 1); -} - -static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) -{ - if (!static_call_key_has_mods(key)) - return NULL; - - return key->mods; -} - -static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) -{ - if (static_call_key_has_mods(key)) - return NULL; - - return (struct static_call_site *)(key->type & ~1); -} - -void __static_call_update(struct static_call_key *key, void *tramp, void *func) -{ - struct static_call_site *site, *stop; - struct static_call_mod *site_mod, first; - - cpus_read_lock(); - static_call_lock(); - - if (key->func == func) - goto done; - - key->func = func; - - arch_static_call_transform(NULL, tramp, func, false); - - /* - * If uninitialized, we'll not update the callsites, but they still - * point to the trampoline and we just patched that. - */ - if (WARN_ON_ONCE(!static_call_initialized)) - goto done; - - first = (struct static_call_mod){ - .next = static_call_key_next(key), - .mod = NULL, - .sites = static_call_key_sites(key), - }; - - for (site_mod = &first; site_mod; site_mod = site_mod->next) { - bool init = system_state < SYSTEM_RUNNING; - struct module *mod = site_mod->mod; - - if (!site_mod->sites) { - /* - * This can happen if the static call key is defined in - * a module which doesn't use it. - * - * It also happens in the has_mods case, where the - * 'first' entry has no sites associated with it. - */ - continue; - } - - stop = __stop_static_call_sites; - - if (mod) { -#ifdef CONFIG_MODULES - stop = mod->static_call_sites + - mod->num_static_call_sites; - init = mod->state == MODULE_STATE_COMING; -#endif - } - - for (site = site_mod->sites; - site < stop && static_call_key(site) == key; site++) { - void *site_addr = static_call_addr(site); - - if (!init && static_call_is_init(site)) - continue; - - if (!kernel_text_address((unsigned long)site_addr)) { - /* - * This skips patching built-in __exit, which - * is part of init_section_contains() but is - * not part of kernel_text_address(). - * - * Skipping built-in __exit is fine since it - * will never be executed. - */ - WARN_ONCE(!static_call_is_init(site), - "can't patch static call site at %pS", - site_addr); - continue; - } - - arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); - } - } - -done: - static_call_unlock(); - cpus_read_unlock(); -} -EXPORT_SYMBOL_GPL(__static_call_update); - -static int __static_call_init(struct module *mod, - struct static_call_site *start, - struct static_call_site *stop) -{ - struct static_call_site *site; - struct static_call_key *key, *prev_key = NULL; - struct static_call_mod *site_mod; - - if (start == stop) - return 0; - - static_call_sort_entries(start, stop); - - for (site = start; site < stop; site++) { - void *site_addr = static_call_addr(site); - - if ((mod && within_module_init((unsigned long)site_addr, mod)) || - (!mod && init_section_contains(site_addr, 1))) - static_call_set_init(site); - - key = static_call_key(site); - if (key != prev_key) { - prev_key = key; - - /* - * For vmlinux (!mod) avoid the allocation by storing - * the sites pointer in the key itself. Also see - * __static_call_update()'s @first. - * - * This allows architectures (eg. x86) to call - * static_call_init() before memory allocation works. - */ - if (!mod) { - key->sites = site; - key->type |= 1; - goto do_transform; - } - - site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); - if (!site_mod) - return -ENOMEM; - - /* - * When the key has a direct sites pointer, extract - * that into an explicit struct static_call_mod, so we - * can have a list of modules. - */ - if (static_call_key_sites(key)) { - site_mod->mod = NULL; - site_mod->next = NULL; - site_mod->sites = static_call_key_sites(key); - - key->mods = site_mod; - - site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); - if (!site_mod) - return -ENOMEM; - } - - site_mod->mod = mod; - site_mod->sites = site; - site_mod->next = static_call_key_next(key); - key->mods = site_mod; - } - -do_transform: - arch_static_call_transform(site_addr, NULL, key->func, - static_call_is_tail(site)); - } - - return 0; -} - -static int addr_conflict(struct static_call_site *site, void *start, void *end) -{ - unsigned long addr = (unsigned long)static_call_addr(site); - - if (addr <= (unsigned long)end && - addr + CALL_INSN_SIZE > (unsigned long)start) - return 1; - - return 0; -} - -static int __static_call_text_reserved(struct static_call_site *iter_start, - struct static_call_site *iter_stop, - void *start, void *end, bool init) -{ - struct static_call_site *iter = iter_start; - - while (iter < iter_stop) { - if (init || !static_call_is_init(iter)) { - if (addr_conflict(iter, start, end)) - return 1; - } - iter++; - } - - return 0; -} - -#ifdef CONFIG_MODULES - -static int __static_call_mod_text_reserved(void *start, void *end) -{ - struct module *mod; - int ret; - - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - - if (!mod) - return 0; - - ret = __static_call_text_reserved(mod->static_call_sites, - mod->static_call_sites + mod->num_static_call_sites, - start, end, mod->state == MODULE_STATE_COMING); - - module_put(mod); - - return ret; -} - -static unsigned long tramp_key_lookup(unsigned long addr) -{ - struct static_call_tramp_key *start = __start_static_call_tramp_key; - struct static_call_tramp_key *stop = __stop_static_call_tramp_key; - struct static_call_tramp_key *tramp_key; - - for (tramp_key = start; tramp_key != stop; tramp_key++) { - unsigned long tramp; - - tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; - if (tramp == addr) - return (long)tramp_key->key + (long)&tramp_key->key; - } - - return 0; -} - -static int static_call_add_module(struct module *mod) -{ - struct static_call_site *start = mod->static_call_sites; - struct static_call_site *stop = start + mod->num_static_call_sites; - struct static_call_site *site; - - for (site = start; site != stop; site++) { - unsigned long s_key = __static_call_key(site); - unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; - unsigned long key; - - /* - * Is the key is exported, 'addr' points to the key, which - * means modules are allowed to call static_call_update() on - * it. - * - * Otherwise, the key isn't exported, and 'addr' points to the - * trampoline so we need to lookup the key. - * - * We go through this dance to prevent crazy modules from - * abusing sensitive static calls. - */ - if (!kernel_text_address(addr)) - continue; - - key = tramp_key_lookup(addr); - if (!key) { - pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", - static_call_addr(site)); - return -EINVAL; - } - - key |= s_key & STATIC_CALL_SITE_FLAGS; - site->key = key - (long)&site->key; - } - - return __static_call_init(mod, start, stop); -} - -static void static_call_del_module(struct module *mod) -{ - struct static_call_site *start = mod->static_call_sites; - struct static_call_site *stop = mod->static_call_sites + - mod->num_static_call_sites; - struct static_call_key *key, *prev_key = NULL; - struct static_call_mod *site_mod, **prev; - struct static_call_site *site; - - for (site = start; site < stop; site++) { - key = static_call_key(site); - if (key == prev_key) - continue; - - prev_key = key; - - for (prev = &key->mods, site_mod = key->mods; - site_mod && site_mod->mod != mod; - prev = &site_mod->next, site_mod = site_mod->next) - ; - - if (!site_mod) - continue; - - *prev = site_mod->next; - kfree(site_mod); - } -} - -static int static_call_module_notify(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct module *mod = data; - int ret = 0; - - cpus_read_lock(); - static_call_lock(); - - switch (val) { - case MODULE_STATE_COMING: - ret = static_call_add_module(mod); - if (ret) { - WARN(1, "Failed to allocate memory for static calls"); - static_call_del_module(mod); - } - break; - case MODULE_STATE_GOING: - static_call_del_module(mod); - break; - } - - static_call_unlock(); - cpus_read_unlock(); - - return notifier_from_errno(ret); -} - -static struct notifier_block static_call_module_nb = { - .notifier_call = static_call_module_notify, -}; - -#else - -static inline int __static_call_mod_text_reserved(void *start, void *end) -{ - return 0; -} - -#endif /* CONFIG_MODULES */ - -int static_call_text_reserved(void *start, void *end) -{ - bool init = system_state < SYSTEM_RUNNING; - int ret = __static_call_text_reserved(__start_static_call_sites, - __stop_static_call_sites, start, end, init); - - if (ret) - return ret; - - return __static_call_mod_text_reserved(start, end); -} - -int __init static_call_init(void) -{ - int ret; - - if (static_call_initialized) - return 0; - - cpus_read_lock(); - static_call_lock(); - ret = __static_call_init(NULL, __start_static_call_sites, - __stop_static_call_sites); - static_call_unlock(); - cpus_read_unlock(); - - if (ret) { - pr_err("Failed to allocate memory for static_call!\n"); - BUG(); - } - - static_call_initialized = true; - -#ifdef CONFIG_MODULES - register_module_notifier(&static_call_module_nb); -#endif - return 0; -} -early_initcall(static_call_init); long __static_call_return0(void) { return 0; } EXPORT_SYMBOL_GPL(__static_call_return0); - -#ifdef CONFIG_STATIC_CALL_SELFTEST - -static int func_a(int x) -{ - return x+1; -} - -static int func_b(int x) -{ - return x+2; -} - -DEFINE_STATIC_CALL(sc_selftest, func_a); - -static struct static_call_data { - int (*func)(int); - int val; - int expect; -} static_call_data [] __initdata = { - { NULL, 2, 3 }, - { func_b, 2, 4 }, - { func_a, 2, 3 } -}; - -static int __init test_static_call_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { - struct static_call_data *scd = &static_call_data[i]; - - if (scd->func) - static_call_update(sc_selftest, scd->func); - - WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); - } - - return 0; -} -early_initcall(test_static_call_init); - -#endif /* CONFIG_STATIC_CALL_SELFTEST */ diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c new file mode 100644 index 000000000000..dc5665b62814 --- /dev/null +++ b/kernel/static_call_inline.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct static_call_site __start_static_call_sites[], + __stop_static_call_sites[]; +extern struct static_call_tramp_key __start_static_call_tramp_key[], + __stop_static_call_tramp_key[]; + +static bool static_call_initialized; + +/* mutex to protect key modules/sites */ +static DEFINE_MUTEX(static_call_mutex); + +static void static_call_lock(void) +{ + mutex_lock(&static_call_mutex); +} + +static void static_call_unlock(void) +{ + mutex_unlock(&static_call_mutex); +} + +static inline void *static_call_addr(struct static_call_site *site) +{ + return (void *)((long)site->addr + (long)&site->addr); +} + +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)&site->key; +} + +static inline struct static_call_key *static_call_key(const struct static_call_site *site) +{ + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); +} + +/* These assume the key is word-aligned. */ +static inline bool static_call_is_init(struct static_call_site *site) +{ + return __static_call_key(site) & STATIC_CALL_SITE_INIT; +} + +static inline bool static_call_is_tail(struct static_call_site *site) +{ + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; +} + +static inline void static_call_set_init(struct static_call_site *site) +{ + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - + (long)&site->key; +} + +static int static_call_site_cmp(const void *_a, const void *_b) +{ + const struct static_call_site *a = _a; + const struct static_call_site *b = _b; + const struct static_call_key *key_a = static_call_key(a); + const struct static_call_key *key_b = static_call_key(b); + + if (key_a < key_b) + return -1; + + if (key_a > key_b) + return 1; + + return 0; +} + +static void static_call_site_swap(void *_a, void *_b, int size) +{ + long delta = (unsigned long)_a - (unsigned long)_b; + struct static_call_site *a = _a; + struct static_call_site *b = _b; + struct static_call_site tmp = *a; + + a->addr = b->addr - delta; + a->key = b->key - delta; + + b->addr = tmp.addr + delta; + b->key = tmp.key + delta; +} + +static inline void static_call_sort_entries(struct static_call_site *start, + struct static_call_site *stop) +{ + sort(start, stop - start, sizeof(struct static_call_site), + static_call_site_cmp, static_call_site_swap); +} + +static inline bool static_call_key_has_mods(struct static_call_key *key) +{ + return !(key->type & 1); +} + +static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) +{ + if (!static_call_key_has_mods(key)) + return NULL; + + return key->mods; +} + +static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) +{ + if (static_call_key_has_mods(key)) + return NULL; + + return (struct static_call_site *)(key->type & ~1); +} + +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + struct static_call_site *site, *stop; + struct static_call_mod *site_mod, first; + + cpus_read_lock(); + static_call_lock(); + + if (key->func == func) + goto done; + + key->func = func; + + arch_static_call_transform(NULL, tramp, func, false); + + /* + * If uninitialized, we'll not update the callsites, but they still + * point to the trampoline and we just patched that. + */ + if (WARN_ON_ONCE(!static_call_initialized)) + goto done; + + first = (struct static_call_mod){ + .next = static_call_key_next(key), + .mod = NULL, + .sites = static_call_key_sites(key), + }; + + for (site_mod = &first; site_mod; site_mod = site_mod->next) { + bool init = system_state < SYSTEM_RUNNING; + struct module *mod = site_mod->mod; + + if (!site_mod->sites) { + /* + * This can happen if the static call key is defined in + * a module which doesn't use it. + * + * It also happens in the has_mods case, where the + * 'first' entry has no sites associated with it. + */ + continue; + } + + stop = __stop_static_call_sites; + + if (mod) { +#ifdef CONFIG_MODULES + stop = mod->static_call_sites + + mod->num_static_call_sites; + init = mod->state == MODULE_STATE_COMING; +#endif + } + + for (site = site_mod->sites; + site < stop && static_call_key(site) == key; site++) { + void *site_addr = static_call_addr(site); + + if (!init && static_call_is_init(site)) + continue; + + if (!kernel_text_address((unsigned long)site_addr)) { + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", + site_addr); + continue; + } + + arch_static_call_transform(site_addr, NULL, func, + static_call_is_tail(site)); + } + } + +done: + static_call_unlock(); + cpus_read_unlock(); +} +EXPORT_SYMBOL_GPL(__static_call_update); + +static int __static_call_init(struct module *mod, + struct static_call_site *start, + struct static_call_site *stop) +{ + struct static_call_site *site; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod; + + if (start == stop) + return 0; + + static_call_sort_entries(start, stop); + + for (site = start; site < stop; site++) { + void *site_addr = static_call_addr(site); + + if ((mod && within_module_init((unsigned long)site_addr, mod)) || + (!mod && init_section_contains(site_addr, 1))) + static_call_set_init(site); + + key = static_call_key(site); + if (key != prev_key) { + prev_key = key; + + /* + * For vmlinux (!mod) avoid the allocation by storing + * the sites pointer in the key itself. Also see + * __static_call_update()'s @first. + * + * This allows architectures (eg. x86) to call + * static_call_init() before memory allocation works. + */ + if (!mod) { + key->sites = site; + key->type |= 1; + goto do_transform; + } + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + + /* + * When the key has a direct sites pointer, extract + * that into an explicit struct static_call_mod, so we + * can have a list of modules. + */ + if (static_call_key_sites(key)) { + site_mod->mod = NULL; + site_mod->next = NULL; + site_mod->sites = static_call_key_sites(key); + + key->mods = site_mod; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + } + + site_mod->mod = mod; + site_mod->sites = site; + site_mod->next = static_call_key_next(key); + key->mods = site_mod; + } + +do_transform: + arch_static_call_transform(site_addr, NULL, key->func, + static_call_is_tail(site)); + } + + return 0; +} + +static int addr_conflict(struct static_call_site *site, void *start, void *end) +{ + unsigned long addr = (unsigned long)static_call_addr(site); + + if (addr <= (unsigned long)end && + addr + CALL_INSN_SIZE > (unsigned long)start) + return 1; + + return 0; +} + +static int __static_call_text_reserved(struct static_call_site *iter_start, + struct static_call_site *iter_stop, + void *start, void *end, bool init) +{ + struct static_call_site *iter = iter_start; + + while (iter < iter_stop) { + if (init || !static_call_is_init(iter)) { + if (addr_conflict(iter, start, end)) + return 1; + } + iter++; + } + + return 0; +} + +#ifdef CONFIG_MODULES + +static int __static_call_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + int ret; + + preempt_disable(); + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + preempt_enable(); + + if (!mod) + return 0; + + ret = __static_call_text_reserved(mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites, + start, end, mod->state == MODULE_STATE_COMING); + + module_put(mod); + + return ret; +} + +static unsigned long tramp_key_lookup(unsigned long addr) +{ + struct static_call_tramp_key *start = __start_static_call_tramp_key; + struct static_call_tramp_key *stop = __stop_static_call_tramp_key; + struct static_call_tramp_key *tramp_key; + + for (tramp_key = start; tramp_key != stop; tramp_key++) { + unsigned long tramp; + + tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; + if (tramp == addr) + return (long)tramp_key->key + (long)&tramp_key->key; + } + + return 0; +} + +static int static_call_add_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = start + mod->num_static_call_sites; + struct static_call_site *site; + + for (site = start; site != stop; site++) { + unsigned long s_key = __static_call_key(site); + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; + unsigned long key; + + /* + * Is the key is exported, 'addr' points to the key, which + * means modules are allowed to call static_call_update() on + * it. + * + * Otherwise, the key isn't exported, and 'addr' points to the + * trampoline so we need to lookup the key. + * + * We go through this dance to prevent crazy modules from + * abusing sensitive static calls. + */ + if (!kernel_text_address(addr)) + continue; + + key = tramp_key_lookup(addr); + if (!key) { + pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", + static_call_addr(site)); + return -EINVAL; + } + + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; + } + + return __static_call_init(mod, start, stop); +} + +static void static_call_del_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = mod->static_call_sites + + mod->num_static_call_sites; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod, **prev; + struct static_call_site *site; + + for (site = start; site < stop; site++) { + key = static_call_key(site); + if (key == prev_key) + continue; + + prev_key = key; + + for (prev = &key->mods, site_mod = key->mods; + site_mod && site_mod->mod != mod; + prev = &site_mod->next, site_mod = site_mod->next) + ; + + if (!site_mod) + continue; + + *prev = site_mod->next; + kfree(site_mod); + } +} + +static int static_call_module_notify(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + int ret = 0; + + cpus_read_lock(); + static_call_lock(); + + switch (val) { + case MODULE_STATE_COMING: + ret = static_call_add_module(mod); + if (ret) { + WARN(1, "Failed to allocate memory for static calls"); + static_call_del_module(mod); + } + break; + case MODULE_STATE_GOING: + static_call_del_module(mod); + break; + } + + static_call_unlock(); + cpus_read_unlock(); + + return notifier_from_errno(ret); +} + +static struct notifier_block static_call_module_nb = { + .notifier_call = static_call_module_notify, +}; + +#else + +static inline int __static_call_mod_text_reserved(void *start, void *end) +{ + return 0; +} + +#endif /* CONFIG_MODULES */ + +int static_call_text_reserved(void *start, void *end) +{ + bool init = system_state < SYSTEM_RUNNING; + int ret = __static_call_text_reserved(__start_static_call_sites, + __stop_static_call_sites, start, end, init); + + if (ret) + return ret; + + return __static_call_mod_text_reserved(start, end); +} + +int __init static_call_init(void) +{ + int ret; + + if (static_call_initialized) + return 0; + + cpus_read_lock(); + static_call_lock(); + ret = __static_call_init(NULL, __start_static_call_sites, + __stop_static_call_sites); + static_call_unlock(); + cpus_read_unlock(); + + if (ret) { + pr_err("Failed to allocate memory for static_call!\n"); + BUG(); + } + + static_call_initialized = true; + +#ifdef CONFIG_MODULES + register_module_notifier(&static_call_module_nb); +#endif + return 0; +} +early_initcall(static_call_init); + +#ifdef CONFIG_STATIC_CALL_SELFTEST + +static int func_a(int x) +{ + return x+1; +} + +static int func_b(int x) +{ + return x+2; +} + +DEFINE_STATIC_CALL(sc_selftest, func_a); + +static struct static_call_data { + int (*func)(int); + int val; + int expect; +} static_call_data [] __initdata = { + { NULL, 2, 3 }, + { func_b, 2, 4 }, + { func_a, 2, 3 } +}; + +static int __init test_static_call_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { + struct static_call_data *scd = &static_call_data[i]; + + if (scd->func) + static_call_update(sc_selftest, scd->func); + + WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); + } + + return 0; +} +early_initcall(test_static_call_init); + +#endif /* CONFIG_STATIC_CALL_SELFTEST */ From 5517d500829c683a358a8de04ecb2e28af629ae5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Mar 2022 11:27:35 +0100 Subject: [PATCH 141/423] static_call: Properly initialise DEFINE_STATIC_CALL_RET0() When a static call is updated with __static_call_return0() as target, arch_static_call_transform() set it to use an optimised set of instructions which are meant to lay in the same cacheline. But when initialising a static call with DEFINE_STATIC_CALL_RET0(), we get a branch to the real __static_call_return0() function instead of getting the optimised setup: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 4b ff ff f4 b c00d8114 <__static_call_return0> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Add ARCH_DEFINE_STATIC_CALL_RET0_TRAMP() defined by each architecture to setup the optimised configuration, and rework DEFINE_STATIC_CALL_RET0() to call it: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 48 00 00 14 b c00d8134 <__SCT__perf_snapshot_branch_stack+0x14> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Signed-off-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/1e0a61a88f52a460f62a58ffc2a5f847d1f7d9d8.1647253456.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/static_call.h | 1 + arch/x86/include/asm/static_call.h | 2 ++ include/linux/static_call.h | 20 +++++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h index 0a0bc79bd1fa..de1018cc522b 100644 --- a/arch/powerpc/include/asm/static_call.h +++ b/arch/powerpc/include/asm/static_call.h @@ -24,5 +24,6 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func) #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20") #endif /* _ASM_POWERPC_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index ed4f8bb6c2d9..2455d721503e 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -38,6 +38,8 @@ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) #define ARCH_ADD_TRAMP_KEY(name) \ asm(".pushsection .static_call_tramp_key, \"a\" \n" \ diff --git a/include/linux/static_call.h b/include/linux/static_call.h index fcc5b48989b3..3c50b0fdda16 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -196,6 +196,14 @@ extern long __static_call_return0(void); }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) + #define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ @@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; } }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) #define static_call_cond(name) (void)__static_call(name) @@ -284,6 +298,9 @@ static inline long __static_call_return0(void) .func = NULL, \ } +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, __static_call_return0) + static inline void __static_call_nop(void) { } /* @@ -327,7 +344,4 @@ static inline int static_call_text_reserved(void *start, void *end) #define DEFINE_STATIC_CALL(name, _func) \ __DEFINE_STATIC_CALL(name, _func, _func) -#define DEFINE_STATIC_CALL_RET0(name, _func) \ - __DEFINE_STATIC_CALL(name, _func, __static_call_return0) - #endif /* _LINUX_STATIC_CALL_H */ From df21c0d7a94db64a4e1a0d070e26fb02e60fefab Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Mar 2022 11:27:36 +0100 Subject: [PATCH 142/423] static_call: Remove __DEFINE_STATIC_CALL macro Only DEFINE_STATIC_CALL use __DEFINE_STATIC_CALL macro now when CONFIG_HAVE_STATIC_CALL is selected. Only keep __DEFINE_STATIC_CALL() for the generic fallback, and also use it to implement DEFINE_STATIC_CALL_NULL() in that case. Signed-off-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/329074f92d96e3220ebe15da7bbe2779beee31eb.1647253456.git.christophe.leroy@csgroup.eu --- include/linux/static_call.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 3c50b0fdda16..df53bed9d71f 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -180,13 +180,13 @@ extern int static_call_text_reserved(void *start, void *end); extern long __static_call_return0(void); -#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ +#define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func_init, \ + .func = _func, \ .type = 1, \ }; \ - ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init) + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -225,12 +225,12 @@ extern long __static_call_return0(void); static inline int static_call_init(void) { return 0; } -#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ +#define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func_init, \ + .func = _func, \ }; \ - ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init) + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -292,11 +292,11 @@ static inline long __static_call_return0(void) .func = _func_init, \ } +#define DEFINE_STATIC_CALL(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, _func) + #define DEFINE_STATIC_CALL_NULL(name, _func) \ - DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = NULL, \ - } + __DEFINE_STATIC_CALL(name, _func, NULL) #define DEFINE_STATIC_CALL_RET0(name, _func) \ __DEFINE_STATIC_CALL(name, _func, __static_call_return0) @@ -341,7 +341,4 @@ static inline int static_call_text_reserved(void *start, void *end) #endif /* CONFIG_HAVE_STATIC_CALL */ -#define DEFINE_STATIC_CALL(name, _func) \ - __DEFINE_STATIC_CALL(name, _func, _func) - #endif /* _LINUX_STATIC_CALL_H */ From 1c1e7e3c23dd25f938302428eeb22c3dda2c3427 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 28 Mar 2022 16:58:08 +0200 Subject: [PATCH 143/423] x86/percpu: Remove volatile from arch_raw_cpu_ptr(). The volatile attribute in the inline assembly of arch_raw_cpu_ptr() forces the compiler to always generate the code, even if the compiler can decide upfront that its result is not needed. For instance invoking __intel_pmu_disable_all(false) (like intel_pmu_snapshot_arch_branch_stack() does) leads to loading the address of &cpu_hw_events into the register while compiler knows that it has no need for it. This ends up with code like: | movq $cpu_hw_events, %rax #, tcp_ptr__ | add %gs:this_cpu_off(%rip), %rax # this_cpu_off, tcp_ptr__ | xorl %eax, %eax # tmp93 It also creates additional code within local_lock() with !RT && !LOCKDEP which is not desired. By removing the volatile attribute the compiler can place the function freely and avoid it if it is not needed in the end. By using the function twice the compiler properly caches only the variable offset and always loads the CPU-offset. this_cpu_ptr() also remains properly placed within a preempt_disable() sections because - arch_raw_cpu_ptr() assembly has a memory input ("m" (this_cpu_off)) - prempt_{dis,en}able() fundamentally has a 'barrier()' in it Therefore this_cpu_ptr() is already properly serialized and does not rely on the 'volatile' attribute. Remove volatile from arch_raw_cpu_ptr(). [ bigeasy: Added Linus' explanation why this_cpu_ptr() is not moved out of a preempt_disable() section without the 'volatile' attribute. ] Suggested-by: Linus Torvalds Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220328145810.86783-2-bigeasy@linutronix.de --- arch/x86/include/asm/percpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a3c33b79fb86..13c0d63ed55e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -38,9 +38,9 @@ #define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - asm volatile("add " __percpu_arg(1) ", %0" \ - : "=r" (tcp_ptr__) \ - : "m" (this_cpu_off), "0" (ptr)); \ + asm ("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) #else From 2d2f8f083ef29e9b7adfe5cb421368331543473f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 28 Mar 2022 16:58:09 +0200 Subject: [PATCH 144/423] Revert "locking/local_lock: Make the empty local_lock_*() function a macro." With volatile removed from arch_raw_cpu_ptr() the compiler no longer creates the per-CPU reference. The usage of the macro can be reverted now. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220328145810.86783-3-bigeasy@linutronix.de --- include/linux/local_lock_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 6d635e8306d6..975e33b793a7 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) -# define local_lock_acquire(__ll) do { typecheck(local_lock_t *, __ll); } while (0) -# define local_lock_release(__ll) do { typecheck(local_lock_t *, __ll); } while (0) -# define local_lock_debug_init(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_lock_release(local_lock_t *l) { } +static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } From 273ba85b5e8b971ed28eb5c17e1638543be9237d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 28 Mar 2022 16:58:10 +0200 Subject: [PATCH 145/423] Revert "mm/page_alloc: mark pagesets as __maybe_unused" The local_lock() is now using a proper static inline function which is enough for llvm to accept that the variable is used. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220328145810.86783-4-bigeasy@linutronix.de --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2db95780e003..6e5b4488a0c5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -128,7 +128,7 @@ static DEFINE_MUTEX(pcp_batch_high_lock); struct pagesets { local_lock_t lock; }; -static DEFINE_PER_CPU(struct pagesets, pagesets) __maybe_unused = { +static DEFINE_PER_CPU(struct pagesets, pagesets) = { .lock = INIT_LOCAL_LOCK(lock), }; From c61759e581576d3330bd1d9490b4d7552e24da6b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:45:57 -0700 Subject: [PATCH 146/423] perf/x86: Add Intel Raptor Lake support From PMU's perspective, Raptor Lake is the same as the Alder Lake. The only difference is the event list, which will be supported in the perf tool later. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e88791b420ee..28f075e00c7a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6212,6 +6212,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: /* * Alder Lake has 2 types of CPU, core and atom. * From 2da202aa1c38bfe8841611a3d339892eb5579e2b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:45:58 -0700 Subject: [PATCH 147/423] perf/x86/cstate: Add Raptor Lake support Raptor Lake is Intel's successor to Alder lake. From the perspective of Intel cstate residency counters, there is nothing changed compared with Alder lake. Share adl_cstates with Alder lake. Update the comments for Raptor Lake. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/cstate.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index c6262b154c3a..5d7762288a24 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,7 +40,7 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL + * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,49 +51,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL + * TGL,TNT,RKL,ADL,RPL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL + * ICL,TGL,RKL,ADL,RPL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL + * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, + * RPL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL + * TGL,TNT,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL + * KBL,CML,ICL,TGL,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL + * TNT,RKL,ADL,RPL * Scope: Package (physical package) * */ @@ -680,6 +681,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); From 82cd83047a9a80e52c0849e56885279166215310 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:45:59 -0700 Subject: [PATCH 148/423] perf/x86/msr: Add Raptor Lake CPU support Raptor Lake is Intel's successor to Alder lake. PPERF and SMI_COUNT MSRs are also supported. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-3-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 96c775abe31f..6d759f88315c 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -103,6 +103,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ROCKETLAKE: case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; From ad4878d4d71d9ada913be2ad5b6d7f526a695b6f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:46:00 -0700 Subject: [PATCH 149/423] perf/x86/uncore: Add Raptor Lake uncore support The uncore PMU of the Raptor Lake is the same as Alder Lake. Add new PCIIDs of IMC for Raptor Lake. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-4-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 1 + arch/x86/events/intel/uncore_snb.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index e497da9bf427..7695dcae280e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1828,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f698a55bde81..4262351f52b6 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -79,6 +79,10 @@ #define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 #define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 #define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 +#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700 +#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702 +#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -1406,6 +1410,22 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ } }; From e3265a4386428d3d157d9565bb520aabff8b4bf0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 28 Mar 2022 13:01:12 -0700 Subject: [PATCH 150/423] perf/core: Inherit event_caps It was reported that some perf event setup can make fork failed on ARM64. It was the case of a group of mixed hw and sw events and it failed in perf_event_init_task() due to armpmu_event_init(). The ARM PMU code checks if all the events in a group belong to the same PMU except for software events. But it didn't set the event_caps of inherited events and no longer identify them as software events. Therefore the test failed in a child process. A simple reproducer is: $ perf stat -e '{cycles,cs,instructions}' perf bench sched messaging # Running 'sched/messaging' benchmark: perf: fork(): Invalid argument The perf stat was fine but the perf bench failed in fork(). Let's inherit the event caps from the parent. Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/20220328200112.457740-1-namhyung@kernel.org --- kernel/events/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index cfde994ce61c..3980efcf931d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11635,6 +11635,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->state = PERF_EVENT_STATE_INACTIVE; + if (parent_event) + event->event_caps = parent_event->event_caps; + if (event->attr.sigtrap) atomic_set(&event->event_limit, 1); From 4a263bf331c512849062805ef1b4ac40301a9829 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Mar 2022 08:49:02 -0700 Subject: [PATCH 151/423] perf/x86/intel: Don't extend the pseudo-encoding to GP counters The INST_RETIRED.PREC_DIST event (0x0100) doesn't count on SPR. perf stat -e cpu/event=0xc0,umask=0x0/,cpu/event=0x0,umask=0x1/ -C0 Performance counter stats for 'CPU(s) 0': 607,246 cpu/event=0xc0,umask=0x0/ 0 cpu/event=0x0,umask=0x1/ The encoding for INST_RETIRED.PREC_DIST is pseudo-encoding, which doesn't work on the generic counters. However, current perf extends its mask to the generic counters. The pseudo event-code for a fixed counter must be 0x00. Check and avoid extending the mask for the fixed counter event which using the pseudo-encoding, e.g., ref-cycles and PREC_DIST event. With the patch, perf stat -e cpu/event=0xc0,umask=0x0/,cpu/event=0x0,umask=0x1/ -C0 Performance counter stats for 'CPU(s) 0': 583,184 cpu/event=0xc0,umask=0x0/ 583,048 cpu/event=0x0,umask=0x1/ Fixes: 2de71ee153ef ("perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1648482543-14923-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 6 +++++- arch/x86/include/asm/perf_event.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 28f075e00c7a..eb17b968ca52 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5536,7 +5536,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con /* Disabled fixed counters which are not in CPUID */ c->idxmsk64 &= intel_ctrl; - if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) + /* + * Don't extend the pseudo-encoding to the + * generic counters + */ + if (!use_fixed_pseudo_encoding(c->code)) c->idxmsk64 |= (1ULL << num_counters) - 1; } c->idxmsk64 &= diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 58d9e4b1fa0a..b06e4c573add 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -241,6 +241,11 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + /* * We model BTS tracing as another fixed-mode PMC. * From e590928de7547454469693da9bc7ffd562e54b7e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Mar 2022 08:49:03 -0700 Subject: [PATCH 152/423] perf/x86/intel: Update the FRONTEND MSR mask on Sapphire Rapids On Sapphire Rapids, the FRONTEND_RETIRED.MS_FLOWS event requires the FRONTEND MSR value 0x8. However, the current FRONTEND MSR mask doesn't support it. Update intel_spr_extra_regs[] to support it. Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1648482543-14923-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index eb17b968ca52..fc7f458eb3de 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -302,7 +302,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), EVENT_EXTRA_END From a0827713e298d021d3c79ae7423aea408f3f7c3a Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:20 +0800 Subject: [PATCH 153/423] perf/core: Don't pass task around when ctx sched in The current code pass task around for ctx_sched_in(), only to get perf_cgroup of the task, then update the timestamp of it and its ancestors and set them to active. But we can use cpuctx->cgrp to get active perf_cgroup and its ancestors since cpuctx->cgrp has been set before ctx_sched_in(). This patch remove the task argument in ctx_sched_in() and cleanup related code. Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-2-zhouchengming@bytedance.com --- kernel/events/core.c | 58 ++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3980efcf931d..65450200691c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -574,8 +574,7 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, enum event_type_t event_type); static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task); + enum event_type_t event_type); static void update_context_time(struct perf_event_context *ctx); static u64 perf_event_time(struct perf_event *event); @@ -801,10 +800,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) } static inline void -perf_cgroup_set_timestamp(struct task_struct *task, - struct perf_event_context *ctx) +perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { - struct perf_cgroup *cgrp; + struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_cgroup *cgrp = cpuctx->cgrp; struct perf_cgroup_info *info; struct cgroup_subsys_state *css; @@ -813,10 +812,10 @@ perf_cgroup_set_timestamp(struct task_struct *task, * ensure we do not access cgroup data * unless we have the cgroup pinned (css_get) */ - if (!task || !ctx->nr_cgroups) + if (!cgrp) return; - cgrp = perf_cgroup_from_task(task, ctx); + WARN_ON_ONCE(!ctx->nr_cgroups); for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); @@ -869,14 +868,14 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) WARN_ON_ONCE(cpuctx->cgrp); /* * set cgrp before ctxsw in to allow - * event_filter_match() to not have to pass - * task around + * perf_cgroup_set_timestamp() in ctx_sched_in() + * to not have to pass task around * we pass the cpuctx->ctx to perf_cgroup_from_task() * because cgorup events are only per-cpu */ cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); + cpu_ctx_sched_in(cpuctx, EVENT_ALL); } perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); @@ -1118,8 +1117,7 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, } static inline void -perf_cgroup_set_timestamp(struct task_struct *task, - struct perf_event_context *ctx) +perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { } @@ -2713,8 +2711,7 @@ static void ctx_sched_out(struct perf_event_context *ctx, static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task); + enum event_type_t event_type); static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, @@ -2730,15 +2727,14 @@ static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, } static void perf_event_sched_in(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, - struct task_struct *task) + struct perf_event_context *ctx) { - cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_PINNED); if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_PINNED); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); } /* @@ -2788,7 +2784,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, else if (ctx_event_type & EVENT_PINNED) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, task_ctx, current); + perf_event_sched_in(cpuctx, task_ctx); perf_pmu_enable(cpuctx->ctx.pmu); } @@ -3011,7 +3007,7 @@ static void __perf_event_enable(struct perf_event *event, return; if (!event_filter_match(event)) { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME); return; } @@ -3020,7 +3016,7 @@ static void __perf_event_enable(struct perf_event *event, * then don't put it on unless the group is on. */ if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME); return; } @@ -3865,8 +3861,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task) + enum event_type_t event_type) { int is_active = ctx->is_active; @@ -3878,7 +3873,7 @@ ctx_sched_in(struct perf_event_context *ctx, if (is_active ^ EVENT_TIME) { /* start ctx time */ __update_context_time(ctx, false); - perf_cgroup_set_timestamp(task, ctx); + perf_cgroup_set_timestamp(cpuctx); /* * CPU-release for the below ->is_active store, * see __load_acquire() in perf_event_time_now() @@ -3909,12 +3904,11 @@ ctx_sched_in(struct perf_event_context *ctx, } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task) + enum event_type_t event_type) { struct perf_event_context *ctx = &cpuctx->ctx; - ctx_sched_in(ctx, cpuctx, event_type, task); + ctx_sched_in(ctx, cpuctx, event_type); } static void perf_event_context_sched_in(struct perf_event_context *ctx, @@ -3956,7 +3950,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, */ if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, ctx, task); + perf_event_sched_in(cpuctx, ctx); if (cpuctx->sched_cb_usage && pmu->sched_task) pmu->sched_task(cpuctx->task_ctx, true); @@ -4267,7 +4261,7 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) if (cpu_event) rotate_ctx(&cpuctx->ctx, cpu_event); - perf_event_sched_in(cpuctx, task_ctx, current); + perf_event_sched_in(cpuctx, task_ctx); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); @@ -4339,7 +4333,7 @@ static void perf_event_enable_on_exec(int ctxn) clone_ctx = unclone_ctx(ctx); ctx_resched(cpuctx, ctx, event_type); } else { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME); } perf_ctx_unlock(cpuctx, ctx); From 6875186aea5ce09a644758d9193265da1cc187c7 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:21 +0800 Subject: [PATCH 154/423] perf/core: Use perf_cgroup_info->active to check if cgroup is active Since we use perf_cgroup_set_timestamp() to start cgroup time and set active to 1, then use update_cgrp_time_from_cpuctx() to stop cgroup time and set active to 0. We can use info->active directly to check if cgroup is active. Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-3-zhouchengming@bytedance.com --- kernel/events/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 65450200691c..a08fb92b3934 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -780,7 +780,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, static inline void update_cgrp_time_from_event(struct perf_event *event) { struct perf_cgroup_info *info; - struct perf_cgroup *cgrp; /* * ensure we access cgroup data only when needed and @@ -789,14 +788,12 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) if (!is_cgroup_event(event)) return; - cgrp = perf_cgroup_from_task(current, event->ctx); + info = this_cpu_ptr(event->cgrp->info); /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { - info = this_cpu_ptr(event->cgrp->info); + if (info->active) __update_cgrp_time(info, perf_clock(), true); - } } static inline void From 96492a6c558acb56124844d1409d9ef8624a0322 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:22 +0800 Subject: [PATCH 155/423] perf/core: Fix perf_cgroup_switch() There is a race problem that can trigger WARN_ON_ONCE(cpuctx->cgrp) in perf_cgroup_switch(). CPU1 CPU2 perf_cgroup_sched_out(prev, next) cgrp1 = perf_cgroup_from_task(prev) cgrp2 = perf_cgroup_from_task(next) if (cgrp1 != cgrp2) perf_cgroup_switch(prev, PERF_CGROUP_SWOUT) cgroup_migrate_execute() task->cgroups = ? perf_cgroup_attach() task_function_call(task, __perf_cgroup_move) perf_cgroup_sched_in(prev, next) cgrp1 = perf_cgroup_from_task(prev) cgrp2 = perf_cgroup_from_task(next) if (cgrp1 != cgrp2) perf_cgroup_switch(next, PERF_CGROUP_SWIN) __perf_cgroup_move() perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN) The commit a8d757ef076f ("perf events: Fix slow and broken cgroup context switch code") want to skip perf_cgroup_switch() when the perf_cgroup of "prev" and "next" are the same. But task->cgroups can change in concurrent with context_switch() in cgroup_migrate_execute(). If cgrp1 == cgrp2 in sched_out(), cpuctx won't do sched_out. Then task->cgroups changed cause cgrp1 != cgrp2 in sched_in(), cpuctx will do sched_in. So trigger WARN_ON_ONCE(cpuctx->cgrp). Even though __perf_cgroup_move() will be synchronized as the context switch disables the interrupt, context_switch() still can see the task->cgroups is changing in the middle, since task->cgroups changed before sending IPI. So we have to combine perf_cgroup_sched_in() into perf_cgroup_sched_out(), unified into perf_cgroup_switch(), to fix the incosistency between perf_cgroup_sched_out() and perf_cgroup_sched_in(). But we can't just compare prev->cgroups with next->cgroups to decide whether to skip cpuctx sched_out/in since the prev->cgroups is changing too. For example: CPU1 CPU2 cgroup_migrate_execute() prev->cgroups = ? perf_cgroup_attach() task_function_call(task, __perf_cgroup_move) perf_cgroup_switch(task) cgrp1 = perf_cgroup_from_task(prev) cgrp2 = perf_cgroup_from_task(next) if (cgrp1 != cgrp2) cpuctx sched_out/in ... task_function_call() will return -ESRCH In the above example, prev->cgroups changing cause (cgrp1 == cgrp2) to be true, so skip cpuctx sched_out/in. And later task_function_call() would return -ESRCH since the prev task isn't running on cpu anymore. So we would leave perf_events of the old prev->cgroups still sched on the CPU, which is wrong. The solution is that we should use cpuctx->cgrp to compare with the next task's perf_cgroup. Since cpuctx->cgrp can only be changed on local CPU, and we have irq disabled, we can read cpuctx->cgrp to compare without holding ctx lock. Fixes: a8d757ef076f ("perf events: Fix slow and broken cgroup context switch code") Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-4-zhouchengming@bytedance.com --- kernel/events/core.c | 132 ++++++++----------------------------------- 1 file changed, 25 insertions(+), 107 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a08fb92b3934..bdeb41fe7f15 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -824,17 +824,12 @@ perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); -#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ -#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */ - /* * reschedule events based on the cgroup constraint of task. - * - * mode SWOUT : schedule out everything - * mode SWIN : schedule in based on cgroup for next */ -static void perf_cgroup_switch(struct task_struct *task, int mode) +static void perf_cgroup_switch(struct task_struct *task) { + struct perf_cgroup *cgrp; struct perf_cpu_context *cpuctx, *tmp; struct list_head *list; unsigned long flags; @@ -845,35 +840,31 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) */ local_irq_save(flags); + cgrp = perf_cgroup_from_task(task, NULL); + list = this_cpu_ptr(&cgrp_cpuctx_list); list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + if (READ_ONCE(cpuctx->cgrp) == cgrp) + continue; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - if (mode & PERF_CGROUP_SWOUT) { - cpu_ctx_sched_out(cpuctx, EVENT_ALL); - /* - * must not be done before ctxswout due - * to event_filter_match() in event_sched_out() - */ - cpuctx->cgrp = NULL; - } + cpu_ctx_sched_out(cpuctx, EVENT_ALL); + /* + * must not be done before ctxswout due + * to update_cgrp_time_from_cpuctx() in + * ctx_sched_out() + */ + cpuctx->cgrp = cgrp; + /* + * set cgrp before ctxsw in to allow + * perf_cgroup_set_timestamp() in ctx_sched_in() + * to not have to pass task around + */ + cpu_ctx_sched_in(cpuctx, EVENT_ALL); - if (mode & PERF_CGROUP_SWIN) { - WARN_ON_ONCE(cpuctx->cgrp); - /* - * set cgrp before ctxsw in to allow - * perf_cgroup_set_timestamp() in ctx_sched_in() - * to not have to pass task around - * we pass the cpuctx->ctx to perf_cgroup_from_task() - * because cgorup events are only per-cpu - */ - cpuctx->cgrp = perf_cgroup_from_task(task, - &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL); - } perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } @@ -881,58 +872,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_restore(flags); } -static inline void perf_cgroup_sched_out(struct task_struct *task, - struct task_struct *next) -{ - struct perf_cgroup *cgrp1; - struct perf_cgroup *cgrp2 = NULL; - - rcu_read_lock(); - /* - * we come here when we know perf_cgroup_events > 0 - * we do not need to pass the ctx here because we know - * we are holding the rcu lock - */ - cgrp1 = perf_cgroup_from_task(task, NULL); - cgrp2 = perf_cgroup_from_task(next, NULL); - - /* - * only schedule out current cgroup events if we know - * that we are switching to a different cgroup. Otherwise, - * do no touch the cgroup events. - */ - if (cgrp1 != cgrp2) - perf_cgroup_switch(task, PERF_CGROUP_SWOUT); - - rcu_read_unlock(); -} - -static inline void perf_cgroup_sched_in(struct task_struct *prev, - struct task_struct *task) -{ - struct perf_cgroup *cgrp1; - struct perf_cgroup *cgrp2 = NULL; - - rcu_read_lock(); - /* - * we come here when we know perf_cgroup_events > 0 - * we do not need to pass the ctx here because we know - * we are holding the rcu lock - */ - cgrp1 = perf_cgroup_from_task(task, NULL); - cgrp2 = perf_cgroup_from_task(prev, NULL); - - /* - * only need to schedule in cgroup events if we are changing - * cgroup during ctxsw. Cgroup events were not scheduled - * out of ctxsw out if that was not the case. - */ - if (cgrp1 != cgrp2) - perf_cgroup_switch(task, PERF_CGROUP_SWIN); - - rcu_read_unlock(); -} - static int perf_cgroup_ensure_storage(struct perf_event *event, struct cgroup_subsys_state *css) { @@ -1096,16 +1035,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, { } -static inline void perf_cgroup_sched_out(struct task_struct *task, - struct task_struct *next) -{ -} - -static inline void perf_cgroup_sched_in(struct task_struct *prev, - struct task_struct *task) -{ -} - static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) @@ -1118,11 +1047,6 @@ perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { } -static inline void -perf_cgroup_switch(struct task_struct *task, struct task_struct *next) -{ -} - static inline u64 perf_cgroup_event_time(struct perf_event *event) { return 0; @@ -1142,6 +1066,10 @@ static inline void perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx) { } + +static void perf_cgroup_switch(struct task_struct *task) +{ +} #endif /* @@ -3661,7 +3589,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * cgroup event are system-wide mode only */ if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_sched_out(task, next); + perf_cgroup_switch(next); } /* @@ -3975,16 +3903,6 @@ void __perf_event_task_sched_in(struct task_struct *prev, struct perf_event_context *ctx; int ctxn; - /* - * If cgroup events exist on this CPU, then we need to check if we have - * to switch in PMU state; cgroup event are system-wide mode only. - * - * Since cgroup events are CPU events, we must schedule these in before - * we schedule in the task events. - */ - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_sched_in(prev, task); - for_each_task_context_nr(ctxn) { ctx = task->perf_event_ctxp[ctxn]; if (likely(!ctx)) @@ -13556,7 +13474,7 @@ static int __perf_cgroup_move(void *info) { struct task_struct *task = info; rcu_read_lock(); - perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); + perf_cgroup_switch(task); rcu_read_unlock(); return 0; } From e19cd0b6fa5938c51d7b928010d584f0de93913a Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:23 +0800 Subject: [PATCH 156/423] perf/core: Always set cpuctx cgrp when enable cgroup event When enable a cgroup event, cpuctx->cgrp setting is conditional on the current task cgrp matching the event's cgroup, so have to do it for every new event. It brings complexity but no advantage. To keep it simple, this patch would always set cpuctx->cgrp when enable the first cgroup event, and reset to NULL when disable the last cgroup event. Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-5-zhouchengming@bytedance.com --- kernel/events/core.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index bdeb41fe7f15..23bb19716ad3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -967,22 +967,10 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct */ cpuctx = container_of(ctx, struct perf_cpu_context, ctx); - /* - * Since setting cpuctx->cgrp is conditional on the current @cgrp - * matching the event's cgroup, we must do this for every new event, - * because if the first would mismatch, the second would not try again - * and we would leave cpuctx->cgrp unset. - */ - if (ctx->is_active && !cpuctx->cgrp) { - struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx); - - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) - cpuctx->cgrp = cgrp; - } - if (ctx->nr_cgroups++) return; + cpuctx->cgrp = perf_cgroup_from_task(current, ctx); list_add(&cpuctx->cgrp_cpuctx_entry, per_cpu_ptr(&cgrp_cpuctx_list, event->cpu)); } @@ -1004,9 +992,7 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c if (--ctx->nr_cgroups) return; - if (ctx->is_active && cpuctx->cgrp) - cpuctx->cgrp = NULL; - + cpuctx->cgrp = NULL; list_del(&cpuctx->cgrp_cpuctx_entry); } From 9ce02f0fc68326dd1f87a0a3a4c6ae7fdd39e6f6 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Thu, 24 Mar 2022 11:37:42 +0900 Subject: [PATCH 157/423] x86/bug: Prevent shadowing in __WARN_FLAGS The macro __WARN_FLAGS() uses a local variable named "f". This being a common name, there is a risk of shadowing other variables. For example, GCC would yield: | In file included from ./include/linux/bug.h:5, | from ./include/linux/cpumask.h:14, | from ./arch/x86/include/asm/cpumask.h:5, | from ./arch/x86/include/asm/msr.h:11, | from ./arch/x86/include/asm/processor.h:22, | from ./arch/x86/include/asm/timex.h:5, | from ./include/linux/timex.h:65, | from ./include/linux/time32.h:13, | from ./include/linux/time.h:60, | from ./include/linux/stat.h:19, | from ./include/linux/module.h:13, | from virt/lib/irqbypass.mod.c:1: | ./include/linux/rcupdate.h: In function 'rcu_head_after_call_rcu': | ./arch/x86/include/asm/bug.h:80:21: warning: declaration of 'f' shadows a parameter [-Wshadow] | 80 | __auto_type f = BUGFLAG_WARNING|(flags); \ | | ^ | ./include/asm-generic/bug.h:106:17: note: in expansion of macro '__WARN_FLAGS' | 106 | __WARN_FLAGS(BUGFLAG_ONCE | \ | | ^~~~~~~~~~~~ | ./include/linux/rcupdate.h:1007:9: note: in expansion of macro 'WARN_ON_ONCE' | 1007 | WARN_ON_ONCE(func != (rcu_callback_t)~0L); | | ^~~~~~~~~~~~ | In file included from ./include/linux/rbtree.h:24, | from ./include/linux/mm_types.h:11, | from ./include/linux/buildid.h:5, | from ./include/linux/module.h:14, | from virt/lib/irqbypass.mod.c:1: | ./include/linux/rcupdate.h:1001:62: note: shadowed declaration is here | 1001 | rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) | | ~~~~~~~~~~~~~~~^ For reference, sparse also warns about it, c.f. [1]. This patch renames the variable from f to __flags (with two underscore prefixes as suggested in the Linux kernel coding style [2]) in order to prevent collisions. [1] https://lore.kernel.org/all/CAFGhKbyifH1a+nAMCvWM88TK6fpNPdzFtUXPmRGnnQeePV+1sw@mail.gmail.com/ [2] Linux kernel coding style, section 12) Macros, Enums and RTL, paragraph 5) namespace collisions when defining local variables in macros resembling functions https://www.kernel.org/doc/html/latest/process/coding-style.html#macros-enums-and-rtl Fixes: bfb1a7c91fb7 ("x86/bug: Merge annotate_reachable() into_BUG_FLAGS() asm") Signed-off-by: Vincent Mailhol Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nick Desaulniers Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20220324023742.106546-1-mailhol.vincent@wanadoo.fr --- arch/x86/include/asm/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 4d20a293c6fd..aaf0cb0db4ae 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -78,9 +78,9 @@ do { \ */ #define __WARN_FLAGS(flags) \ do { \ - __auto_type f = BUGFLAG_WARNING|(flags); \ + __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \ + _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \ instrumentation_end(); \ } while (0) From d139bca4b824ffb9731763c31b271a24b595948a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Mar 2022 12:33:31 +0100 Subject: [PATCH 158/423] objtool: Fix IBT tail-call detection Objtool reports: arch/x86/crypto/poly1305-x86_64.o: warning: objtool: poly1305_blocks_avx() falls through to next function poly1305_blocks_x86_64() arch/x86/crypto/poly1305-x86_64.o: warning: objtool: poly1305_emit_avx() falls through to next function poly1305_emit_x86_64() arch/x86/crypto/poly1305-x86_64.o: warning: objtool: poly1305_blocks_avx2() falls through to next function poly1305_blocks_x86_64() Which reads like: 0000000000000040 : 40: f3 0f 1e fa endbr64 ... 0000000000000400 : 400: f3 0f 1e fa endbr64 404: 44 8b 47 14 mov 0x14(%rdi),%r8d 408: 48 81 fa 80 00 00 00 cmp $0x80,%rdx 40f: 73 09 jae 41a 411: 45 85 c0 test %r8d,%r8d 414: 0f 84 2a fc ff ff je 44 ... These are simple conditional tail-calls and *should* be recognised as such by objtool, however due to a mistake in commit 08f87a93c8ec ("objtool: Validate IBT assumptions") this is failing. Specifically, the jump_dest is +4, this means the instruction pointed at will not be ENDBR and as such it will fail the second clause of is_first_func_insn() that was supposed to capture this exact case. Instead, have is_first_func_insn() look at the previous instruction. Fixes: 08f87a93c8ec ("objtool: Validate IBT assumptions") Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220322115125.811582125@infradead.org --- tools/objtool/check.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6de5085e3e5a..b848e1ddd5d8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1239,11 +1239,20 @@ static bool same_function(struct instruction *insn1, struct instruction *insn2) return insn1->func->pfunc == insn2->func->pfunc; } -static bool is_first_func_insn(struct instruction *insn) +static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) { - return insn->offset == insn->func->offset || - (insn->type == INSN_ENDBR && - insn->offset == insn->func->offset + insn->len); + if (insn->offset == insn->func->offset) + return true; + + if (ibt) { + struct instruction *prev = prev_insn_same_sym(file, insn); + + if (prev && prev->type == INSN_ENDBR && + insn->offset == insn->func->offset + prev->len) + return true; + } + + return false; } /* @@ -1327,7 +1336,7 @@ static int add_jump_destinations(struct objtool_file *file) insn->jump_dest->func->pfunc = insn->func; } else if (!same_function(insn, insn->jump_dest) && - is_first_func_insn(insn->jump_dest)) { + is_first_func_insn(file, insn->jump_dest)) { /* internal sibling call (without reloc) */ add_call_dest(file, insn, insn->jump_dest->func, true); } From 7a53f408902d913cd541b4f8ad7dbcd4961f5b82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Mar 2022 23:35:01 +0100 Subject: [PATCH 159/423] objtool: Fix SLS validation for kcov tail-call replacement Since not all compilers have a function attribute to disable KCOV instrumentation, objtool can rewrite KCOV instrumentation in noinstr functions as per commit: f56dae88a81f ("objtool: Handle __sanitize_cov*() tail calls") However, this has subtle interaction with the SLS validation from commit: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation") In that when a tail-call instrucion is replaced with a RET an additional INT3 instruction is also written, but is not represented in the decoded instruction stream. This then leads to false positive missing INT3 objtool warnings in noinstr code. Instead of adding additional struct instruction objects, mark the RET instruction with retpoline_safe to suppress the warning (since we know there really is an INT3). Fixes: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220323230712.GA8939@worktop.programming.kicks-ass.net --- tools/objtool/check.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b848e1ddd5d8..bd0c2c828940 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1155,6 +1155,17 @@ static void annotate_call_site(struct objtool_file *file, : arch_nop_insn(insn->len)); insn->type = sibling ? INSN_RETURN : INSN_NOP; + + if (sibling) { + /* + * We've replaced the tail-call JMP insn by two new + * insn: RET; INT3, except we only have a single struct + * insn here. Mark it retpoline_safe to avoid the SLS + * warning, instead of adding another insn. + */ + insn->retpoline_safe = true; + } + return; } From 00c22013467069197dc006c943ca1f0395ca8aaa Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Wed, 30 Mar 2022 09:43:06 -0700 Subject: [PATCH 160/423] KVM: SEV: Add cond_resched() to loop in sev_clflush_pages() Add resched to avoid warning from sev_clflush_pages() with large number of pages. Signed-off-by: Peter Gonda Cc: Sean Christopherson Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: <20220330164306.2376085-1-pgonda@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 75fa6dd268f0..c2fe89ecdb2d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -465,6 +465,7 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) page_virtual = kmap_atomic(pages[i]); clflush_cache_range(page_virtual, PAGE_SIZE); kunmap_atomic(page_virtual); + cond_resched(); } } From 1d0e84806047f38027d7572adb4702ef7c09b317 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 31 Mar 2022 22:13:59 +0000 Subject: [PATCH 161/423] KVM: x86/mmu: Resolve nx_huge_pages when kvm.ko is loaded Resolve nx_huge_pages to true/false when kvm.ko is loaded, leaving it as -1 is technically undefined behavior when its value is read out by param_get_bool(), as boolean values are supposed to be '0' or '1'. Alternatively, KVM could define a custom getter for the param, but the auto value doesn't depend on the vendor module in any way, and printing "auto" would be unnecessarily unfriendly to the user. In addition to fixing the undefined behavior, resolving the auto value also fixes the scenario where the auto value resolves to N and no vendor module is loaded. Previously, -1 would result in Y being printed even though KVM would ultimately disable the mitigation. Rename the existing MMU module init/exit helpers to clarify that they're invoked with respect to the vendor module, and add comments to document why KVM has two separate "module init" flows. ========================================================================= UBSAN: invalid-load in kernel/params.c:320:33 load of value 255 is not a valid value for type '_Bool' CPU: 6 PID: 892 Comm: tail Not tainted 5.17.0-rc3+ #799 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x34/0x44 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value.cold+0x43/0x48 param_get_bool.cold+0xf/0x14 param_attr_show+0x55/0x80 module_attr_show+0x1c/0x30 sysfs_kf_seq_show+0x93/0xc0 seq_read_iter+0x11c/0x450 new_sync_read+0x11b/0x1a0 vfs_read+0xf0/0x190 ksys_read+0x5f/0xe0 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae ========================================================================= Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation") Cc: stable@vger.kernel.org Reported-by: Bruno Goncalves Reported-by: Jan Stancek Signed-off-by: Sean Christopherson Message-Id: <20220331221359.3912754-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++-- arch/x86/kvm/mmu/mmu.c | 20 ++++++++++++++++---- arch/x86/kvm/x86.c | 20 ++++++++++++++++++-- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d23e80a56eb8..0d37ba442de3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1585,8 +1585,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) -int kvm_mmu_module_init(void); -void kvm_mmu_module_exit(void); +void kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8f19ea752704..f9080ee50ffa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6237,12 +6237,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) return 0; } -int kvm_mmu_module_init(void) +/* + * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as + * its default value of -1 is technically undefined behavior for a boolean. + */ +void kvm_mmu_x86_module_init(void) { - int ret = -ENOMEM; - if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); +} + +/* + * The bulk of the MMU initialization is deferred until the vendor module is + * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need + * to be reset when a potentially different vendor module is loaded. + */ +int kvm_mmu_vendor_module_init(void) +{ + int ret = -ENOMEM; /* * MMU roles use union aliasing which is, generally speaking, an @@ -6290,7 +6302,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) mmu_free_memory_caches(vcpu); } -void kvm_mmu_module_exit(void) +void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c0ca599a353..de49a88df1c2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8926,7 +8926,7 @@ int kvm_arch_init(void *opaque) } kvm_nr_uret_msrs = 0; - r = kvm_mmu_module_init(); + r = kvm_mmu_vendor_module_init(); if (r) goto out_free_percpu; @@ -8974,7 +8974,7 @@ void kvm_arch_exit(void) cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; - kvm_mmu_module_exit(); + kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); #ifdef CONFIG_KVM_XEN @@ -12986,3 +12986,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); + +static int __init kvm_x86_init(void) +{ + kvm_mmu_x86_module_init(); + return 0; +} +module_init(kvm_x86_init); + +static void __exit kvm_x86_exit(void) +{ + /* + * If module_init() is implemented, module_exit() must also be + * implemented to allow module unload. + */ +} +module_exit(kvm_x86_exit); From 3203a56a0f0eaaf4ea7fc01467378c4bce3841ff Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Fri, 1 Apr 2022 08:35:30 +0000 Subject: [PATCH 162/423] KVM: x86/mmu: remove unnecessary flush_workqueue() All work currently pending will be done first by calling destroy_workqueue, so there is unnecessary to flush it explicitly. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Reviewed-by: Sean Christopherson Message-Id: <20220401083530.2407703-1-lv.ruyi@zte.com.cn> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d71d177ae6b8..c472769e0300 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -51,7 +51,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; - flush_workqueue(kvm->arch.tdp_mmu_zap_wq); + /* Also waits for any queued work items. */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); From c1be1ef1b4a7589878d63673b7b322856989064e Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sun, 3 Apr 2022 13:57:36 +0700 Subject: [PATCH 163/423] Documentation: kvm: Add missing line break in api.rst Add missing line break separator between literal block and description of KVM_EXIT_RISCV_SBI. This fixes: /Documentation/virt/kvm/api.rst:6118: WARNING: Literal block ends without a blank line; unexpected unindent. Fixes: da40d85805937d (RISC-V: KVM: Document RISC-V specific parts of KVM API, 2021-09-27) Cc: Anup Patel Cc: Paolo Bonzini Cc: Jonathan Corbet Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-riscv@lists.infradead.org Signed-off-by: Bagas Sanjaya Message-Id: <20220403065735.23859-1-bagasdotme@gmail.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d13fa6600467..85c7abc51af5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6190,6 +6190,7 @@ Valid values for 'type' are: unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + If exit reason is KVM_EXIT_RISCV_SBI then it indicates that the VCPU has done a SBI call which is not handled by KVM RISC-V kernel module. The details of the SBI call are available in 'riscv_sbi' member of kvm_run structure. The From c31bc0461f861ec68e026b5d4d27394bcb722068 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 29 Mar 2022 16:25:52 +0200 Subject: [PATCH 164/423] powerpc/pseries/vas: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the pseries vas sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Haren Myneni Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20220329142552.558339-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/vas-sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index 4a7fcde5afc0..909535ca513a 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -99,6 +99,7 @@ static struct attribute *vas_def_capab_attrs[] = { &nr_used_credits_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(vas_def_capab); static struct attribute *vas_qos_capab_attrs[] = { &nr_total_credits_attribute.attr, @@ -106,6 +107,7 @@ static struct attribute *vas_qos_capab_attrs[] = { &update_total_credits_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(vas_qos_capab); static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -154,13 +156,13 @@ static const struct sysfs_ops vas_sysfs_ops = { static struct kobj_type vas_def_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_def_capab_attrs, + .default_groups = vas_def_capab_groups, }; static struct kobj_type vas_qos_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_qos_capab_attrs, + .default_groups = vas_qos_capab_groups, }; static char *vas_caps_kobj_name(struct vas_caps_entry *centry, From cdb4f26a63c391317e335e6e683a614358e70aeb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Jan 2022 14:31:51 +0100 Subject: [PATCH 165/423] kobject: kobj_type: remove default_attrs Now that all in-kernel users of default_attrs for the kobj_type are gone and converted to properly use the default_groups pointer instead, it can be safely removed. There is one standard way to create sysfs files in a kobj_type, and not two like before, causing confusion as to which should be used. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20220106133151.607703-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 13 ------------- include/linux/kobject.h | 1 - lib/kobject.c | 32 -------------------------------- 3 files changed, 46 deletions(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 42dcf96881b6..a12ac0356c69 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -703,19 +703,6 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) ktype = get_ktype(kobj); if (ktype) { - struct attribute **kattr; - - /* - * Change owner of the default attributes associated with the - * ktype of @kobj. - */ - for (kattr = ktype->default_attrs; kattr && *kattr; kattr++) { - error = sysfs_file_change_owner(kobj, (*kattr)->name, - kuid, kgid); - if (error) - return error; - } - /* * Change owner of the default groups associated with the * ktype of @kobj. diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c7b47399b36a..57fb972fea05 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -120,7 +120,6 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; /* use default_groups instead */ const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); diff --git a/lib/kobject.c b/lib/kobject.c index 56fa037501b5..5f0e71ab292c 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -54,32 +54,6 @@ void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) kobj->ktype->get_ownership(kobj, uid, gid); } -/* - * populate_dir - populate directory with attributes. - * @kobj: object we're working on. - * - * Most subsystems have a set of default attributes that are associated - * with an object that registers with them. This is a helper called during - * object registration that loops through the default attributes of the - * subsystem and creates attributes files for them in sysfs. - */ -static int populate_dir(struct kobject *kobj) -{ - const struct kobj_type *t = get_ktype(kobj); - struct attribute *attr; - int error = 0; - int i; - - if (t && t->default_attrs) { - for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) { - error = sysfs_create_file(kobj, attr); - if (error) - break; - } - } - return error; -} - static int create_dir(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); @@ -90,12 +64,6 @@ static int create_dir(struct kobject *kobj) if (error) return error; - error = populate_dir(kobj); - if (error) { - sysfs_remove_dir(kobj); - return error; - } - if (ktype) { error = sysfs_create_groups(kobj, ktype->default_groups); if (error) { From 6150f276073a1480030242a7e006a89e161d6cd6 Mon Sep 17 00:00:00 2001 From: Kyle Copperfield Date: Sat, 20 Nov 2021 13:23:02 +0100 Subject: [PATCH 166/423] media: rockchip/rga: do proper error checking in probe The latest fix for probe error handling contained a typo that causes probing to fail with the following message: rockchip-rga: probe of ff680000.rga failed with error -12 This patch fixes the typo. Fixes: e58430e1d4fd (media: rockchip/rga: fix error handling in probe) Reviewed-by: Dragan Simic Signed-off-by: Kyle Copperfield Reviewed-by: Kieran Bingham Reviewed-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rga/rga.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 4de5e8d2b261..3d3d1062e212 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -892,7 +892,7 @@ static int rga_probe(struct platform_device *pdev) } rga->dst_mmu_pages = (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3); - if (rga->dst_mmu_pages) { + if (!rga->dst_mmu_pages) { ret = -ENOMEM; goto free_src_pages; } From fa4dcc880390fbedf4118e9f88a6b13363e0a7a1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 17 Mar 2022 21:19:56 +0800 Subject: [PATCH 167/423] irq/qcom-mpm: Fix build error without MAILBOX If MAILBOX is n, building fails: drivers/irqchip/irq-qcom-mpm.o: In function `mpm_pd_power_off': irq-qcom-mpm.c:(.text+0x174): undefined reference to `mbox_send_message' irq-qcom-mpm.c:(.text+0x174): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `mbox_send_message' Make QCOM_MPM depends on MAILBOX to fix this. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Signed-off-by: YueHaibing Acked-by: Shawn Guo Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220317131956.30004-1-yuehaibing@huawei.com --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 680d2fcf2686..15edb9a6fcae 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -433,6 +433,7 @@ config QCOM_PDC config QCOM_MPM tristate "QCOM MPM" depends on ARCH_QCOM + depends on MAILBOX select IRQ_DOMAIN_HIERARCHY help MSM Power Manager driver to manage and configure wakeup From 76ff614a79152cee07a2c48080c3dc91c56f0f1d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 16 Mar 2022 10:51:00 +0800 Subject: [PATCH 168/423] irqchip/irq-qcom-mpm: fix return value check in qcom_mpm_init() If devm_platform_ioremap_resource() fails, it never returns NULL, replace NULL check with IS_ERR(). Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Acked-by: Shawn Guo Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220316025100.1758413-1-yangyingliang@huawei.com --- drivers/irqchip/irq-qcom-mpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index eea5a753618c..d30614661eea 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -375,7 +375,7 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent) raw_spin_lock_init(&priv->lock); priv->base = devm_platform_ioremap_resource(pdev, 0); - if (!priv->base) + if (IS_ERR(priv->base)) return PTR_ERR(priv->base); for (i = 0; i < priv->reg_stride; i++) { From af27e41612ec7e5b4783f589b753a7c31a37aac8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 17 Mar 2022 09:49:02 +0000 Subject: [PATCH 169/423] irqchip/gic-v4: Wait for GICR_VPENDBASER.Dirty to clear before descheduling The way KVM drives GICv4.{0,1} is as follows: - vcpu_load() makes the VPE resident, instructing the RD to start scanning for interrupts - just before entering the guest, we check that the RD has finished scanning and that we can start running the vcpu - on preemption, we deschedule the VPE by making it invalid on the RD However, we are preemptible between the first two steps. If it so happens *and* that the RD was still scanning, we nonetheless write to the GICR_VPENDBASER register while Dirty is set, and bad things happen (we're in UNPRED land). This affects both the 4.0 and 4.1 implementations. Make sure Dirty is cleared before performing the deschedule, meaning that its_clear_vpend_valid() becomes a sort of full VPE residency barrier. Reported-by: Jingyi Wang Tested-by: Nianyao Tang Signed-off-by: Marc Zyngier Fixes: 57e3cebd022f ("KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit") Link: https://lore.kernel.org/r/4aae10ba-b39a-5f84-754b-69c2eb0a2c03@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cd772973114a..a0fc764ec9dc 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3011,18 +3011,12 @@ static int __init allocate_lpi_tables(void) return 0; } -static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +static u64 read_vpend_dirty_clear(void __iomem *vlpi_base) { u32 count = 1000000; /* 1s! */ bool clean; u64 val; - val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - val &= ~clr; - val |= set; - gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - do { val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); clean = !(val & GICR_VPENDBASER_Dirty); @@ -3033,10 +3027,26 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) } } while (!clean && count); - if (unlikely(val & GICR_VPENDBASER_Dirty)) { + if (unlikely(!clean)) pr_err_ratelimited("ITS virtual pending table not cleaning\n"); + + return val; +} + +static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +{ + u64 val; + + /* Make sure we wait until the RD is done with the initial scan */ + val = read_vpend_dirty_clear(vlpi_base); + val &= ~GICR_VPENDBASER_Valid; + val &= ~clr; + val |= set; + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + val = read_vpend_dirty_clear(vlpi_base); + if (unlikely(val & GICR_VPENDBASER_Dirty)) val |= GICR_VPENDBASER_PendingLast; - } return val; } From 0df6664531a12cdd8fc873f0cac0dcb40243d3e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Mar 2022 16:50:32 +0000 Subject: [PATCH 170/423] irqchip/gic-v3: Fix GICR_CTLR.RWP polling It turns out that our polling of RWP is totally wrong when checking for it in the redistributors, as we test the *distributor* bit index, whereas it is a different bit number in the RDs... Oopsie boo. This is embarassing. Not only because it is wrong, but also because it took *8 years* to notice the blunder... Just fix the damn thing. Fixes: 021f653791ad ("irqchip: gic-v3: Initial support for GICv3") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Reviewed-by: Andre Przywara Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220315165034.794482-2-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 0efe1a9a9f3b..9b6316582515 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -206,11 +206,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d) } } -static void gic_do_wait_for_rwp(void __iomem *base) +static void gic_do_wait_for_rwp(void __iomem *base, u32 bit) { u32 count = 1000000; /* 1s! */ - while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl_relaxed(base + GICD_CTLR) & bit) { count--; if (!count) { pr_err_ratelimited("RWP timeout, gone fishing\n"); @@ -224,13 +224,13 @@ static void gic_do_wait_for_rwp(void __iomem *base) /* Wait for completion of a distributor change */ static void gic_dist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data.dist_base); + gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP); } /* Wait for completion of a redistributor change */ static void gic_redist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data_rdist_rd_base()); + gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } #ifdef CONFIG_ARM64 From 544808f7e21cb9ccdb8f3aa7de594c05b1419061 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 4 Apr 2022 12:08:42 +0100 Subject: [PATCH 171/423] irqchip/gic, gic-v3: Prevent GSI to SGI translations At the moment the GIC IRQ domain translation routine happily converts ACPI table GSI numbers below 16 to GIC SGIs (Software Generated Interrupts aka IPIs). On the Devicetree side we explicitly forbid this translation, actually the function will never return HWIRQs below 16 when using a DT based domain translation. We expect SGIs to be handled in the first part of the function, and any further occurrence should be treated as a firmware bug, so add a check and print to report this explicitly and avoid lengthy debug sessions. Fixes: 64b499d8df40 ("irqchip/gic-v3: Configure SGIs as standard interrupts") Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220404110842.2882446-1-andre.przywara@arm.com --- drivers/irqchip/irq-gic-v3.c | 6 ++++++ drivers/irqchip/irq-gic.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 9b6316582515..b252d5534547 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1466,6 +1466,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 58ba835bee1f..09c710ecc387 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1123,6 +1123,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; From c9db8a30d9f091aa571b5fb7c3f434cde107b02c Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 5 Apr 2022 15:36:22 +0300 Subject: [PATCH 172/423] ALSA: hda/i915 - skip acomp init if no matching display In systems with only a discrete i915 GPU, the acomp init will always timeout for the PCH HDA controller instance. Avoid the timeout by checking the PCI device hierarchy whether any display class PCI device can be found on the system, and at the same level as the HDA PCI device. If found, proceed with the acomp init, which will wait until i915 probe is complete and component binding can proceed. If no matching display device is found, the audio component bind can be safely skipped. The bind timeout will still be hit if the display is present in the system, but i915 driver does not bind to it by configuration choice or probe error. In this case the 60sec timeout will be hit. Signed-off-by: Kai Vehmanen Acked-by: Lucas De Marchi Link: https://lore.kernel.org/r/20220405123622.2874457-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/hdac_i915.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index efe810af28c5..48b8ed752b69 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -116,16 +116,25 @@ static int i915_component_master_match(struct device *dev, int subcomponent, return 0; } -/* check whether intel graphics is present */ -static bool i915_gfx_present(void) +/* check whether Intel graphics is present and reachable */ +static int i915_gfx_present(struct pci_dev *hdac_pci) { - static const struct pci_device_id ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_ANY_ID), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16 }, - {} - }; - return pci_dev_present(ids); + unsigned int class = PCI_BASE_CLASS_DISPLAY << 16; + struct pci_dev *display_dev = NULL; + bool match = false; + + do { + display_dev = pci_get_class(class, display_dev); + + if (display_dev && display_dev->vendor == PCI_VENDOR_ID_INTEL && + connectivity_check(display_dev, hdac_pci)) + match = true; + + pci_dev_put(display_dev); + + } while (!match && display_dev); + + return match; } /** @@ -145,7 +154,7 @@ int snd_hdac_i915_init(struct hdac_bus *bus) struct drm_audio_component *acomp; int err; - if (!i915_gfx_present()) + if (!i915_gfx_present(to_pci_dev(bus->dev))) return -ENODEV; err = snd_hdac_acomp_init(bus, NULL, From 1ef8715975de8bd481abbd0839ed4f49d9e5b0ff Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:08 +0200 Subject: [PATCH 173/423] ALSA: usb-audio: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: sound/usb/midi.c: In function ‘snd_usbmidi_out_endpoint_create’: sound/usb/midi.c:1389:2: error: case label does not reduce to an integer constant case USB_ID(0xfc08, 0x0101): /* Unknown vendor Cable */ ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. [ A slight correction with parentheses around the argument by tiwai ] Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220405151517.29753-3-bp@alien8.de Signed-off-by: Takashi Iwai --- sound/usb/usbaudio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 167834133b9b..b8359a0aa008 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -8,7 +8,7 @@ */ /* handling of USB vendor/product ID pairs as 32-bit numbers */ -#define USB_ID(vendor, product) (((vendor) << 16) | (product)) +#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product)) #define USB_ID_VENDOR(id) ((id) >> 16) #define USB_ID_PRODUCT(id) ((u16)(id)) From ac2a3feefad549814f5e7cca30be07a255c8494a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Apr 2022 19:49:26 +0200 Subject: [PATCH 174/423] ACPI: bus: Eliminate acpi_bus_get_device() Replace the last instance of acpi_bus_get_device(), added recently by commit 87e59b36e5e2 ("spi: Support selection of the index of the ACPI Spi Resource before alloc"), with acpi_fetch_acpi_dev() and finally drop acpi_bus_get_device() that has no more users. Signed-off-by: Rafael J. Wysocki Acked-by: Mark Brown --- drivers/acpi/scan.c | 13 ------------- drivers/spi/spi.c | 3 ++- include/acpi/acpi_bus.h | 1 - 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 9efbfe087de7..762b61f67e6c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -588,19 +588,6 @@ static struct acpi_device *handle_to_device(acpi_handle handle, return adev; } -int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device) -{ - if (!device) - return -EINVAL; - - *device = handle_to_device(handle, NULL); - if (!*device) - return -ENODEV; - - return 0; -} -EXPORT_SYMBOL(acpi_bus_get_device); - /** * acpi_fetch_acpi_dev - Retrieve ACPI device object. * @handle: ACPI handle associated with the requested ACPI device object. diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c4dd1200fe99..9bb5400af205 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2406,7 +2406,8 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) } else { struct acpi_device *adev; - if (acpi_bus_get_device(parent_handle, &adev)) + adev = acpi_fetch_acpi_dev(parent_handle); + if (!adev) return -ENODEV; ctlr = acpi_spi_find_controller_by_adev(adev); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 3f7f01f03869..c4b78c21d793 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -509,7 +509,6 @@ extern int unregister_acpi_notifier(struct notifier_block *); * External Functions */ -int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); From 9435be734ae9de020072bd4443d46e02d92564d1 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Wed, 23 Mar 2022 09:45:58 +0800 Subject: [PATCH 175/423] btrfs: zoned: remove redundant condition in btrfs_run_delalloc_range The logic !A || A && B is equivalent to !A || B. so we can make code clear. Note: though it's preferred to be in the more human readable form, there have been repeated reports and patches as the expression is detected by tools so apply it to reduce the load. Reviewed-by: Johannes Thumshirn Signed-off-by: Haowen Bai Reviewed-by: David Sterba [ add note ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5aab6af88349..286ab7c27db0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2016,8 +2016,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page * to use run_delalloc_nocow() here, like for regular * preallocated inodes. */ - ASSERT(!zoned || - (zoned && btrfs_is_data_reloc_root(inode->root))); + ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root)); ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, nr_written); } else if (!inode_can_compress(inode) || From 6d4a6b515c39f1f8763093e0f828959b2fbc2f45 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 24 Mar 2022 08:36:45 -0700 Subject: [PATCH 176/423] btrfs: remove unused variable in btrfs_{start,write}_dirty_block_groups() Clang's version of -Wunused-but-set-variable recently gained support for unary operations, which reveals two unused variables: fs/btrfs/block-group.c:2949:6: error: variable 'num_started' set but not used [-Werror,-Wunused-but-set-variable] int num_started = 0; ^ fs/btrfs/block-group.c:3116:6: error: variable 'num_started' set but not used [-Werror,-Wunused-but-set-variable] int num_started = 0; ^ 2 errors generated. These variables appear to be unused from their introduction, so just remove them to silence the warnings. Fixes: c9dc4c657850 ("Btrfs: two stage dirty block group writeout") Fixes: 1bbc621ef284 ("Btrfs: allow block group cache writeout outside critical section in commit") CC: stable@vger.kernel.org # 5.4+ Link: https://github.com/ClangBuiltLinux/linux/issues/1614 Signed-off-by: Nathan Chancellor Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c22d287e020b..9ad265066225 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2946,7 +2946,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); @@ -3012,7 +3011,6 @@ again: cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; /* @@ -3113,7 +3111,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -3171,7 +3168,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; list_add_tail(&cache->io_list, io); } else { From 6d82ad13c4110e73c7b0392f00534a1502a1b520 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 28 Mar 2022 21:32:05 +0900 Subject: [PATCH 177/423] btrfs: release correct delalloc amount in direct IO write path Running generic/406 causes the following WARNING in btrfs_destroy_inode() which tells there are outstanding extents left. In btrfs_get_blocks_direct_write(), we reserve a temporary outstanding extents with btrfs_delalloc_reserve_metadata() (or indirectly from btrfs_delalloc_reserve_space(()). We then release the outstanding extents with btrfs_delalloc_release_extents(). However, the "len" can be modified in the COW case, which releases fewer outstanding extents than expected. Fix it by calling btrfs_delalloc_release_extents() for the original length. To reproduce the warning, the filesystem should be 1 GiB. It's triggering a short-write, due to not being able to allocate a large extent and instead allocating a smaller one. WARNING: CPU: 0 PID: 757 at fs/btrfs/inode.c:8848 btrfs_destroy_inode+0x1e6/0x210 [btrfs] Modules linked in: btrfs blake2b_generic xor lzo_compress lzo_decompress raid6_pq zstd zstd_decompress zstd_compress xxhash zram zsmalloc CPU: 0 PID: 757 Comm: umount Not tainted 5.17.0-rc8+ #101 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS d55cb5a 04/01/2014 RIP: 0010:btrfs_destroy_inode+0x1e6/0x210 [btrfs] RSP: 0018:ffffc9000327bda8 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff888100548b78 RCX: 0000000000000000 RDX: 0000000000026900 RSI: 0000000000000000 RDI: ffff888100548b78 RBP: ffff888100548940 R08: 0000000000000000 R09: ffff88810b48aba8 R10: 0000000000000001 R11: ffff8881004eb240 R12: ffff88810b48a800 R13: ffff88810b48ec08 R14: ffff88810b48ed00 R15: ffff888100490c68 FS: 00007f8549ea0b80(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f854a09e733 CR3: 000000010a2e9003 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: destroy_inode+0x33/0x70 dispose_list+0x43/0x60 evict_inodes+0x161/0x1b0 generic_shutdown_super+0x2d/0x110 kill_anon_super+0xf/0x20 btrfs_kill_super+0xd/0x20 [btrfs] deactivate_locked_super+0x27/0x90 cleanup_mnt+0x12c/0x180 task_work_run+0x54/0x80 exit_to_user_mode_prepare+0x152/0x160 syscall_exit_to_user_mode+0x12/0x30 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f854a000fb7 Fixes: f0bfa76a11e9 ("btrfs: fix ENOSPC failure when attempting direct IO write into NOCOW range") CC: stable@vger.kernel.org # 5.17 Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Filipe Manana Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 286ab7c27db0..53a3f5e5ae89 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7442,6 +7442,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, u64 block_start, orig_start, orig_block_len, ram_bytes; bool can_nocow = false; bool space_reserved = false; + u64 prev_len; int ret = 0; /* @@ -7469,6 +7470,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, can_nocow = true; } + prev_len = len; if (can_nocow) { struct extent_map *em2; @@ -7498,8 +7500,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, goto out; } } else { - const u64 prev_len = len; - /* Our caller expects us to free the input extent map. */ free_extent_map(em); *map = NULL; @@ -7530,7 +7530,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, * We have created our ordered extent, so we can now release our reservation * for an outstanding extent. */ - btrfs_delalloc_release_extents(BTRFS_I(inode), len); + btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len); /* * Need to update the i_size under the extent lock so buffered From d03ae0d3b687223de24d3dd1a7bca96034aeca25 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 30 Mar 2022 12:14:05 +0300 Subject: [PATCH 178/423] btrfs: remove support of balance v1 ioctl It was scheduled for removal in kernel v5.18 commit 6c405b24097c ("btrfs: deprecate BTRFS_IOC_BALANCE ioctl") thus its time has come. Reviewed-by: Sweet Tea Dorminy Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f46e71061942..be6c24577dbe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5456,8 +5456,6 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_fs_info(fs_info, argp); case BTRFS_IOC_DEV_INFO: return btrfs_ioctl_dev_info(fs_info, argp); - case BTRFS_IOC_BALANCE: - return btrfs_ioctl_balance(file, NULL); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(inode, argp); case BTRFS_IOC_TREE_SEARCH_V2: From a690e5f2db4d1dca742ce734aaff9f3112d63764 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 Mar 2022 15:55:58 +0900 Subject: [PATCH 179/423] btrfs: mark resumed async balance as writing When btrfs balance is interrupted with umount, the background balance resumes on the next mount. There is a potential deadlock with FS freezing here like as described in commit 26559780b953 ("btrfs: zoned: mark relocation as writing"). Mark the process as sb_writing to avoid it. Reviewed-by: Filipe Manana CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2cfbc74a3b4e..a8cc736731fd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4430,10 +4430,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; + sb_start_write(fs_info->sb); mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); + sb_end_write(fs_info->sb); return ret; } From 820c363bd526ec8e133e4b84e6ad1fda12023b4b Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 22 Mar 2022 18:11:33 +0900 Subject: [PATCH 180/423] btrfs: return allocated block group from do_chunk_alloc() Return the allocated block group from do_chunk_alloc(). This is a preparation patch for the next patch. CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9ad265066225..c2b898fe7b24 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3451,7 +3451,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) +static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) { struct btrfs_block_group *bg; int ret; @@ -3538,7 +3538,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) out: btrfs_trans_release_chunk_metadata(trans); - return ret; + if (ret) + return ERR_PTR(ret); + + btrfs_get_block_group(bg); + return bg; } /* @@ -3653,6 +3657,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *space_info; + struct btrfs_block_group *ret_bg; bool wait_for_alloc = false; bool should_alloc = false; int ret = 0; @@ -3746,9 +3751,14 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, force_metadata_allocation(fs_info); } - ret = do_chunk_alloc(trans, flags); + ret_bg = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; + if (IS_ERR(ret_bg)) + ret = PTR_ERR(ret_bg); + else + btrfs_put_block_group(ret_bg); + spin_lock(&space_info->lock); if (ret < 0) { if (ret == -ENOSPC) From 760e69c4c2e2f475a812bdd414b62758215ce9cb Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 22 Mar 2022 18:11:34 +0900 Subject: [PATCH 181/423] btrfs: zoned: activate block group only for extent allocation In btrfs_make_block_group(), we activate the allocated block group, expecting that the block group is soon used for allocation. However, the chunk allocation from flush_space() context broke the assumption. There can be a large time gap between the chunk allocation time and the extent allocation time from the chunk. Activating the empty block groups pre-allocated from flush_space() context can exhaust the active zone counter of a device. Once we use all the active zone counts for empty pre-allocated block groups, we cannot activate new block group for the other things: metadata, tree-log, or data relocation block group. That failure results in a fake -ENOSPC. This patch introduces CHUNK_ALLOC_FORCE_FOR_EXTENT to distinguish the chunk allocation from find_free_extent(). Now, the new block group is activated only in that context. Fixes: eb66a010d518 ("btrfs: zoned: activate new block group") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 24 ++++++++++++++++-------- fs/btrfs/block-group.h | 4 ++++ fs/btrfs/extent-tree.c | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c2b898fe7b24..0dd6de994199 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2503,12 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - /* - * New block group is likely to be used soon. Try to activate it now. - * Failure is OK for now. - */ - btrfs_zone_activate(cache); - ret = exclude_super_stripes(cache); if (ret) { /* We may have excluded something, so call this just in case */ @@ -3660,8 +3654,14 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, struct btrfs_block_group *ret_bg; bool wait_for_alloc = false; bool should_alloc = false; + bool from_extent_allocation = false; int ret = 0; + if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) { + from_extent_allocation = true; + force = CHUNK_ALLOC_FORCE; + } + /* Don't re-enter if we're already allocating a chunk */ if (trans->allocating_chunk) return -ENOSPC; @@ -3754,9 +3754,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, ret_bg = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; - if (IS_ERR(ret_bg)) + if (IS_ERR(ret_bg)) { ret = PTR_ERR(ret_bg); - else + } else if (from_extent_allocation) { + /* + * New block group is likely to be used soon. Try to activate + * it now. Failure is OK for now. + */ + btrfs_zone_activate(ret_bg); + } + + if (!ret) btrfs_put_block_group(ret_bg); spin_lock(&space_info->lock); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 93aabc68bb6a..e8308f2ad07d 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -35,11 +35,15 @@ enum btrfs_discard_state { * the FS with empty chunks * * CHUNK_ALLOC_FORCE means it must try to allocate one + * + * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from + * find_free_extent() that also activaes the zone */ enum btrfs_chunk_alloc_enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE, + CHUNK_ALLOC_FORCE_FOR_EXTENT, }; struct btrfs_caching_control { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f477035a2ac2..6aa92f84f465 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4082,7 +4082,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, } ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, - CHUNK_ALLOC_FORCE); + CHUNK_ALLOC_FORCE_FOR_EXTENT); /* Do not bail out on ENOSPC since we can do more. */ if (ret == -ENOSPC) From 168a2f776b9762f4021421008512dd7ab7474df1 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 24 Mar 2022 06:44:54 -0700 Subject: [PATCH 182/423] btrfs: fix root ref counts in error handling in btrfs_get_root_ref In btrfs_get_root_ref(), when btrfs_insert_fs_root() fails, btrfs_put_root() can happen for two reasons: - the root already exists in the tree, in that case it returns the reference obtained in btrfs_lookup_fs_root() - another error so the cleanup is done in the fail label Calling btrfs_put_root() unconditionally would lead to double decrement of the root reference possibly freeing it in the second case. Reported-by: TOTE Robot Fixes: bc44d7c4b2b1 ("btrfs: push btrfs_grab_fs_root into btrfs_get_fs_root") CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Jia-Ju Bai Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09693ab4fde0..cebd7a78c964 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1849,9 +1849,10 @@ again: ret = btrfs_insert_fs_root(fs_info, root); if (ret) { - btrfs_put_root(root); - if (ret == -EEXIST) + if (ret == -EEXIST) { + btrfs_put_root(root); goto again; + } goto fail; } return root; From acee08aaf6d158d03668dc82b0a0eef41100531b Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 31 Mar 2022 14:58:28 -0700 Subject: [PATCH 183/423] btrfs: fix btrfs_submit_compressed_write cgroup attribution This restores the logic from commit 46bcff2bfc5e ("btrfs: fix compressed write bio blkcg attribution") which added cgroup attribution to btrfs writeback. It also adds back the REQ_CGROUP_PUNT flag for these ios. Fixes: 91507240482e ("btrfs: determine stripe boundary at bio allocation time in btrfs_submit_compressed_write") CC: stable@vger.kernel.org # 5.16+ Signed-off-by: Dennis Zhou Signed-off-by: David Sterba --- fs/btrfs/compression.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index be476f094300..19bf36d8ffea 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -537,6 +537,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; + if (blkcg_css) + kthread_associate_blkcg(blkcg_css); + while (cur_disk_bytenr < disk_start + compressed_len) { u64 offset = cur_disk_bytenr - disk_start; unsigned int index = offset >> PAGE_SHIFT; @@ -555,6 +558,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio = NULL; goto finish_cb; } + if (blkcg_css) + bio->bi_opf |= REQ_CGROUP_PUNT; } /* * We should never reach next_stripe_start start as we will @@ -612,6 +617,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, return 0; finish_cb: + if (blkcg_css) + kthread_associate_blkcg(NULL); + if (bio) { bio->bi_status = ret; bio_endio(bio); From 62c31868f528e8657947694913f1e76db816425b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Mar 2022 14:31:51 +0200 Subject: [PATCH 184/423] media: platform: imx-mipi-csis: Add dependency on VIDEO_DEV The imx-mipi-csis driver (VIDEO_IMX_MIPI_CSIS) lost its dependency on VIDEO_DEV in commit 63fe3d27b226 ("media: platform/*/Kconfig: make manufacturer menus more uniform"). This causes build failures with configurations that don't have VIDEO_DEV set. Fix it by restoring the dependency. Link: https://lore.kernel.org/linux-media/20220331123151.1953-1-laurent.pinchart@ideasonboard.com Fixes: 63fe3d27b226 ("media: platform/*/Kconfig: make manufacturer menus more uniform") Reported-by: kernel test robot Signed-off-by: Laurent Pinchart Signed-off-by: Randy Dunlap Reported-by: kernel test robot Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/nxp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/nxp/Kconfig b/drivers/media/platform/nxp/Kconfig index 28f2bafc14d2..5afa373e534f 100644 --- a/drivers/media/platform/nxp/Kconfig +++ b/drivers/media/platform/nxp/Kconfig @@ -7,6 +7,7 @@ comment "NXP media platform drivers" config VIDEO_IMX_MIPI_CSIS tristate "NXP MIPI CSI-2 CSIS receiver found on i.MX7 and i.MX8 models" depends on ARCH_MXC || COMPILE_TEST + depends on VIDEO_DEV select MEDIA_CONTROLLER select V4L2_FWNODE select VIDEO_V4L2_SUBDEV_API From 2da0aebc74dba6a09ac90b88e38860fbc65d6c0a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 22 Mar 2022 18:35:36 +0000 Subject: [PATCH 185/423] KVM: arm64: Generally disallow SMC64 for AArch32 guests The only valid calling SMC calling convention from an AArch32 state is SMC32. Disallow any PSCI function that sets the SMC64 function ID bit when called from AArch32 rather than comparing against known SMC64 PSCI functions. Note that without this change KVM advertises the SMC64 flavor of SYSTEM_RESET2 to AArch32 guests. Fixes: d43583b890e7 ("KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest") Acked-by: Will Deacon Reviewed-by: Reiji Watanabe Reviewed-by: Andrew Jones Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220322183538.2757758-2-oupton@google.com --- arch/arm64/kvm/psci.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 372da09a2fab..a76d03d50624 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -215,15 +215,11 @@ static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) { - switch(fn) { - case PSCI_0_2_FN64_CPU_SUSPEND: - case PSCI_0_2_FN64_CPU_ON: - case PSCI_0_2_FN64_AFFINITY_INFO: - /* Disallow these functions for 32bit guests */ - if (vcpu_mode_is_32bit(vcpu)) - return PSCI_RET_NOT_SUPPORTED; - break; - } + /* + * Prevent 32 bit guests from calling 64 bit PSCI functions. + */ + if ((fn & PSCI_0_2_64BIT) && vcpu_mode_is_32bit(vcpu)) + return PSCI_RET_NOT_SUPPORTED; return 0; } From 827c2ab3314814e1c7d873372c0fe0cad50ba1c5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 22 Mar 2022 18:35:37 +0000 Subject: [PATCH 186/423] KVM: arm64: Actually prevent SMC64 SYSTEM_RESET2 from AArch32 The SMCCC does not allow the SMC64 calling convention to be used from AArch32. While KVM checks to see if the calling convention is allowed in PSCI_1_0_FN_PSCI_FEATURES, it does not actually prevent calls to unadvertised PSCI v1.0+ functions. Hoist the check to see if the requested function is allowed into kvm_psci_call(), thereby preventing SMC64 calls from AArch32 for all PSCI versions. Fixes: d43583b890e7 ("KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest") Acked-by: Will Deacon Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220322183538.2757758-3-oupton@google.com --- arch/arm64/kvm/psci.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index a76d03d50624..faf403a72fdf 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -231,10 +231,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) unsigned long val; int ret = 1; - val = kvm_psci_check_allowed_function(vcpu, psci_fn); - if (val) - goto out; - switch (psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: /* @@ -302,7 +298,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; } -out: smccc_set_retval(vcpu, val, 0, 0, 0); return ret; } @@ -422,6 +417,15 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) */ int kvm_psci_call(struct kvm_vcpu *vcpu) { + u32 psci_fn = smccc_get_function(vcpu); + unsigned long val; + + val = kvm_psci_check_allowed_function(vcpu, psci_fn); + if (val) { + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; + } + switch (kvm_psci_version(vcpu)) { case KVM_ARM_PSCI_1_1: return kvm_psci_1_x_call(vcpu, 1); From 73b725c7a6c82eee10fa2d6752babefff795ca9a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 22 Mar 2022 18:35:38 +0000 Subject: [PATCH 187/423] KVM: arm64: Drop unneeded minor version check from PSCI v1.x handler We already sanitize the guest's PSCI version when it is being written by userspace, rejecting unsupported version numbers. Additionally, the 'minor' parameter to kvm_psci_1_x_call() is a constant known at compile time for all callsites. Though it is benign, the additional check against the PSCI kvm_psci_1_x_call() is unnecessary and likely to be missed the next time KVM raises its maximum PSCI version. Drop the check altogether and rely on sanitization when the PSCI version is set by userspace. No functional change intended. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220322183538.2757758-4-oupton@google.com --- arch/arm64/kvm/psci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index faf403a72fdf..baac2b405f23 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -309,9 +309,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) unsigned long val; int ret = 1; - if (minor > 1) - return -EINVAL; - switch(psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1; From f587661f21eb9a38af52488bbe54ce61a64dfae8 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 1 Apr 2022 19:46:52 +0000 Subject: [PATCH 188/423] KVM: arm64: Don't split hugepages outside of MMU write lock It is possible to take a stage-2 permission fault on a page larger than PAGE_SIZE. For example, when running a guest backed by 2M HugeTLB, KVM eagerly maps at the largest possible block size. When dirty logging is enabled on a memslot, KVM does *not* eagerly split these 2M stage-2 mappings and instead clears the write bit on the pte. Since dirty logging is always performed at PAGE_SIZE granularity, KVM lazily splits these 2M block mappings down to PAGE_SIZE in the stage-2 fault handler. This operation must be done under the write lock. Since commit f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation during dirty logging"), the stage-2 fault handler conditionally takes the read lock on permission faults with dirty logging enabled. To that end, it is possible to split a 2M block mapping while only holding the read lock. The problem is demonstrated by running kvm_page_table_test with 2M anonymous HugeTLB, which splats like so: WARNING: CPU: 5 PID: 15276 at arch/arm64/kvm/hyp/pgtable.c:153 stage2_map_walk_leaf+0x124/0x158 [...] Call trace: stage2_map_walk_leaf+0x124/0x158 stage2_map_walker+0x5c/0xf0 __kvm_pgtable_walk+0x100/0x1d4 __kvm_pgtable_walk+0x140/0x1d4 __kvm_pgtable_walk+0x140/0x1d4 kvm_pgtable_walk+0xa0/0xf8 kvm_pgtable_stage2_map+0x15c/0x198 user_mem_abort+0x56c/0x838 kvm_handle_guest_abort+0x1fc/0x2a4 handle_exit+0xa4/0x120 kvm_arch_vcpu_ioctl_run+0x200/0x448 kvm_vcpu_ioctl+0x588/0x664 __arm64_sys_ioctl+0x9c/0xd4 invoke_syscall+0x4c/0x144 el0_svc_common+0xc4/0x190 do_el0_svc+0x30/0x8c el0_svc+0x28/0xcc el0t_64_sync_handler+0x84/0xe4 el0t_64_sync+0x1a4/0x1a8 Fix the issue by only acquiring the read lock if the guest faulted on a PAGE_SIZE granule w/ dirty logging enabled. Add a WARN to catch locking bugs in future changes. Fixes: f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation during dirty logging") Cc: Jing Zhang Signed-off-by: Oliver Upton Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220401194652.950240-1-oupton@google.com --- arch/arm64/kvm/mmu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0d19259454d8..53ae2c0640bc 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1079,7 +1079,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - bool logging_perm_fault = false; + bool use_read_lock = false; unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; @@ -1114,7 +1114,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; - logging_perm_fault = (fault_status == FSC_PERM && write_fault); + use_read_lock = (fault_status == FSC_PERM && write_fault && + fault_granule == PAGE_SIZE); } else { vma_shift = get_vma_page_shift(vma, hva); } @@ -1218,7 +1219,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * logging dirty logging, only acquire read lock for permission * relaxation. */ - if (logging_perm_fault) + if (use_read_lock) read_lock(&kvm->mmu_lock); else write_lock(&kvm->mmu_lock); @@ -1268,6 +1269,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); } else { + WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n"); + ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, memcache); @@ -1280,7 +1283,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - if (logging_perm_fault) + if (use_read_lock) read_unlock(&kvm->mmu_lock); else write_unlock(&kvm->mmu_lock); From c707663e81ef48d279719e97fd86acef835a2671 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Tue, 29 Mar 2022 03:20:59 -0700 Subject: [PATCH 189/423] KVM: arm64: vgic: Remove unnecessary type castings Remove unnecessary casts. Signed-off-by: Yu Zhe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220329102059.268983-1-yuzhe@nfschina.com --- arch/arm64/kvm/vgic/vgic-debug.c | 10 +++++----- arch/arm64/kvm/vgic/vgic-its.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index f38c40a76251..78cde687383c 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -82,7 +82,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) static void *vgic_debug_start(struct seq_file *s, loff_t *pos) { - struct kvm *kvm = (struct kvm *)s->private; + struct kvm *kvm = s->private; struct vgic_state_iter *iter; mutex_lock(&kvm->lock); @@ -110,7 +110,7 @@ out: static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) { - struct kvm *kvm = (struct kvm *)s->private; + struct kvm *kvm = s->private; struct vgic_state_iter *iter = kvm->arch.vgic.iter; ++*pos; @@ -122,7 +122,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) static void vgic_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = (struct kvm *)s->private; + struct kvm *kvm = s->private; struct vgic_state_iter *iter; /* @@ -229,8 +229,8 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, static int vgic_debug_show(struct seq_file *s, void *v) { - struct kvm *kvm = (struct kvm *)s->private; - struct vgic_state_iter *iter = (struct vgic_state_iter *)v; + struct kvm *kvm = s->private; + struct vgic_state_iter *iter = v; struct vgic_irq *irq; struct kvm_vcpu *vcpu = NULL; unsigned long flags; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 089fc2ffcb43..2e13402be3bd 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2143,7 +2143,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, void *ptr, void *opaque) { - struct its_device *dev = (struct its_device *)opaque; + struct its_device *dev = opaque; struct its_collection *collection; struct kvm *kvm = its->dev->kvm; struct kvm_vcpu *vcpu = NULL; From a2c0b0fbe01419f8f5d1c0b9c581631f34ffce8b Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 5 Apr 2022 11:47:33 +0100 Subject: [PATCH 190/423] arm64: alternatives: mark patch_alternative() as `noinstr` The alternatives code must be `noinstr` such that it does not patch itself, as the cache invalidation is only performed after all the alternatives have been applied. Mark patch_alternative() as `noinstr`. Mark branch_insn_requires_update() and get_alt_insn() with `__always_inline` since they are both only called through patch_alternative(). Booting a kernel in QEMU TCG with KCSAN=y and ARM64_USE_LSE_ATOMICS=y caused a boot hang: [ 0.241121] CPU: All CPU(s) started at EL2 The alternatives code was patching the atomics in __tsan_read4() from LL/SC atomics to LSE atomics. The following fragment is using LL/SC atomics in the .text section: | <__tsan_unaligned_read4+304>: ldxr x6, [x2] | <__tsan_unaligned_read4+308>: add x6, x6, x5 | <__tsan_unaligned_read4+312>: stxr w7, x6, [x2] | <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304> This LL/SC atomic sequence was to be replaced with LSE atomics. However since the alternatives code was instrumentable, __tsan_read4() was being called after only the first instruction was replaced, which led to the following code in memory: | <__tsan_unaligned_read4+304>: ldadd x5, x6, [x2] | <__tsan_unaligned_read4+308>: add x6, x6, x5 | <__tsan_unaligned_read4+312>: stxr w7, x6, [x2] | <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304> This caused an infinite loop as the `stxr` instruction never completed successfully, so `w7` was always 0. Signed-off-by: Joey Gouly Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20220405104733.11476-1-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 3fb79b76e9d9..7bbf5104b7b7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; From 26bf74bd9f6ff0f1545b4f0c92a37c232d076014 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Mon, 28 Mar 2022 20:19:23 -0700 Subject: [PATCH 191/423] KVM: arm64: mixed-width check should be skipped for uninitialized vCPUs KVM allows userspace to configure either all EL1 32bit or 64bit vCPUs for a guest. At vCPU reset, vcpu_allowed_register_width() checks if the vcpu's register width is consistent with all other vCPUs'. Since the checking is done even against vCPUs that are not initialized (KVM_ARM_VCPU_INIT has not been done) yet, the uninitialized vCPUs are erroneously treated as 64bit vCPU, which causes the function to incorrectly detect a mixed-width VM. Introduce KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED bits for kvm->arch.flags. A value of the EL1_32BIT bit indicates that the guest needs to be configured with all 32bit or 64bit vCPUs, and a value of the REG_WIDTH_CONFIGURED bit indicates if a value of the EL1_32BIT bit is valid (already set up). Values in those bits are set at the first KVM_ARM_VCPU_INIT for the guest based on KVM_ARM_VCPU_EL1_32BIT configuration for the vCPU. Check vcpu's register width against those new bits at the vcpu's KVM_ARM_VCPU_INIT (instead of against other vCPUs' register width). Fixes: 66e94d5cafd4 ("KVM: arm64: Prevent mixed-width VM creation") Signed-off-by: Reiji Watanabe Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220329031924.619453-2-reijiw@google.com --- arch/arm64/include/asm/kvm_emulate.h | 27 +++++++---- arch/arm64/include/asm/kvm_host.h | 10 +++++ arch/arm64/kvm/reset.c | 67 +++++++++++++++++++--------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d62405ce3e6d..7496deab025a 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -43,10 +43,22 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); +#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { return !(vcpu->arch.hcr_el2 & HCR_RW); } +#else +static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, + &kvm->arch.flags)); + + return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); +} +#endif static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { @@ -72,15 +84,14 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TVM; } - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) + if (vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_RW; - - /* - * TID3: trap feature register accesses that we virtualise. - * For now this is conditional, since no AArch32 feature regs - * are currently virtualised. - */ - if (!vcpu_el1_is_32bit(vcpu)) + else + /* + * TID3: trap feature register accesses that we virtualise. + * For now this is conditional, since no AArch32 feature regs + * are currently virtualised. + */ vcpu->arch.hcr_el2 |= HCR_TID3; if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e3b25dc6c367..94a27a7520f4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -127,6 +127,16 @@ struct kvm_arch { #define KVM_ARCH_FLAG_MTE_ENABLED 1 /* At least one vCPU has ran in the VM */ #define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 + /* + * The following two bits are used to indicate the guest's EL1 + * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT + * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set. + * Otherwise, the guest's EL1 register width has not yet been + * determined yet. + */ +#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 +#define KVM_ARCH_FLAG_EL1_32BIT 4 + unsigned long flags; /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index ecc40c8cd6f6..6c70c6f61c70 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -181,27 +181,51 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) return 0; } -static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) +/** + * kvm_set_vm_width() - set the register width for the guest + * @vcpu: Pointer to the vcpu being configured + * + * Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED + * in the VM flags based on the vcpu's requested register width, the HW + * capabilities and other options (such as MTE). + * When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be + * consistent with the value of the FLAG_EL1_32BIT bit in the flags. + * + * Return: 0 on success, negative error code on failure. + */ +static int kvm_set_vm_width(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *tmp; + struct kvm *kvm = vcpu->kvm; bool is32bit; - unsigned long i; is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) - return false; - /* MTE is incompatible with AArch32 */ - if (kvm_has_mte(vcpu->kvm) && is32bit) - return false; + lockdep_assert_held(&kvm->lock); - /* Check that the vcpus are either all 32bit or all 64bit */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) - return false; + if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { + /* + * The guest's register width is already configured. + * Make sure that the vcpu is consistent with it. + */ + if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags)) + return 0; + + return -EINVAL; } - return true; + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) + return -EINVAL; + + /* MTE is incompatible with AArch32 */ + if (kvm_has_mte(kvm) && is32bit) + return -EINVAL; + + if (is32bit) + set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); + + set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags); + + return 0; } /** @@ -230,10 +254,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) u32 pstate; mutex_lock(&vcpu->kvm->lock); - reset_state = vcpu->arch.reset_state; - WRITE_ONCE(vcpu->arch.reset_state.reset, false); + ret = kvm_set_vm_width(vcpu); + if (!ret) { + reset_state = vcpu->arch.reset_state; + WRITE_ONCE(vcpu->arch.reset_state.reset, false); + } mutex_unlock(&vcpu->kvm->lock); + if (ret) + return ret; + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -260,14 +290,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } } - if (!vcpu_allowed_register_width(vcpu)) { - ret = -EINVAL; - goto out; - } - switch (vcpu->arch.target) { default: - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (vcpu_el1_is_32bit(vcpu)) { pstate = VCPU_RESET_PSTATE_SVC; } else { pstate = VCPU_RESET_PSTATE_EL1; From 2f5d27e6cf14efe652748bad89ee529ed5a5d577 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Mon, 28 Mar 2022 20:19:24 -0700 Subject: [PATCH 192/423] KVM: arm64: selftests: Introduce vcpu_width_config Introduce a test for aarch64 that ensures non-mixed-width vCPUs (all 64bit vCPUs or all 32bit vcPUs) can be configured, and mixed-width vCPUs cannot be configured. Reviewed-by: Andrew Jones Signed-off-by: Reiji Watanabe Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220329031924.619453-3-reijiw@google.com --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/vcpu_width_config.c | 122 ++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/vcpu_width_config.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d1e8f5237469..573d93a1d61f 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -3,6 +3,7 @@ /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/psci_cpu_on_test +/aarch64/vcpu_width_config /aarch64/vgic_init /aarch64/vgic_irq /s390x/memop diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 21c2dbd21a81..681b173aa87c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -106,6 +106,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test +TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += demand_paging_test diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c new file mode 100644 index 000000000000..6e9402679229 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT. + * + * Copyright (c) 2022 Google LLC. + * + * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs + * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be + * configured. + */ + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + + +/* + * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then + * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2. + */ +static int add_init_2vcpus(struct kvm_vcpu_init *init1, + struct kvm_vcpu_init *init2) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + + vm_vcpu_add(vm, 0); + ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); + if (ret) + goto free_exit; + + vm_vcpu_add(vm, 1); + ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); + +free_exit: + kvm_vm_free(vm); + return ret; +} + +/* + * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1, + * and run KVM_ARM_VCPU_INIT for another vCPU with @init2. + */ +static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1, + struct kvm_vcpu_init *init2) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + + vm_vcpu_add(vm, 0); + vm_vcpu_add(vm, 1); + + ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); + if (ret) + goto free_exit; + + ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); + +free_exit: + kvm_vm_free(vm); + return ret; +} + +/* + * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be + * configured, and two mixed-width vCPUs cannot be configured. + * Each of those three cases, configure vCPUs in two different orders. + * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running + * KVM_ARM_VCPU_INIT for them. + * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU, + * and then run those commands for another vCPU. + */ +int main(void) +{ + struct kvm_vcpu_init init1, init2; + struct kvm_vm *vm; + int ret; + + if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) { + print_skip("KVM_CAP_ARM_EL1_32BIT is not supported"); + exit(KSFT_SKIP); + } + + /* Get the preferred target type and copy that to init2 for later use */ + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1); + kvm_vm_free(vm); + init2 = init1; + + /* Test with 64bit vCPUs */ + ret = add_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 64bit EL1 vCPUs failed unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 64bit EL1 vCPUs failed unexpectedly"); + + /* Test with 32bit vCPUs */ + init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 32bit EL1 vCPUs failed unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 32bit EL1 vCPUs failed unexpectedly"); + + /* Test with mixed-width vCPUs */ + init1.features[0] = 0; + init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init1, &init2); + TEST_ASSERT(ret != 0, + "Configuring mixed-width vCPUs worked unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init1, &init2); + TEST_ASSERT(ret != 0, + "Configuring mixed-width vCPUs worked unexpectedly"); + + return 0; +} From 697a1d44af8ba0477ee729e632f4ade37999249a Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 30 Mar 2022 12:25:43 +0100 Subject: [PATCH 193/423] tlb: hugetlb: Add more sizes to tlb_remove_huge_tlb_entry tlb_remove_huge_tlb_entry only considers PMD_SIZE and PUD_SIZE when updating the mmu_gather structure. Unfortunately on arm64 there are two additional huge page sizes that need to be covered: CONT_PTE_SIZE and CONT_PMD_SIZE. Where an end-user attempts to employ contiguous huge pages, a VM_BUG_ON can be experienced due to the fact that the tlb structure hasn't been correctly updated by the relevant tlb_flush_p.._range() call from tlb_remove_huge_tlb_entry. This patch adds inequality logic to the generic implementation of tlb_remove_huge_tlb_entry s.t. CONT_PTE_SIZE and CONT_PMD_SIZE are effectively covered on arm64. Also, as well as ptes, pmds and puds; p4ds are now considered too. Reported-by: David Hildenbrand Suggested-by: Peter Zijlstra (Intel) Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/linux-mm/811c5c8e-b3a2-85d2-049c-717f17c3a03a@redhat.com/ Signed-off-by: Steve Capper Acked-by: David Hildenbrand Reviewed-by: Anshuman Khandual Reviewed-by: Catalin Marinas Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220330112543.863-1-steve.capper@arm.com Signed-off-by: Will Deacon --- include/asm-generic/tlb.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index fd7feb5c7894..eee6f7763a39 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -565,10 +565,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ - if (_sz == PMD_SIZE) \ - tlb_flush_pmd_range(tlb, address, _sz); \ - else if (_sz == PUD_SIZE) \ + if (_sz >= P4D_SIZE) \ + tlb_flush_p4d_range(tlb, address, _sz); \ + else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ + else if (_sz >= PMD_SIZE) \ + tlb_flush_pmd_range(tlb, address, _sz); \ + else \ + tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) From 4ded53ea0c7d46fa043efc7320e17ca443a1debb Mon Sep 17 00:00:00 2001 From: Enze Li Date: Fri, 1 Apr 2022 22:18:42 +0100 Subject: [PATCH 194/423] cdrom: remove unused variable The clang static analyzer reports the following warning, File: drivers/cdrom/cdrom.c Warning: line 1380, column 7 Although the value stored to 'status' is used in enclosing expression, the value is never actually read from 'status' Remove the unused variable to eliminate the warning. Signed-off-by: Enze Li Link: https://lore.kernel.org/all/20220401032623.293666-1-lienze@kylinos.cn Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20220401211842.2088096-1-phil@philpotter.co.uk Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 7bd10d63ddbe..2dc9da683a13 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1365,7 +1365,6 @@ out_free: */ int cdrom_number_of_slots(struct cdrom_device_info *cdi) { - int status; int nslots = 1; struct cdrom_changer_info *info; @@ -1377,7 +1376,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi) if (!info) return -ENOMEM; - if ((status = cdrom_read_mech_status(cdi, info)) == 0) + if (cdrom_read_mech_status(cdi, info) == 0) nslots = info->hdr.nslots; kfree(info); From 409543cec01a84610029d6440c480c3fdd7214fb Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 6 Apr 2022 18:52:38 +0530 Subject: [PATCH 195/423] spi: core: add dma_map_dev for __spi_unmap_msg() Commit b470e10eb43f ("spi: core: add dma_map_dev for dma device") added dma_map_dev for _spi_map_msg() but missed to add for unmap routine, __spi_unmap_msg(), so add it now. Fixes: b470e10eb43f ("spi: core: add dma_map_dev for dma device") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20220406132238.1029249-1-vkoul@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c4dd1200fe99..32443ae175fd 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1130,11 +1130,15 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) if (ctlr->dma_tx) tx_dev = ctlr->dma_tx->device->dev; + else if (ctlr->dma_map_dev) + tx_dev = ctlr->dma_map_dev; else tx_dev = ctlr->dev.parent; if (ctlr->dma_rx) rx_dev = ctlr->dma_rx->device->dev; + else if (ctlr->dma_map_dev) + rx_dev = ctlr->dma_map_dev; else rx_dev = ctlr->dev.parent; From 97e4827d775faa9a32b5e1a97959c69dd77d17a3 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 31 Mar 2022 13:08:19 +0200 Subject: [PATCH 196/423] spi: cadence-quadspi: fix protocol setup for non-1-1-X operations cqspi_set_protocol() only set the data width, but ignored the command and address width (except for 8-8-8 DTR ops), leading to corruption of all transfers using 1-X-X or X-X-X ops. Fix by setting the other two widths as well. While we're at it, simplify the code a bit by replacing the CQSPI_INST_TYPE_* constants with ilog2(). Tested on a TI AM64x with a Macronix MX25U51245G QSPI flash with 1-4-4 read and write operations. Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20220331110819.133392-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 46 ++++++++----------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index b0c9f62ccefb..616ada891974 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -102,12 +103,6 @@ struct cqspi_driver_platdata { #define CQSPI_TIMEOUT_MS 500 #define CQSPI_READ_TIMEOUT_MS 10 -/* Instruction type */ -#define CQSPI_INST_TYPE_SINGLE 0 -#define CQSPI_INST_TYPE_DUAL 1 -#define CQSPI_INST_TYPE_QUAD 2 -#define CQSPI_INST_TYPE_OCTAL 3 - #define CQSPI_DUMMY_CLKS_PER_BYTE 8 #define CQSPI_DUMMY_BYTES_MAX 4 #define CQSPI_DUMMY_CLKS_MAX 31 @@ -376,10 +371,6 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, const struct spi_mem_op *op) { - f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - /* * For an op to be DTR, cmd phase along with every other non-empty * phase should have dtr field set to 1. If an op phase has zero @@ -389,32 +380,23 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, (!op->addr.nbytes || op->addr.dtr) && (!op->data.nbytes || op->data.dtr); - switch (op->data.buswidth) { - case 0: - break; - case 1: - f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - break; - case 2: - f_pdata->data_width = CQSPI_INST_TYPE_DUAL; - break; - case 4: - f_pdata->data_width = CQSPI_INST_TYPE_QUAD; - break; - case 8: - f_pdata->data_width = CQSPI_INST_TYPE_OCTAL; - break; - default: - return -EINVAL; - } + f_pdata->inst_width = 0; + if (op->cmd.buswidth) + f_pdata->inst_width = ilog2(op->cmd.buswidth); + + f_pdata->addr_width = 0; + if (op->addr.buswidth) + f_pdata->addr_width = ilog2(op->addr.buswidth); + + f_pdata->data_width = 0; + if (op->data.buswidth) + f_pdata->data_width = ilog2(op->data.buswidth); /* Right now we only support 8-8-8 DTR mode. */ if (f_pdata->dtr) { switch (op->cmd.buswidth) { case 0: - break; case 8: - f_pdata->inst_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; @@ -422,9 +404,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, switch (op->addr.buswidth) { case 0: - break; case 8: - f_pdata->addr_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; @@ -432,9 +412,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, switch (op->data.buswidth) { case 0: - break; case 8: - f_pdata->data_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; From 2c7d1b281286c46049cd22b43435cecba560edde Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 28 Mar 2022 10:24:42 -0400 Subject: [PATCH 197/423] spi: bcm-qspi: fix MSPI only access with bcm_qspi_exec_mem_op() This fixes case where MSPI controller is used to access spi-nor flash and BSPI block is not present. Fixes: 5f195ee7d830 ("spi: bcm-qspi: Implement the spi_mem interface") Signed-off-by: Kamal Dasu Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220328142442.7553-1-kdasu.kdev@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 86c76211b3d3..cad2d55dcd3d 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1205,7 +1205,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, addr = op->addr.val; len = op->data.nbytes; - if (bcm_qspi_bspi_ver_three(qspi) == true) { + if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) { /* * The address coming into this function is a raw flash offset. * But for BSPI <= V3, we need to convert it to a remapped BSPI @@ -1224,7 +1224,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, len < 4) mspi_read = true; - if (mspi_read) + if (!has_bspi(qspi) || mspi_read) return bcm_qspi_mspi_exec_mem_op(spi, op); ret = bcm_qspi_bspi_set_mode(qspi, op, 0); From 945da79e6dd058be70bc47442dce319844e14daa Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Thu, 24 Mar 2022 12:19:06 -0400 Subject: [PATCH 198/423] drm/amdgpu/vcn3: send smu interface type For VCN FW to detect ASIC type, in order to use different mailbox registers. V2: simplify codes and fix format issue. Signed-off-by: Boyuan Zhang Acked-by Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 7 +++++++ drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index e2fde88aaf5e..f06fb7f882e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -159,6 +159,7 @@ #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) #define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) +#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -279,6 +280,11 @@ struct amdgpu_fw_shared_fw_logging { uint32_t size; }; +struct amdgpu_fw_shared_smu_interface_info { + uint8_t smu_interface_type; + uint8_t padding[3]; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[44]; @@ -287,6 +293,7 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_multi_queue multi_queue; struct amdgpu_fw_shared_sw_ring sw_ring; struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; struct amdgpu_vcn_fwlog { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 0d590183328f..e1cca0a10653 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -219,6 +219,11 @@ static int vcn_v3_0_sw_init(void *handle) cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) | cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); + fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG; + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2)) + fw_shared->smu_interface_info.smu_interface_type = 2; + else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1)) + fw_shared->smu_interface_info.smu_interface_type = 1; if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); From dda81d9761d07541c404dd5fa93e773a8eda5ddc Mon Sep 17 00:00:00 2001 From: tiancyin Date: Sun, 27 Mar 2022 19:07:13 +0800 Subject: [PATCH 199/423] drm/amd/vcn: fix an error msg on vcn 3.0 Some video card has more than one vcn instance, passing 0 to vcn_v3_0_pause_dpg_mode is incorrect. Error msg: Register(1) [mmUVD_POWER_STATUS] failed to reach value 0x00000001 != 0x00000002 Reviewed-by: James Zhu Signed-off-by: tiancyin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index e1cca0a10653..cb5f0a12333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1488,7 +1488,7 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v3_0_pause_dpg_mode(adev, 0, &state); + vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, From 862a876c3a6372f2fa9d0c6510f1976ac94fc857 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 15 Mar 2022 12:21:43 -0400 Subject: [PATCH 200/423] drm/amd/display: Correct Slice reset calculation [Why] Once DSC slice cannot fit pixel clock, we incorrectly reset min slices to 0 and allow max slice to operate, even when max slice itself cannot fit the pixel clock properly. [How] Change the sequence such that we correctly determine DSC is not possible when both min slices and max slices cannot fit pixel clock per slice. Reviewed-by: Wenjing Liu Acked-by: Alex Hung Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index efc2339f1fa0..4385d19bc489 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -864,11 +864,11 @@ static bool setup_dsc_config( min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); } + is_dsc_possible = (min_slices_h <= max_slices_h); + if (pic_width % min_slices_h != 0) min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? - is_dsc_possible = (min_slices_h <= max_slices_h); - if (min_slices_h == 0 && max_slices_h == 0) is_dsc_possible = false; From 95707203407c4cf0b7e520a99d6f46d8aed4b57f Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 15 Mar 2022 14:57:34 -0400 Subject: [PATCH 201/423] drm/amd/display: Remove redundant dsc power gating from init_hw [Why] DSC Power down code has been moved from dcn31_init_hw into init_pipes() Need to remove it from dcn10_init_hw() as well to avoid duplicated action on dcn1.x/2.x [How] Remove DSC power down code from dcn10_init_hw() Fixes: 8fa6f4c5715c ("drm/amd/display: fixed the DSC power off sequence during Driver PnP") Reviewed-by: Anthony Koo Reviewed-by: Eric Yang Acked-by: Alex Hung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index ad757b59e00e..1dec40db582d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1497,13 +1497,6 @@ void dcn10_init_hw(struct dc *dc) link->link_status.link_active = true; } - /* Power gate DSCs */ - if (!is_optimized_init_done) { - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); - } - /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); From 58e16c752e9540b28a873c44c3bee83e022007c1 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 15 Mar 2022 16:31:14 -0400 Subject: [PATCH 202/423] drm/amd/display: Enable power gating before init_pipes [Why] In init_hw() we call init_pipes() before enabling power gating. init_pipes() tries to power gate dsc but it may fail because required force-ons are not released yet. As a result with dsc config the following errors observed on resume: "REG_WAIT timeout 1us * 1000 tries - dcn20_dsc_pg_control" "REG_WAIT timeout 1us * 1000 tries - dcn20_dpp_pg_control" "REG_WAIT timeout 1us * 1000 tries - dcn20_hubp_pg_control" [How] Move enable_power_gating_plane() before init_pipes() in init_hw() Reviewed-by: Anthony Koo Reviewed-by: Eric Yang Acked-by: Alex Hung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 1dec40db582d..781334b395ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1500,6 +1500,9 @@ void dcn10_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -1552,8 +1555,6 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index ed0a0e5fd805..f61ec8763844 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -547,6 +547,9 @@ void dcn30_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -624,8 +627,6 @@ void dcn30_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 4be228680909..ffc58e24eaf8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -199,6 +199,9 @@ void dcn31_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -248,8 +251,6 @@ void dcn31_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); From 83e9faac9a387894e945e7f33c2bb7a9c348257c Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 15 Mar 2022 16:42:33 -0400 Subject: [PATCH 203/423] drm/amd/display: Clear optc false state when disable otg [why] when disable optc, need to clear the underflow status as well. Reviewed-by: Chris Park Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 14 +++++++------- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c | 5 ++++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index ffc58e24eaf8..631d8ac63aa4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -339,20 +339,20 @@ void dcn31_enable_power_gating_plane( bool enable) { bool force_on = true; /* disable power gating */ + uint32_t org_ip_request_cntl = 0; if (enable && !hws->ctx->dc->debug.disable_hubp_power_gate) force_on = false; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); /* DCHUBP0/1/2/3/4/5 */ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); /* DPP0/1/2/3/4/5 */ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); force_on = true; /* disable power gating */ if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate) @@ -360,11 +360,11 @@ void dcn31_enable_power_gating_plane( /* DCS0/1/2/3/4/5 */ REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); + + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index 8afe2130d7c5..e05527a3a8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -124,7 +124,6 @@ static bool optc31_enable_crtc(struct timing_generator *optc) static bool optc31_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line * in the vertical blank region */ @@ -138,6 +137,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + optc1_clear_optc_underflow(optc); return true; } @@ -158,6 +158,9 @@ static bool optc31_immediate_disable_crtc(struct timing_generator *optc) OTG_BUSY, 0, 1, 100000); + /* clear the false state */ + optc1_clear_optc_underflow(optc); + return true; } From 2944dbedc7e167221fdb99531f7b0cdbac9ac696 Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Wed, 9 Mar 2022 18:26:47 -0500 Subject: [PATCH 204/423] drm/amd/display: Add work around for AUX failure on wake. [Why] When waking from low-power states, a DP sink may remain unresponsive to AUX transactions. [How] Try to toggle DPCD SET_POWER register repeatedly (up to a maximum timeout value) until DP sink becomes responsive. Reviewed-by: Mustapha Ghaddar Acked-by: Alex Hung Signed-off-by: Jimmy Kizito Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 59 +++++++++++++++++++ .../gpu/drm/amd/display/dc/inc/dc_link_dp.h | 1 + 2 files changed, 60 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 351081f574cb..e4df81dc1dc2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5216,6 +5216,62 @@ static void retrieve_cable_id(struct dc_link *link) &link->dpcd_caps.cable_id, &usbc_cable_id); } +/* DPRX may take some time to respond to AUX messages after HPD asserted. + * If AUX read unsuccessful, try to wake unresponsive DPRX by toggling DPCD SET_POWER (0x600). + */ +static enum dc_status wa_try_to_wake_dprx(struct dc_link *link, uint64_t timeout_ms) +{ + enum dc_status status = DC_ERROR_UNEXPECTED; + uint8_t dpcd_data = 0; + uint64_t start_ts = 0; + uint64_t current_ts = 0; + uint64_t time_taken_ms = 0; + enum dc_connection_type type = dc_connection_none; + + status = core_link_read_dpcd( + link, + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, + &dpcd_data, + sizeof(dpcd_data)); + + if (status != DC_OK) { + DC_LOG_WARNING("%s: Read DPCD LTTPR_CAP failed - try to toggle DPCD SET_POWER for %lld ms.", + __func__, + timeout_ms); + start_ts = dm_get_timestamp(link->ctx); + + do { + if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) + break; + + dpcd_data = DP_SET_POWER_D3; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_data, + sizeof(dpcd_data)); + + dpcd_data = DP_SET_POWER_D0; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_data, + sizeof(dpcd_data)); + + current_ts = dm_get_timestamp(link->ctx); + time_taken_ms = div_u64(dm_get_elapse_time_in_ns(link->ctx, current_ts, start_ts), 1000000); + } while (status != DC_OK && time_taken_ms < timeout_ms); + + DC_LOG_WARNING("%s: DPCD SET_POWER %s after %lld ms%s", + __func__, + (status == DC_OK) ? "succeeded" : "failed", + time_taken_ms, + (type == dc_connection_none) ? ". Unplugged." : "."); + } + + return status; +} + static bool retrieve_link_cap(struct dc_link *link) { /* DP_ADAPTER_CAP - DP_DPCD_REV + 1 == 16 and also DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT + 1 == 16, @@ -5251,6 +5307,9 @@ static bool retrieve_link_cap(struct dc_link *link) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); + /* Try to ensure AUX channel active before proceeding. */ + status = wa_try_to_wake_dprx(link, LINK_AUX_WAKE_TIMEOUT_MS); + is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. */ status = dpcd_get_tunneling_device_data(link); diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index ab9939db8cea..44f167d2584f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -33,6 +33,7 @@ #define MAX_MTP_SLOT_COUNT 64 #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #define TRAINING_AUX_RD_INTERVAL 100 //us +#define LINK_AUX_WAKE_TIMEOUT_MS 1500 // Timeout when trying to wake unresponsive DPRX. struct dc_link; struct dc_stream_state; From b2075fce104b88b789c15ef1ed2b91dc94198e26 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Fri, 18 Mar 2022 11:12:36 -0400 Subject: [PATCH 205/423] drm/amd/display: Revert FEC check in validation why and how: causes failure on install on certain machines Reviewed-by: George Shen Acked-by: Alex Hung Signed-off-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f2ad8f58e69c..c436db416708 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1496,10 +1496,6 @@ bool dc_validate_boot_timing(const struct dc *dc, if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; - /* Check for FEC status*/ - if (link->link_enc->funcs->fec_is_active(link->link_enc)) - return false; - enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); if (enc_inst == ENGINE_ID_UNKNOWN) From f4346fb3edf7720db3f7f5e1cab1f667cd024280 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 17 Mar 2022 19:55:05 -0400 Subject: [PATCH 206/423] drm/amd/display: Fix allocate_mst_payload assert on resume [Why] On resume we do link detection for all non-MST connectors. MST is handled separately. However the condition for telling if connector is on mst branch is not enough for mst hub case. Link detection for mst branch link leads to mst topology reset. That causes assert in dc_link_allocate_mst_payload() [How] Use link type as indicator for mst link. Reviewed-by: Wayne Lin Acked-by: Alex Hung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b30656959fd8..5895afefccb7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2714,7 +2714,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ - if (aconnector->mst_port) + if (aconnector->dc_link && + aconnector->dc_link->type == dc_connection_mst_branch) continue; mutex_lock(&aconnector->hpd_lock); From 4052287a75eb3fc0f487fcc5f768a38bede455c8 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Fri, 11 Mar 2022 20:30:17 +0530 Subject: [PATCH 207/423] amd/display: set backlight only if required [Why] comparing pwm bl values (coverted) with user brightness(converted) levels in commit_tail leads to continuous setting of backlight via dmub as they don't to match. This leads overdrive in queuing of commands to DMCU that sometimes lead to depending on load on DMCU fw: "[drm:dc_dmub_srv_wait_idle] *ERROR* Error waiting for DMUB idle: status=3" [How] Store last successfully set backlight value and compare with it instead of pwm reads which is not what we should compare with. Signed-off-by: Shirish S Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5895afefccb7..62139ff35476 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3973,7 +3973,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap max - min); } -static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, int bl_idx, u32 user_brightness) { @@ -4004,7 +4004,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } - return rc ? 0 : 1; + if (rc) + dm->actual_brightness[bl_idx] = user_brightness; } static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) @@ -9948,7 +9949,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* restore the backlight level */ for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i] && - (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i])) + (dm->actual_brightness[i] != dm->brightness[i])) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 6a908d736d6a..7e44b0429448 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,12 @@ struct amdgpu_display_manager { * cached backlight values. */ u32 brightness[AMDGPU_DM_MAX_NUM_EDP]; + /** + * @actual_brightness: + * + * last successfully applied backlight values. + */ + u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; }; enum dsc_clock_force_state { From 96f2b7a3571618a1c8aed694c9e668014c70898b Mon Sep 17 00:00:00 2001 From: Ruili Ji Date: Thu, 31 Mar 2022 12:19:00 +0800 Subject: [PATCH 208/423] drm/amdgpu: fix incorrect GCR_GENERAL_CNTL address gfx10.3.3/gfx10.3.6/gfx10.3.7 shall use 0x1580 address for GCR_GENERAL_CNTL Acked-by: Prike Liang Acked-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a98b78e0b507..9426e252d8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3293,7 +3293,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3429,7 +3429,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3454,7 +3454,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), From ef1a0808a2e20c58d166c5707864fba515832bd7 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 23 Mar 2022 21:16:19 +0800 Subject: [PATCH 209/423] drm/amdgpu: Sync up header and implementation to use the same parameter names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync up header and implementation to use the same parameter names in function amdgpu_ring_init. ring_size -> max_dw, prio -> hw_prio Reviewed-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5320bb0883d8..317d80209e95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -300,8 +300,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int ring_size, struct amdgpu_irq_src *irq_src, - unsigned int irq_type, unsigned int prio, + unsigned int max_dw, struct amdgpu_irq_src *irq_src, + unsigned int irq_type, unsigned int hw_prio, atomic_t *sched_score); void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, From e79a2398e1b2d47060474dca291542368183bc0f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 31 Mar 2022 13:21:17 +0100 Subject: [PATCH 210/423] drm/amdkfd: Create file descriptor after client is added to smi_clients list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ensures userspace cannot prematurely clean-up the client before it is fully initialised which has been proven to cause issues in the past. Cc: Felix Kuehling Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: David Airlie Cc: Daniel Vetter Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index e4beebb1c80a..f2e1d506ba21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -247,15 +247,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) return ret; } - ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, - O_RDWR); - if (ret < 0) { - kfifo_free(&client->fifo); - kfree(client); - return ret; - } - *fd = ret; - init_waitqueue_head(&client->wait_queue); spin_lock_init(&client->lock); client->events = 0; @@ -265,5 +256,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) list_add_rcu(&client->list, &dev->smi_clients); spin_unlock(&dev->smi_lock); + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + return 0; } From ca1198849ab0e7af5efb392ef6baf1138f6fc086 Mon Sep 17 00:00:00 2001 From: CHANDAN VURDIGERE NATARAJ Date: Tue, 29 Mar 2022 13:10:31 +0530 Subject: [PATCH 211/423] drm/amd/display: Fix by adding FPU protection for dcn30_internal_validate_bw [Why] Below general protection fault observed when WebGL Aquarium is run for longer duration. If drm debug logs are enabled and set to 0x1f then the issue is observed within 10 minutes of run. [ 100.717056] general protection fault, probably for non-canonical address 0x2d33302d32323032: 0000 [#1] PREEMPT SMP NOPTI [ 100.727921] CPU: 3 PID: 1906 Comm: DrmThread Tainted: G W 5.15.30 #12 d726c6a2d6ebe5cf9223931cbca6892f916fe18b [ 100.754419] RIP: 0010:CalculateSwathWidth+0x1f7/0x44f [ 100.767109] Code: 00 00 00 f2 42 0f 11 04 f0 48 8b 85 88 00 00 00 f2 42 0f 10 04 f0 48 8b 85 98 00 00 00 f2 42 0f 11 04 f0 48 8b 45 10 0f 57 c0 42 0f 2a 04 b0 0f 57 c9 f3 43 0f 2a 0c b4 e8 8c e2 f3 ff 48 8b [ 100.781269] RSP: 0018:ffffa9230079eeb0 EFLAGS: 00010246 [ 100.812528] RAX: 2d33302d32323032 RBX: 0000000000000500 RCX: 0000000000000000 [ 100.819656] RDX: 0000000000000001 RSI: ffff99deb712c49c RDI: 0000000000000000 [ 100.826781] RBP: ffffa9230079ef50 R08: ffff99deb712460c R09: ffff99deb712462c [ 100.833907] R10: ffff99deb7124940 R11: ffff99deb7124d70 R12: ffff99deb712ae44 [ 100.841033] R13: 0000000000000001 R14: 0000000000000000 R15: ffffa9230079f0a0 [ 100.848159] FS: 00007af121212640(0000) GS:ffff99deba780000(0000) knlGS:0000000000000000 [ 100.856240] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 100.861980] CR2: 0000209000fe1000 CR3: 000000011b18c000 CR4: 0000000000350ee0 [ 100.869106] Call Trace: [ 100.871555] [ 100.873655] ? asm_sysvec_reschedule_ipi+0x12/0x20 [ 100.878449] CalculateSwathAndDETConfiguration+0x1a3/0x6dd [ 100.883937] dml31_ModeSupportAndSystemConfigurationFull+0x2ce4/0x76da [ 100.890467] ? kallsyms_lookup_buildid+0xc8/0x163 [ 100.895173] ? kallsyms_lookup_buildid+0xc8/0x163 [ 100.899874] ? __sprint_symbol+0x80/0x135 [ 100.903883] ? dm_update_plane_state+0x3f9/0x4d2 [ 100.908500] ? symbol_string+0xb7/0xde [ 100.912250] ? number+0x145/0x29b [ 100.915566] ? vsnprintf+0x341/0x5ff [ 100.919141] ? desc_read_finalized_seq+0x39/0x87 [ 100.923755] ? update_load_avg+0x1b9/0x607 [ 100.927849] ? compute_mst_dsc_configs_for_state+0x7d/0xd5b [ 100.933416] ? fetch_pipe_params+0xa4d/0xd0c [ 100.937686] ? dc_fpu_end+0x3d/0xa8 [ 100.941175] dml_get_voltage_level+0x16b/0x180 [ 100.945619] dcn30_internal_validate_bw+0x10e/0x89b [ 100.950495] ? dcn31_validate_bandwidth+0x68/0x1fc [ 100.955285] ? resource_build_scaling_params+0x98b/0xb8c [ 100.960595] ? dcn31_validate_bandwidth+0x68/0x1fc [ 100.965384] dcn31_validate_bandwidth+0x9a/0x1fc [ 100.970001] dc_validate_global_state+0x238/0x295 [ 100.974703] amdgpu_dm_atomic_check+0x9c1/0xbce [ 100.979235] ? _printk+0x59/0x73 [ 100.982467] drm_atomic_check_only+0x403/0x78b [ 100.986912] drm_mode_atomic_ioctl+0x49b/0x546 [ 100.991358] ? drm_ioctl+0x1c1/0x3b3 [ 100.994936] ? drm_atomic_set_property+0x92a/0x92a [ 100.999725] drm_ioctl_kernel+0xdc/0x149 [ 101.003648] drm_ioctl+0x27f/0x3b3 [ 101.007051] ? drm_atomic_set_property+0x92a/0x92a [ 101.011842] amdgpu_drm_ioctl+0x49/0x7d [ 101.015679] __se_sys_ioctl+0x7c/0xb8 [ 101.015685] do_syscall_64+0x5f/0xb8 [ 101.015690] ? __irq_exit_rcu+0x34/0x96 [How] It calles populate_dml_pipes which uses doubles to initialize. Adding FPU protection avoids context switch and probable loss of vba context as there is potential contention while drm debug logs are enabled. Signed-off-by: CHANDAN VURDIGERE NATARAJ Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 338235bcef4a..63934ecf6be8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2032,7 +2032,9 @@ bool dcn31_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg if (pipe_cnt == 0) From ebc002e3ee78409c42156e62e4e27ad1d09c5a75 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 25 Mar 2022 11:53:39 -0400 Subject: [PATCH 212/423] drm/amdgpu: don't use BACO for reset in S3 Seems to cause a reboots or hangs on some systems. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1924 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1953 Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)") Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index c73fb73e9628..5504d81c77b7 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -173,6 +173,17 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; + /* Don't use baco for reset in S3. + * This is a workaround for some platforms + * where entering BACO during suspend + * seems to cause reboots or hangs. + * This might be related to the fact that BACO controls + * power to the whole GPU including devices like audio and USB. + * Powering down/up everything may adversely affect these other + * devices. Needs more investigation. + */ + if (adev->in_s3) + return false; mutex_lock(&adev->pm.mutex); From 7e8906dc2689cbf562ce520cf4a8ba5b495db0f6 Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Mon, 21 Mar 2022 10:42:34 -0400 Subject: [PATCH 213/423] drm/amd/display: remove assert for odm transition case Remove assert that will hit during odm transition case, since this is a valid case. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Eric Bernstein Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index d473708d5399..7802d603f796 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1976,7 +1976,6 @@ int dcn20_validate_apply_pipe_split_flags( /*If need split for odm but 4 way split already*/ if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) || !pipe->next_odm_pipe)) { - ASSERT(0); /* NOT expected yet */ merge[i] = true; /* 4 -> 2 ODM */ } else if (split[i] == 0 && pipe->prev_odm_pipe) { ASSERT(0); /* NOT expected yet */ From 6e93d5b0333279d8968a2972065f47a899fb58b9 Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Tue, 22 Mar 2022 19:12:47 -0400 Subject: [PATCH 214/423] drm/amd/display: Add configuration options for AUX wake work around. [Why] Work around to try to wake unresponsive DP sinks may need to be adjusted for certain sinks. [How] Add options to disable work around or adjust time spent trying to wake unresponsive DPRX. Tested-by: Daniel Wheeler Reviewed-by: Martin Leung Acked-by: Tom Chung Signed-off-by: Jimmy Kizito Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++- drivers/gpu/drm/amd/display/dc/dc.h | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index e4df81dc1dc2..22dabe596dfc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5308,7 +5308,13 @@ static bool retrieve_link_cap(struct dc_link *link) LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); /* Try to ensure AUX channel active before proceeding. */ - status = wa_try_to_wake_dprx(link, LINK_AUX_WAKE_TIMEOUT_MS); + if (link->dc->debug.aux_wake_wa.bits.enable_wa) { + uint64_t timeout_ms = link->dc->debug.aux_wake_wa.bits.timeout_ms; + + if (link->dc->debug.aux_wake_wa.bits.use_default_timeout) + timeout_ms = LINK_AUX_WAKE_TIMEOUT_MS; + status = wa_try_to_wake_dprx(link, timeout_ms); + } is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9f4d926d54e7..77ef9d1f9ea8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -526,6 +526,22 @@ union dpia_debug_options { uint32_t raw; }; +/* AUX wake work around options + * 0: enable/disable work around + * 1: use default timeout LINK_AUX_WAKE_TIMEOUT_MS + * 15-2: reserved + * 31-16: timeout in ms + */ +union aux_wake_wa_options { + struct { + uint32_t enable_wa : 1; + uint32_t use_default_timeout : 1; + uint32_t rsvd: 14; + uint32_t timeout_ms : 16; + } bits; + uint32_t raw; +}; + struct dc_debug_data { uint32_t ltFailCount; uint32_t i2cErrorCount; @@ -712,6 +728,7 @@ struct dc_debug_options { #endif bool apply_vendor_specific_lttpr_wa; bool extended_blank_optimization; + union aux_wake_wa_options aux_wake_wa; bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; From 879791ad8bf3dc5453061cad74776a617b6e3319 Mon Sep 17 00:00:00 2001 From: Benjamin Marty Date: Wed, 23 Mar 2022 22:08:26 +0100 Subject: [PATCH 215/423] drm/amdgpu/display: change pipe policy for DCN 2.1 Fixes crash on MST Hub disconnect. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1849 Fixes: ee2698cf79cc ("drm/amd/display: Changed pipe split policy to allow for multi-display pipe split") Signed-off-by: Benjamin Marty Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 612732656772..3fe4bfbb98a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -644,7 +644,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, From 28c25238898a242c58bfaff3f46a006585c2dd94 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 30 Mar 2022 16:05:50 -0400 Subject: [PATCH 216/423] drm/amd/display: update dcn315 clock table read [Why & How] Make dcn315 base its clock table off dcfclk rather than fclk. This change also adds some sanity checking to make sure an empty pmfw table does not result in invalid dal clocks. Reviewed-by: Charlene Liu Acked-by: Qingqing Zhuo Signed-off-by: Dmytro Laktyushkin Signed-off-by: Alex Deucher --- .../dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 107 +++++++++++------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index edda572dc570..8be4c1970628 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -436,57 +436,84 @@ static void dcn315_clk_mgr_helper_populate_bw_params( struct integrated_info *bios_info, const DpmClocks_315_t *clock_table) { - int i, j; + int i; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; - uint32_t max_dispclk = 0, max_dppclk = 0; + uint32_t max_dispclk, max_dppclk, max_pstate, max_socclk, max_fclk = 0, min_pstate = 0; + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - j = -1; + max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); + max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); + max_socclk = find_max_clk_value(clock_table->SocClocks, clock_table->NumSocClkLevelsEnabled); - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ - - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { - if (clock_table->DfPstateTable[i].FClk != 0) { - j = i; - break; + /* Find highest fclk pstate */ + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { + if (clock_table->DfPstateTable[i].FClk > max_fclk) { + max_fclk = clock_table->DfPstateTable[i].FClk; + max_pstate = i; } } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } + /* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */ + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + int j; + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; - bw_params->clk_table.num_entries = j + 1; + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { + if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i] + && clock_table->DfPstateTable[j].FClk < min_fclk) { + min_fclk = clock_table->DfPstateTable[j].FClk; + min_pstate = j; + } + } - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ - if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && - clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { - max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); - max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); - } else { - ASSERT(0); - } - - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - int temp; - - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; - bw_params->clk_table.entries[i].wck_ratio = 1; - temp = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); - if (temp) - bw_params->clk_table.entries[i].dcfclk_mhz = temp; - temp = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); - if (temp) - bw_params->clk_table.entries[i].socclk_mhz = temp; + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; - } + bw_params->clk_table.entries[i].wck_ratio = 1; + }; + /* Make sure to include at least one entry and highest pstate */ + if (max_pstate != min_pstate) { + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage( + clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[max_pstate].Voltage); + bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage( + clock_table, clock_table->SocClocks, clock_table->DfPstateTable[max_pstate].Voltage); + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = 1; + i++; + } + bw_params->clk_table.num_entries = i; + + /* Include highest socclk */ + if (bw_params->clk_table.entries[i-1].socclk_mhz < max_socclk) + bw_params->clk_table.entries[i-1].socclk_mhz = max_socclk; + + /* Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + if (!bw_params->clk_table.entries[i].fclk_mhz) { + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; + bw_params->clk_table.entries[i].voltage = def_max.voltage; + } + if (!bw_params->clk_table.entries[i].dcfclk_mhz) + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz) + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; + if (!bw_params->clk_table.entries[i].dispclk_mhz) + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; + if (!bw_params->clk_table.entries[i].dppclk_mhz) + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; + } bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; From 2f25d8ce09b7ba5d769c132ba3d4eb84a941d2cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 Apr 2022 11:08:48 -0400 Subject: [PATCH 217/423] drm/amdgpu/smu10: fix SoC/fclk units in auto mode SMU takes clock limits in Mhz units. socclk and fclk were using 10 khz units in some cases. Switch to Mhz units. Fixes higher than required SoC clocks. Fixes: 97cf32996c46d9 ("drm/amd/pm: Removed fixed clock in auto mode DPM") Reviewed-by: Paul Menzel Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9ddd8491ff00..ede71de2343d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -773,13 +773,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : + (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -792,11 +792,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, From 5593473a1e6c743764b08e3b6071cb43b5cfa6c4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Apr 2022 13:13:42 -0400 Subject: [PATCH 218/423] KVM: avoid NULL pointer dereference in kvm_dirty_ring_push kvm_vcpu_release() will call kvm_dirty_ring_free(), freeing ring->dirty_gfns and setting it to NULL. Afterwards, it calls kvm_arch_vcpu_destroy(). However, if closing the file descriptor races with KVM_RUN in such away that vcpu->arch.st.preempted == 0, the following call stack leads to a NULL pointer dereference in kvm_dirty_run_push(): mark_page_dirty_in_slot+0x192/0x270 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3171 kvm_steal_time_set_preempted arch/x86/kvm/x86.c:4600 [inline] kvm_arch_vcpu_put+0x34e/0x5b0 arch/x86/kvm/x86.c:4618 vcpu_put+0x1b/0x70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:211 vmx_free_vcpu+0xcb/0x130 arch/x86/kvm/vmx/vmx.c:6985 kvm_arch_vcpu_destroy+0x76/0x290 arch/x86/kvm/x86.c:11219 kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline] The fix is to release the dirty page ring after kvm_arch_vcpu_destroy has run. Reported-by: Qiuhao Li Reported-by: Gaoning Pan Reported-by: Yongkang Jia Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70e05af5ebea..b22f380e3347 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -434,8 +434,8 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { - kvm_dirty_ring_free(&vcpu->dirty_ring); kvm_arch_vcpu_destroy(vcpu); + kvm_dirty_ring_free(&vcpu->dirty_ring); /* * No need for rcu_read_lock as VCPU_RUN is the only place that changes From 773f91b2cf3f52df0d7508fdbf60f37567cdaee4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 1 Apr 2022 17:08:21 -0400 Subject: [PATCH 219/423] SUNRPC: Fix NFSD's request deferral on RDMA transports Trond Myklebust reports an NFSD crash in svc_rdma_sendto(). Further investigation shows that the crash occurred while NFSD was handling a deferred request. This patch addresses two inter-related issues that prevent request deferral from working correctly for RPC/RDMA requests: 1. Prevent the crash by ensuring that the original svc_rqst::rq_xprt_ctxt value is available when the request is revisited. Otherwise svc_rdma_sendto() does not have a Receive context available with which to construct its reply. 2. Possibly since before commit 71641d99ce03 ("svcrdma: Properly compute .len and .buflen for received RPC Calls"), svc_rdma_recvfrom() did not include the transport header in the returned xdr_buf. There should have been no need for svc_defer() and friends to save and restore that header, as of that commit. This issue is addressed in a backport-friendly way by simply having svc_rdma_recvfrom() set rq_xprt_hlen to zero unconditionally, just as svc_tcp_recvfrom() does. This enables svc_deferred_recv() to correctly reconstruct an RPC message received via RPC/RDMA. Reported-by: Trond Myklebust Link: https://lore.kernel.org/linux-nfs/82662b7190f26fb304eb0ab1bb04279072439d4e.camel@hammerspace.com/ Signed-off-by: Chuck Lever Cc: --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc_xprt.c | 3 +++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a5dda4987e8b..217711fc9cac 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -395,6 +395,7 @@ struct svc_deferred_req { size_t addrlen; struct sockaddr_storage daddr; /* where reply must come from */ size_t daddrlen; + void *xprt_ctxt; struct cache_deferred_req handle; size_t xprt_hlen; int argslen; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0c117d3bfda8..b42cfffa7395 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1231,6 +1231,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) dr->daddr = rqstp->rq_daddr; dr->argslen = rqstp->rq_arg.len >> 2; dr->xprt_hlen = rqstp->rq_xprt_hlen; + dr->xprt_ctxt = rqstp->rq_xprt_ctxt; + rqstp->rq_xprt_ctxt = NULL; /* back up head to the start of the buffer and copy */ skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; @@ -1269,6 +1271,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_xprt_ctxt = dr->xprt_ctxt; svc_xprt_received(rqstp->rq_xprt); return (dr->argslen<<2) - dr->xprt_hlen; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cf76a6ad127b..864131a9fc6e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -831,7 +831,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_err; if (ret == 0) goto out_drop; - rqstp->rq_xprt_hlen = ret; + rqstp->rq_xprt_hlen = 0; if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) goto out_backchannel; From aadb22ba2f656581b2f733deb3a467c48cc618f6 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 6 Apr 2022 21:04:43 +0200 Subject: [PATCH 220/423] drbd: Fix five use after free bugs in get_initial_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In get_initial_state, it calls notify_initial_state_done(skb,..) if cb->args[5]==1. If genlmsg_put() failed in notify_initial_state_done(), the skb will be freed by nlmsg_free(skb). Then get_initial_state will goto out and the freed skb will be used by return value skb->len, which is a uaf bug. What's worse, the same problem goes even further: skb can also be freed in the notify_*_state_change -> notify_*_state calls below. Thus 4 additional uaf bugs happened. My patch lets the problem callee functions: notify_initial_state_done and notify_*_state_change return an error code if errors happen. So that the error codes could be propagated and the uaf bugs can be avoid. v2 reports a compilation warning. This v3 fixed this warning and built successfully in my local environment with no additional warnings. v2: https://lore.kernel.org/patchwork/patch/1435218/ Fixes: a29728463b254 ("drbd: Backport the "events2" command") Signed-off-by: Lv Yunlong Reviewed-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 8 ++--- drivers/block/drbd/drbd_nl.c | 41 ++++++++++++++++---------- drivers/block/drbd/drbd_state.c | 18 +++++------ drivers/block/drbd/drbd_state_change.h | 8 ++--- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4b55e864a0a3..4d3efaa20b7b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1638,22 +1638,22 @@ struct sib_info { }; void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); -extern void notify_resource_state(struct sk_buff *, +extern int notify_resource_state(struct sk_buff *, unsigned int, struct drbd_resource *, struct resource_info *, enum drbd_notification_type); -extern void notify_device_state(struct sk_buff *, +extern int notify_device_state(struct sk_buff *, unsigned int, struct drbd_device *, struct device_info *, enum drbd_notification_type); -extern void notify_connection_state(struct sk_buff *, +extern int notify_connection_state(struct sk_buff *, unsigned int, struct drbd_connection *, struct connection_info *, enum drbd_notification_type); -extern void notify_peer_device_state(struct sk_buff *, +extern int notify_peer_device_state(struct sk_buff *, unsigned int, struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 02030c9c4d3b..b7216c186ba4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -4549,7 +4549,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4591,16 +4591,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4640,16 +4641,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4689,16 +4691,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4739,13 +4742,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4796,7 +4800,7 @@ fail: err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4810,11 +4814,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4841,6 +4846,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4849,32 +4855,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4889,7 +4895,10 @@ next: cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b8a27818ab3f..4ee11aef6672 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, return rv; } -void notify_resource_state_change(struct sk_buff *skb, +int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_resource_state_change *resource_state_change, enum drbd_notification_type type) @@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb, .res_susp_fen = resource_state_change->susp_fen[NEW], }; - notify_resource_state(skb, seq, resource, &resource_info, type); + return notify_resource_state(skb, seq, resource, &resource_info, type); } -void notify_connection_state_change(struct sk_buff *skb, +int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_connection_state_change *connection_state_change, enum drbd_notification_type type) @@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb, .conn_role = connection_state_change->peer_role[NEW], }; - notify_connection_state(skb, seq, connection, &connection_info, type); + return notify_connection_state(skb, seq, connection, &connection_info, type); } -void notify_device_state_change(struct sk_buff *skb, +int notify_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_device_state_change *device_state_change, enum drbd_notification_type type) @@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb, .dev_disk_state = device_state_change->disk_state[NEW], }; - notify_device_state(skb, seq, device, &device_info, type); + return notify_device_state(skb, seq, device, &device_info, type); } -void notify_peer_device_state_change(struct sk_buff *skb, +int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device_state_change *p, enum drbd_notification_type type) @@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb, .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], }; - notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); + return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); } static void broadcast_state_change(struct drbd_state_change *state_change) @@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - void (*last_func)(struct sk_buff *, unsigned int, void *, + int (*last_func)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index ba80f612d6ab..d5b0479bc9a6 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_ extern void copy_old_to_new_state_change(struct drbd_state_change *); extern void forget_state_change(struct drbd_state_change *); -extern void notify_resource_state_change(struct sk_buff *, +extern int notify_resource_state_change(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type type); -extern void notify_connection_state_change(struct sk_buff *, +extern int notify_connection_state_change(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type type); -extern void notify_device_state_change(struct sk_buff *, +extern int notify_device_state_change(struct sk_buff *, unsigned int, struct drbd_device_state_change *, enum drbd_notification_type type); -extern void notify_peer_device_state_change(struct sk_buff *, +extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, struct drbd_peer_device_state_change *, enum drbd_notification_type type); From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Wed, 6 Apr 2022 21:04:44 +0200 Subject: [PATCH 221/423] drbd: fix an invalid memory access caused by incorrect use of list iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug is here: idr_remove(&connection->peer_devices, vnr); If the previous for_each_connection() don't exit early (no goto hit inside the loop), the iterator 'connection' after the loop will be a bogus pointer to an invalid structure object containing the HEAD (&resource->connections). As a result, the use of 'connection' above will lead to a invalid memory access (including a possible invalid free as idr_remove could call free_layer). The original intention should have been to remove all peer_devices, but the following lines have already done the work. So just remove this line and the unneeded label, to fix this bug. Cc: stable@vger.kernel.org Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices") Signed-off-by: Xiaomeng Tong Reviewed-by: Christoph Böhmwalder Reviewed-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9676a1d214bc..d6dfa286ddb3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; } err = add_disk(disk); if (err) - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_idr_remove_vol: - idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); From 286901941fd18a52b2138fddbbf589ad3639eb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Wed, 6 Apr 2022 21:04:45 +0200 Subject: [PATCH 222/423] drbd: set QUEUE_FLAG_STABLE_WRITES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want our pages not to change while they are being written. Signed-off-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d6dfa286ddb3..4b0b25cc916e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2719,6 +2719,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ From 0f525289ff0ddeb380813bd81e0f9bdaaa1c9078 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 Apr 2022 21:44:02 +0200 Subject: [PATCH 223/423] fbdev: Fix unregistering of framebuffers without device OF framebuffers do not have an underlying device in the Linux device hierarchy. Do a regular unregister call instead of hot unplugging such a non-existing device. Fixes a NULL dereference. An example error message on ppc64le is shown below. BUG: Kernel NULL pointer dereference on read at 0x00000060 Faulting instruction address: 0xc00000000080dfa4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries [...] CPU: 2 PID: 139 Comm: systemd-udevd Not tainted 5.17.0-ae085d7f9365 #1 NIP: c00000000080dfa4 LR: c00000000080df9c CTR: c000000000797430 REGS: c000000004132fe0 TRAP: 0300 Not tainted (5.17.0-ae085d7f9365) MSR: 8000000002009033 CR: 28228282 XER: 20000000 CFAR: c00000000000c80c DAR: 0000000000000060 DSISR: 40000000 IRQMASK: 0 GPR00: c00000000080df9c c000000004133280 c00000000169d200 0000000000000029 GPR04: 00000000ffffefff c000000004132f90 c000000004132f88 0000000000000000 GPR08: c0000000015658f8 c0000000015cd200 c0000000014f57d0 0000000048228283 GPR12: 0000000000000000 c00000003fffe300 0000000020000000 0000000000000000 GPR16: 0000000000000000 0000000113fc4a40 0000000000000005 0000000113fcfb80 GPR20: 000001000f7283b0 0000000000000000 c000000000e4a588 c000000000e4a5b0 GPR24: 0000000000000001 00000000000a0000 c008000000db0168 c0000000021f6ec0 GPR28: c0000000016d65a8 c000000004b36460 0000000000000000 c0000000016d64b0 NIP [c00000000080dfa4] do_remove_conflicting_framebuffers+0x184/0x1d0 [c000000004133280] [c00000000080df9c] do_remove_conflicting_framebuffers+0x17c/0x1d0 (unreliable) [c000000004133350] [c00000000080e4d0] remove_conflicting_framebuffers+0x60/0x150 [c0000000041333a0] [c00000000080e6f4] remove_conflicting_pci_framebuffers+0x134/0x1b0 [c000000004133450] [c008000000e70438] drm_aperture_remove_conflicting_pci_framebuffers+0x90/0x100 [drm] [c000000004133490] [c008000000da0ce4] bochs_pci_probe+0x6c/0xa64 [bochs] [...] [c000000004133db0] [c00000000002aaa0] system_call_exception+0x170/0x2d0 [c000000004133e10] [c00000000000c3cc] system_call_common+0xec/0x250 The bug [1] was introduced by commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Most firmware framebuffers have an underlying platform device, which can be hot-unplugged before loading the native graphics driver. OF framebuffers do not (yet) have that device. Fix the code by unregistering the framebuffer as before without a hot unplug. Tested with 5.17 on qemu ppc64le emulation. Signed-off-by: Thomas Zimmermann Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Reported-by: Sudip Mukherjee Reviewed-by: Daniel Vetter Reviewed-by: Javier Martinez Canillas Tested-by: Sudip Mukherjee Cc: Zack Rusin Cc: Javier Martinez Canillas Cc: Hans de Goede Cc: stable@vger.kernel.org # v5.11+ Cc: Helge Deller Cc: Daniel Vetter Cc: Sam Ravnborg Cc: Zheyu Ma Cc: Xiyu Yang Cc: Zhen Lei Cc: Matthew Wilcox Cc: Alex Deucher Cc: Tetsuo Handa Cc: Guenter Roeck Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Link: https://lore.kernel.org/all/YkHXO6LGHAN0p1pq@debian/ # [1] Link: https://patchwork.freedesktop.org/patch/msgid/20220404194402.29974-1-tzimmermann@suse.de --- drivers/video/fbdev/core/fbmem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 34d6bb1bf82e..a6bb0e438216 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1579,7 +1579,14 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a, * If it's not a platform device, at least print a warning. A * fix would add code to remove the device from the system. */ - if (dev_is_platform(device)) { + if (!device) { + /* TODO: Represent each OF framebuffer as its own + * device in the device hierarchy. For now, offb + * doesn't have such a device, so unregister the + * framebuffer as before without warning. + */ + do_unregister_framebuffer(registered_fb[i]); + } else if (dev_is_platform(device)) { registered_fb[i]->forced_out = true; platform_device_unregister(to_platform_device(device)); } else { From ffa0b64e3be58519ae472ea29a1a1ad681e32f48 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 Apr 2022 00:57:57 +1000 Subject: [PATCH 224/423] powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit mpe: On 64-bit Book3E vmalloc space starts at 0x8000000000000000. Because of the way __pa() works we have: __pa(0x8000000000000000) == 0, and therefore virt_to_pfn(0x8000000000000000) == 0, and therefore virt_addr_valid(0x8000000000000000) == true Which is wrong, virt_addr_valid() should be false for vmalloc space. In fact all vmalloc addresses that alias with a valid PFN will return true from virt_addr_valid(). That can cause bugs with hardened usercopy as described below by Kefeng Wang: When running ethtool eth0 on 64-bit Book3E, a BUG occurred: usercopy: Kernel memory exposure attempt detected from SLUB object not in SLUB page?! (offset 0, size 1048)! kernel BUG at mm/usercopy.c:99 ... usercopy_abort+0x64/0xa0 (unreliable) __check_heap_object+0x168/0x190 __check_object_size+0x1a0/0x200 dev_ethtool+0x2494/0x2b20 dev_ioctl+0x5d0/0x770 sock_do_ioctl+0xf0/0x1d0 sock_ioctl+0x3ec/0x5a0 __se_sys_ioctl+0xf0/0x160 system_call_exception+0xfc/0x1f0 system_call_common+0xf8/0x200 The code shows below, data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) The data is alloced by vmalloc(), virt_addr_valid(ptr) will return true on 64-bit Book3E, which leads to the panic. As commit 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va and __pa addresses") does, make sure the virt addr above PAGE_OFFSET in the virt_addr_valid() for 64-bit, also add upper limit check to make sure the virt is below high_memory. Meanwhile, for 32-bit PAGE_OFFSET is the virtual address of the start of lowmem, high_memory is the upper low virtual address, the check is suitable for 32-bit, this will fix the issue mentioned in commit 602946ec2f90 ("powerpc: Set max_mapnr correctly") too. On 32-bit there is a similar problem with high memory, that was fixed in commit 602946ec2f90 ("powerpc: Set max_mapnr correctly"), but that commit breaks highmem and needs to be reverted. We can't easily fix __pa(), we have code that relies on its current behaviour. So for now add extra checks to virt_addr_valid(). For 64-bit Book3S the extra checks are not necessary, the combination of virt_to_pfn() and pfn_valid() should yield the correct result, but they are harmless. Signed-off-by: Kefeng Wang Reviewed-by: Christophe Leroy [mpe: Add additional change log detail] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220406145802.538416-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/page.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 254687258f42..f2c5c26869f1 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't From 1ff5c8e8c835e8a81c0868e3050c76563dd56a2c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 Apr 2022 00:57:58 +1000 Subject: [PATCH 225/423] Revert "powerpc: Set max_mapnr correctly" This reverts commit 602946ec2f90d5bd965857753880db29d2d9a1e9. If CONFIG_HIGHMEM is enabled, no highmem will be added with max_mapnr set to max_low_pfn, see mem_init(): for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { ... free_highmem_page(); } Now that virt_addr_valid() has been fixed in the previous commit, we can revert the change to max_mapnr. Fixes: 602946ec2f90 ("powerpc: Set max_mapnr correctly") Signed-off-by: Kefeng Wang Reviewed-by: Christophe Leroy Reported-by: Erhard F. [mpe: Update change log to reflect series reordering] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220406145802.538416-2-mpe@ellerman.id.au --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8e301cd8925b..4d221d033804 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -255,7 +255,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_low_pfn); + set_max_mapnr(max_pfn); kasan_late_init(); From 1ecc0c09f19f8e10a2c52676f8ca47c28c9f73c7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 4 Apr 2022 21:21:05 +0200 Subject: [PATCH 226/423] dt-bindings: display: panel: mipi-dbi-spi: Make width-mm/height-mm mandatory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the width-mm/height-mm panel properties mandatory to correctly report the panel dimensions to the OS. Fixes: 2f3468b82db97 ("dt-bindings: display: add bindings for MIPI DBI compatible SPI panels") Signed-off-by: Marek Vasut Cc: Christoph Niedermaier Cc: Daniel Vetter Cc: Dmitry Osipenko Cc: Laurent Pinchart Cc: Noralf Trønnes Cc: Rob Herring Cc: Robert Foss Cc: Sam Ravnborg Cc: Thomas Zimmermann Cc: devicetree@vger.kernel.org To: dri-devel@lists.freedesktop.org Acked-by: Noralf Trønnes Reviewed-by: Laurent Pinchart Acked-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20220404192105.12547-1-marex@denx.de --- .../devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml b/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml index f29789994b18..c2df8d28aaf5 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml @@ -83,6 +83,8 @@ properties: required: - compatible - reg + - width-mm + - height-mm - panel-timing unevaluatedProperties: false From 6a0d0ae3e8b533d6de627c814c60264b9a85bad6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Apr 2022 13:55:47 +0900 Subject: [PATCH 227/423] scsi: scsi_debug: Fix sdebug_blk_mq_poll() in_use_bm bitmap use The in_use_bm bitmap of struct sdebug_queue should be accessed under protection of the qc_lock spinlock. Make sure that this lock is taken before calling find_first_bit() at the beginning of the function sdebug_blk_mq_poll(). Link: https://lore.kernel.org/r/20220404045547.579887-1-damien.lemoal@opensource.wdc.com Fixes: 3fd07aecb750 ("scsi: scsi_debug: Fix qc_lock use in sdebug_blk_mq_poll()") Acked-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index c607755cce00..ff78ef702f22 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7519,12 +7519,13 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) struct sdebug_defer *sd_dp; sqp = sdebug_q_arr + queue_num; - qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue); - if (qc_idx >= sdebug_max_queue) - return 0; spin_lock_irqsave(&sqp->qc_lock, iflags); + qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue); + if (qc_idx >= sdebug_max_queue) + goto unlock; + for (first = true; first || qc_idx + 1 < sdebug_max_queue; ) { if (first) { first = false; @@ -7589,6 +7590,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) break; } +unlock: spin_unlock_irqrestore(&sqp->qc_lock, iflags); if (num_entries > 0) From 6eaa77144b90582cef7f1fc346f11df51f9f83d5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Apr 2022 14:00:41 +0900 Subject: [PATCH 228/423] scsi: mpt3sas: Fix mpt3sas_check_same_4gb_region() kdoc comment The start_addres argument of mpt3sas_check_same_4gb_region() was misnamed in the function kdoc comment, resulting in the following warning when compiling with W=1. drivers/scsi/mpt3sas/mpt3sas_base.c:5728: warning: Function parameter or member 'start_address' not described in 'mpt3sas_check_same_4gb_region' drivers/scsi/mpt3sas/mpt3sas_base.c:5728: warning: Excess function parameter 'reply_pool_start_address' description in 'mpt3sas_check_same_4gb_region' Fix the argument name in the function kdoc comment to avoid it. While at it, remove a useless blank line between the kdoc and function code. Link: https://lore.kernel.org/r/20220404050041.594774-1-damien.lemoal@opensource.wdc.com Acked-by: Sreekanth Reddy Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b57f1803371e..538d2c0cd971 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -5716,13 +5716,12 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) /** * mpt3sas_check_same_4gb_region - checks whether all reply queues in a set are * having same upper 32bits in their base memory address. - * @reply_pool_start_address: Base address of a reply queue set + * @start_address: Base address of a reply queue set * @pool_sz: Size of single Reply Descriptor Post Queues pool size * * Return: 1 if reply queues in a set have a same upper 32bits in their base * memory address, else 0. */ - static int mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz) { From 4049f7acef3eb37c1ea0df45f3ffc29404f4e708 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 4 Apr 2022 08:50:38 +0300 Subject: [PATCH 229/423] scsi: ufs: ufs-pci: Add support for Intel MTL Add PCI ID and callbacks to support Intel Meteor Lake (MTL). Link: https://lore.kernel.org/r/20220404055038.2208051-1-adrian.hunter@intel.com Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Avri Altman Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index f76692053ca1..e892b9feffb1 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -428,6 +428,12 @@ static int ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static int ufs_intel_mtl_init(struct ufs_hba *hba) +{ + hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; + return ufs_intel_common_init(hba); +} + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .init = ufs_intel_common_init, @@ -465,6 +471,16 @@ static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = { .device_reset = ufs_intel_device_reset, }; +static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_mtl_init, + .exit = ufs_intel_common_exit, + .hce_enable_notify = ufs_intel_hce_enable_notify, + .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, + .device_reset = ufs_intel_device_reset, +}; + #ifdef CONFIG_PM_SLEEP static int ufshcd_pci_restore(struct device *dev) { @@ -579,6 +595,7 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops }, { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x7E47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { } /* terminate list */ }; From 75f5a0c4744c7880f1ceceb8b22e3751bf1d4166 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 6 Apr 2022 19:05:39 +1000 Subject: [PATCH 230/423] scsi: sym53c500_cs: Stop using struct scsi_pointer This driver doesn't use SCp.ptr to save a SCSI command data pointer which means "scsi pointer" is a complete misnomer here. Only a few members of struct scsi_pointer are needed so move those to private command data. Link: https://lore.kernel.org/r/accf71e293ba3aed6d18c8baeb405de8dfe7c935.1649235939.git.fthain@linux-m68k.org Cc: Bart Van Assche Cc: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/pcmcia/sym53c500_cs.c | 52 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index c4a838635893..5d7dfefd6f6c 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -192,10 +192,11 @@ struct sym53c500_data { int fast_pio; }; -static struct scsi_pointer *sym53c500_scsi_pointer(struct scsi_cmnd *cmd) -{ - return scsi_cmd_priv(cmd); -} +struct sym53c500_cmd_priv { + int status; + int message; + int phase; +}; enum Phase { idle, @@ -356,7 +357,7 @@ SYM53C500_intr(int irq, void *dev_id) struct sym53c500_data *data = (struct sym53c500_data *)dev->hostdata; struct scsi_cmnd *curSC = data->current_SC; - struct scsi_pointer *scsi_pointer = sym53c500_scsi_pointer(curSC); + struct sym53c500_cmd_priv *scp = scsi_cmd_priv(curSC); int fast_pio = data->fast_pio; spin_lock_irqsave(dev->host_lock, flags); @@ -403,12 +404,11 @@ SYM53C500_intr(int irq, void *dev_id) if (int_reg & 0x20) { /* Disconnect */ DEB(printk("SYM53C500: disconnect intr received\n")); - if (scsi_pointer->phase != message_in) { /* Unexpected disconnect */ + if (scp->phase != message_in) { /* Unexpected disconnect */ curSC->result = DID_NO_CONNECT << 16; } else { /* Command complete, return status and message */ - curSC->result = (scsi_pointer->Status & 0xff) | - ((scsi_pointer->Message & 0xff) << 8) | - (DID_OK << 16); + curSC->result = (scp->status & 0xff) | + ((scp->message & 0xff) << 8) | (DID_OK << 16); } goto idle_out; } @@ -419,7 +419,7 @@ SYM53C500_intr(int irq, void *dev_id) struct scatterlist *sg; int i; - scsi_pointer->phase = data_out; + scp->phase = data_out; VDEB(printk("SYM53C500: Data-Out phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ @@ -438,7 +438,7 @@ SYM53C500_intr(int irq, void *dev_id) struct scatterlist *sg; int i; - scsi_pointer->phase = data_in; + scp->phase = data_in; VDEB(printk("SYM53C500: Data-In phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ @@ -453,12 +453,12 @@ SYM53C500_intr(int irq, void *dev_id) break; case 0x02: /* COMMAND */ - scsi_pointer->phase = command_ph; + scp->phase = command_ph; printk("SYM53C500: Warning: Unknown interrupt occurred in command phase!\n"); break; case 0x03: /* STATUS */ - scsi_pointer->phase = status_ph; + scp->phase = status_ph; VDEB(printk("SYM53C500: Status phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); outb(INIT_CMD_COMPLETE, port_base + CMD_REG); @@ -471,24 +471,22 @@ SYM53C500_intr(int irq, void *dev_id) case 0x06: /* MESSAGE-OUT */ DEB(printk("SYM53C500: Message-Out phase\n")); - scsi_pointer->phase = message_out; + scp->phase = message_out; outb(SET_ATN, port_base + CMD_REG); /* Reject the message */ outb(MSG_ACCEPT, port_base + CMD_REG); break; case 0x07: /* MESSAGE-IN */ VDEB(printk("SYM53C500: Message-In phase\n")); - scsi_pointer->phase = message_in; + scp->phase = message_in; - scsi_pointer->Status = inb(port_base + SCSI_FIFO); - scsi_pointer->Message = inb(port_base + SCSI_FIFO); + scp->status = inb(port_base + SCSI_FIFO); + scp->message = inb(port_base + SCSI_FIFO); VDEB(printk("SCSI FIFO size=%d\n", inb(port_base + FIFO_FLAGS) & 0x1f)); - DEB(printk("Status = %02x Message = %02x\n", - scsi_pointer->Status, scsi_pointer->Message)); + DEB(printk("Status = %02x Message = %02x\n", scp->status, scp->message)); - if (scsi_pointer->Message == SAVE_POINTERS || - scsi_pointer->Message == DISCONNECT) { + if (scp->message == SAVE_POINTERS || scp->message == DISCONNECT) { outb(SET_ATN, port_base + CMD_REG); /* Reject message */ DEB(printk("Discarding SAVE_POINTERS message\n")); } @@ -500,7 +498,7 @@ out: return IRQ_HANDLED; idle_out: - scsi_pointer->phase = idle; + scp->phase = idle; scsi_done(curSC); goto out; } @@ -548,7 +546,7 @@ SYM53C500_info(struct Scsi_Host *SChost) static int SYM53C500_queue_lck(struct scsi_cmnd *SCpnt) { - struct scsi_pointer *scsi_pointer = sym53c500_scsi_pointer(SCpnt); + struct sym53c500_cmd_priv *scp = scsi_cmd_priv(SCpnt); int i; int port_base = SCpnt->device->host->io_port; struct sym53c500_data *data = @@ -565,9 +563,9 @@ static int SYM53C500_queue_lck(struct scsi_cmnd *SCpnt) VDEB(printk("\n")); data->current_SC = SCpnt; - scsi_pointer->phase = command_ph; - scsi_pointer->Status = 0; - scsi_pointer->Message = 0; + scp->phase = command_ph; + scp->status = 0; + scp->message = 0; /* We are locked here already by the mid layer */ REG0(port_base); @@ -682,7 +680,7 @@ static struct scsi_host_template sym53c500_driver_template = { .this_id = 7, .sg_tablesize = 32, .shost_groups = SYM53C500_shost_groups, - .cmd_size = sizeof(struct scsi_pointer), + .cmd_size = sizeof(struct sym53c500_cmd_priv), }; static int SYM53C500_config_check(struct pcmcia_device *p_dev, void *priv_data) From f61eb1216c959f93ffabd3b8781fa5b2b22f8907 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 5 Apr 2022 17:36:37 +0530 Subject: [PATCH 231/423] scsi: mpt3sas: Fail reset operation if config request timed out As part of controller reset operation the driver issues a config request command. If this command gets times out, then fail the controller reset operation instead of retrying it. Link: https://lore.kernel.org/r/20220405120637.20528-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_config.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index 0563078227de..a8dd14c91efd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -394,10 +394,13 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t retry_count++; if (ioc->config_cmds.smid == smid) mpt3sas_base_free_smid(ioc, smid); - if ((ioc->shost_recovery) || (ioc->config_cmds.status & - MPT3_CMD_RESET) || ioc->pci_error_recovery) + if (ioc->config_cmds.status & MPT3_CMD_RESET) goto retry_config; - issue_host_reset = 1; + if (ioc->shost_recovery || ioc->pci_error_recovery) { + issue_host_reset = 0; + r = -EFAULT; + } else + issue_host_reset = 1; goto free_mem; } From 1700714b1ff252b634db21186db4d91e7e006043 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 2 Mar 2022 00:35:57 -0500 Subject: [PATCH 232/423] scsi: sd: sd_read_cpr() requires VPD pages As such it should be called inside the scsi_device_supports_vpd() conditional. Link: https://lore.kernel.org/r/20220302053559.32147-13-martin.petersen@oracle.com Fixes: e815d36548f0 ("scsi: sd: add concurrent positioning ranges support") Cc: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a390679cf458..cecba3fcbc61 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3216,6 +3216,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits(sdkp); sd_read_block_characteristics(sdkp); sd_zbc_read_zones(sdkp, buffer); + sd_read_cpr(sdkp); } sd_print_capacity(sdkp, old_capacity); @@ -3225,7 +3226,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); - sd_read_cpr(sdkp); } /* From 5f2bce1e222028dc1c15f130109a17aa654ae6e8 Mon Sep 17 00:00:00 2001 From: Alexey Galakhov Date: Wed, 9 Mar 2022 22:25:35 +0100 Subject: [PATCH 233/423] scsi: mvsas: Add PCI ID of RocketRaid 2640 The HighPoint RocketRaid 2640 is a low-cost SAS controller based on Marvell chip. The chip in question was already supported by the kernel, just the PCI ID of this particular board was missing. Link: https://lore.kernel.org/r/20220309212535.402987-1-agalakhov@gmail.com Signed-off-by: Alexey Galakhov Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 7ac63eb5ccd3..2fde496fff5f 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -647,6 +647,7 @@ static struct pci_device_id mvs_pci_table[] = { { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 }, { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 }, { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 }, + { PCI_VDEVICE(TTI, 0x2640), chip_6440 }, { PCI_VDEVICE(TTI, 0x2710), chip_9480 }, { PCI_VDEVICE(TTI, 0x2720), chip_9480 }, { PCI_VDEVICE(TTI, 0x2721), chip_9480 }, From 61144d83376a136d8aa7a9e057d916c505bfb75f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Mar 2022 00:39:27 +0000 Subject: [PATCH 234/423] scsi: message: fusion: Remove redundant variable dmp Variable dmp is being assigned a value that is never read, the variable is redundant and can be removed. Cleans up clang scan build warning: drivers/message/fusion/mptbase.c:6667:39: warning: Although the value stored to 'dmp' is used in the enclosing expression, the value is never actually read from 'dmp' [deadcode.DeadStores] Link: https://lore.kernel.org/r/20220318003927.81471-1-colin.i.king@gmail.com Reviewed-by: Nick Desaulniers Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptbase.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index e90adfa57950..9b3ba2df71c7 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -6658,13 +6658,13 @@ static int mpt_summary_proc_show(struct seq_file *m, void *v) static int mpt_version_proc_show(struct seq_file *m, void *v) { u8 cb_idx; - int scsi, fc, sas, lan, ctl, targ, dmp; + int scsi, fc, sas, lan, ctl, targ; char *drvname; seq_printf(m, "%s-%s\n", "mptlinux", MPT_LINUX_VERSION_COMMON); seq_printf(m, " Fusion MPT base driver\n"); - scsi = fc = sas = lan = ctl = targ = dmp = 0; + scsi = fc = sas = lan = ctl = targ = 0; for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) { drvname = NULL; if (MptCallbacks[cb_idx]) { From 03252259e18e63eb56a0d29c2fefcc30b58b812b Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 31 Mar 2022 21:10:19 -0400 Subject: [PATCH 235/423] scsi: sd: Clean up gendisk if device_add_disk() failed We forgot to call blk_cleanup_disk() when device_add_disk() failed. This would cause a memory leak of gendisk and sched_tags allocated in elevator_init_mq() Reference:https://syzkaller.appspot.com/x/log.txt?x=13b41dcb700000 Reported-and-tested-by: syzbot+f08c77040fa163a75a46@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220401011018.1026553-1-haowenchao@huawei.com Signed-off-by: Wenchao Hao Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cecba3fcbc61..dc6e55761fd1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3475,6 +3475,7 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { put_device(&sdkp->disk_dev); + blk_cleanup_disk(gd); goto out; } From bfb7789bcbd901caead43861461bc8f334c90d3b Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Sun, 20 Mar 2022 23:07:33 +0800 Subject: [PATCH 236/423] scsi: ufs: ufshpb: Fix a NULL check on list iterator The list iterator is always non-NULL so the check 'if (!rgn)' is always false and the dev_err() is never called. Move the check outside the loop and determine if 'victim_rgn' is NULL, to fix this bug. Link: https://lore.kernel.org/r/20220320150733.21824-1-xiam0nd.tong@gmail.com Fixes: 4b5f49079c52 ("scsi: ufs: ufshpb: L2P map management for HPB read") Reviewed-by: Daejun Park Signed-off-by: Xiaomeng Tong Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshpb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index b2bec19022cd..81099b68bbfb 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -867,12 +867,6 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) struct ufshpb_region *rgn, *victim_rgn = NULL; list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) { - if (!rgn) { - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "%s: no region allocated\n", - __func__); - return NULL; - } if (ufshpb_check_srgns_issue_state(hpb, rgn)) continue; @@ -888,6 +882,11 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) break; } + if (!victim_rgn) + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "%s: no region allocated\n", + __func__); + return victim_rgn; } From 56495f295d8e021f77d065b890fc0100e3f9f6d8 Mon Sep 17 00:00:00 2001 From: Chandrakanth patil Date: Thu, 24 Mar 2022 02:47:11 -0700 Subject: [PATCH 237/423] scsi: megaraid_sas: Target with invalid LUN ID is deleted during scan The megaraid_sas driver supports single LUN for RAID devices. That is LUN 0. All other LUNs are unsupported. When a device scan on a logical target with invalid LUN number is invoked through sysfs, that target ends up getting removed. Add LUN ID validation in the slave destroy function to avoid the target deletion. Link: https://lore.kernel.org/r/20220324094711.48833-1-chandrakanth.patil@broadcom.com Signed-off-by: Chandrakanth patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 3 +++ drivers/scsi/megaraid/megaraid_sas_base.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 611871ef15b5..4919ea54b827 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2560,6 +2560,9 @@ struct megasas_instance_template { #define MEGASAS_IS_LOGICAL(sdev) \ ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) +#define MEGASAS_IS_LUN_VALID(sdev) \ + (((sdev)->lun == 0) ? 1 : 0) + #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ scp->device->id) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8bf72dbc33b7..db6793608447 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2126,6 +2126,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) goto scan_target; } return -ENXIO; + } else if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return -ENXIO; } scan_target: @@ -2156,6 +2159,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev) instance = megasas_lookup_instance(sdev->host->host_no); if (MEGASAS_IS_LOGICAL(sdev)) { + if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return; + } ld_tgt_id = MEGASAS_TARGET_ID(sdev); instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED; if (megasas_dbg_lvl & LD_PD_DEBUG) From 02de9331c4d0c6bddac9c5fa66d91f70adf8612b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 16 Mar 2022 13:51:29 +0100 Subject: [PATCH 238/423] KVM: selftests: get-reg-list: Add KVM_REG_ARM_FW_REG(3) When testing a kernel with commit a5905d6af492 ("KVM: arm64: Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated") get-reg-list output vregs: Number blessed registers: 234 vregs: Number registers: 238 vregs: There are 1 new registers. Consider adding them to the blessed reg list with the following lines: KVM_REG_ARM_FW_REG(3), vregs: PASS ... That output inspired two changes: 1) add the new register to the blessed list and 2) explain why "Number registers" is actually four larger than "Number blessed registers" (on the system used for testing), even though only one register is being stated as new. The reason is that some registers are host dependent and they get filtered out when comparing with the blessed list. The system used for the test apparently had three filtered registers. Signed-off-by: Andrew Jones Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220316125129.392128-1-drjones@redhat.com --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index f12147c43464..0b571f3fe64c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -503,8 +503,13 @@ static void run_test(struct vcpu_config *c) ++missing_regs; if (new_regs || missing_regs) { + n = 0; + for_each_reg_filtered(i) + ++n; + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); - printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n); + printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", + config_name(c), reg_list->n, reg_list->n - n); } if (new_regs) { @@ -683,9 +688,10 @@ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), - KVM_REG_ARM_FW_REG(0), - KVM_REG_ARM_FW_REG(1), - KVM_REG_ARM_FW_REG(2), + KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */ + KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */ + KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */ + KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 2), From a44a4cc1c969afec97dbb2aedaf6f38eaa6253bb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:13 +0000 Subject: [PATCH 239/423] KVM: Don't create VM debugfs files outside of the VM directory Unfortunately, there is no guarantee that KVM was able to instantiate a debugfs directory for a particular VM. To that end, KVM shouldn't even attempt to create new debugfs files in this case. If the specified parent dentry is NULL, debugfs_create_file() will instantiate files at the root of debugfs. For arm64, it is possible to create the vgic-state file outside of a VM directory, the file is not cleaned up when a VM is destroyed. Nonetheless, the corresponding struct kvm is freed when the VM is destroyed. Nip the problem in the bud for all possible errant debugfs file creations by initializing kvm->debugfs_dentry to -ENOENT. In so doing, debugfs_create_file() will fail instead of creating the file in the root directory. Cc: stable@kernel.org Fixes: 929f45e32499 ("kvm: no need to check return value of debugfs_create functions") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-2-oupton@google.com --- virt/kvm/kvm_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70e05af5ebea..e39a6f56fc47 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -932,7 +932,7 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; - if (!kvm->debugfs_dentry) + if (IS_ERR(kvm->debugfs_dentry)) return; debugfs_remove_recursive(kvm->debugfs_dentry); @@ -955,6 +955,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; + /* + * Force subsequent debugfs file creations to fail if the VM directory + * is not created. + */ + kvm->debugfs_dentry = ERR_PTR(-ENOENT); + if (!debugfs_initialized()) return 0; @@ -5479,7 +5485,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (kvm->debugfs_dentry) { + if (!IS_ERR(kvm->debugfs_dentry)) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { From 386ba265a8197716076a88853244f4437b92b167 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:14 +0000 Subject: [PATCH 240/423] selftests: KVM: Don't leak GIC FD across dirty log test iterations dirty_log_perf_test instantiates a VGICv3 for the guest (if supported by hardware) to reduce the overhead of guest exits. However, the test does not actually close the GIC fd when cleaning up the VM between test iterations, meaning that the VM is never actually destroyed in the kernel. While this is generally a bad idea, the bug was detected from the kernel spewing about duplicate debugfs entries as subsequent VMs happen to reuse the same FD even though the debugfs directory is still present. Abstract away the notion of setup/cleanup of the GIC FD from the test by creating arch-specific helpers for test setup/cleanup. Close the GIC FD on VM cleanup and do nothing for the other architectures. Fixes: c340f7899af6 ("KVM: selftests: Add vgic initialization for dirty log perf test for ARM") Reviewed-by: Jing Zhang Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-3-oupton@google.com --- .../selftests/kvm/dirty_log_perf_test.c | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index c9d9e513ca04..7b47ae4f952e 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,11 +18,40 @@ #include "test_util.h" #include "perf_test_util.h" #include "guest_modes.h" + #ifdef __aarch64__ #include "aarch64/vgic.h" #define GICD_BASE_GPA 0x8000000ULL #define GICR_BASE_GPA 0x80A0000ULL + +static int gic_fd; + +static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + /* + * The test can still run even if hardware does not support GICv3, as it + * is only an optimization to reduce guest exits. + */ + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); +} + +static void arch_cleanup_vm(struct kvm_vm *vm) +{ + if (gic_fd > 0) + close(gic_fd); +} + +#else /* __aarch64__ */ + +static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) +{ +} + +static void arch_cleanup_vm(struct kvm_vm *vm) +{ +} + #endif /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ @@ -206,9 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } -#ifdef __aarch64__ - vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); -#endif + arch_setup_vm(vm, nr_vcpus); /* Start the iterations */ iteration = 0; @@ -302,6 +329,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) } free_bitmaps(bitmaps, p->slots); + arch_cleanup_vm(vm); perf_test_destroy_vm(vm); } From 21db83846683d3987666505a3ec38f367708199a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:15 +0000 Subject: [PATCH 241/423] selftests: KVM: Free the GIC FD when cleaning up in arch_timer In order to correctly destroy a VM, all references to the VM must be freed. The arch_timer selftest creates a VGIC for the guest, which itself holds a reference to the VM. Close the GIC FD when cleaning up a VM. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-4-oupton@google.com --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index b08d30bf71c5..3b940a101bc0 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -362,11 +362,12 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; - int ret; int nr_vcpus = test_args.nr_vcpus; vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); @@ -383,8 +384,8 @@ static struct kvm_vm *test_vm_create(void) ucall_init(vm, NULL); test_init_timer_irq(vm); - ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); - if (ret < 0) { + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + if (gic_fd < 0) { print_skip("Failed to create vgic-v3"); exit(KSFT_SKIP); } @@ -395,6 +396,12 @@ static struct kvm_vm *test_vm_create(void) return vm; } +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); +} + static void test_print_help(char *name) { pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n", @@ -478,7 +485,7 @@ int main(int argc, char *argv[]) vm = test_vm_create(); test_run(vm); - kvm_vm_free(vm); + test_vm_cleanup(vm); return 0; } From 9eb6f5c388060d8cef3c8b616cc31b765e022359 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Tue, 5 Apr 2022 12:20:29 -0600 Subject: [PATCH 242/423] ALSA: hda/realtek: Add quirk for Clevo PD50PNT Fixes speaker output and headset detection on Clevo PD50PNT. Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20220405182029.27431-1-tcrawford@system76.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aace474a899d..61df440fdb61 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2619,6 +2619,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), From 9dd7c46346ca4390f84a7ea9933005eb1b175c15 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Apr 2022 16:41:18 -0700 Subject: [PATCH 243/423] sound/oss/dmasound: fix build when drivers are mixed =y/=m When CONFIG_DMASOUND_ATARI=m and CONFIG_DMASOUND_Q40=y (or vice versa), dmasound_core.o can be built without dmasound_deinit() being defined, causing a build error: ERROR: modpost: "dmasound_deinit" [sound/oss/dmasound/dmasound_atari.ko] undefined! Modify dmasound_core.c and dmasound.h so that dmasound_deinit() is always available. The mixed modes (=y/=m) also mean that several variables and structs have to be declared in all cases. Suggested-by: Arnd Bergmann Suggested-by: Geert Uytterhoeven Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202204032138.EFT9qGEd-lkp@intel.com Cc: Geert Uytterhoeven Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: alsa-devel@alsa-project.org Link: https://lore.kernel.org/r/20220405234118.24830-1-rdunlap@infradead.org Signed-off-by: Takashi Iwai --- sound/oss/dmasound/dmasound.h | 6 ------ sound/oss/dmasound/dmasound_core.c | 24 +----------------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/sound/oss/dmasound/dmasound.h b/sound/oss/dmasound/dmasound.h index c1c52b479da2..ad8ce6a1c25c 100644 --- a/sound/oss/dmasound/dmasound.h +++ b/sound/oss/dmasound/dmasound.h @@ -88,11 +88,7 @@ static inline int ioctl_return(int __user *addr, int value) */ extern int dmasound_init(void); -#ifdef MODULE extern void dmasound_deinit(void); -#else -#define dmasound_deinit() do { } while (0) -#endif /* description of the set-up applies to either hard or soft settings */ @@ -114,9 +110,7 @@ typedef struct { void *(*dma_alloc)(unsigned int, gfp_t); void (*dma_free)(void *, unsigned int); int (*irqinit)(void); -#ifdef MODULE void (*irqcleanup)(void); -#endif void (*init)(void); void (*silence)(void); int (*setFormat)(int); diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 0c95828ac0b1..9c48f3a9e3d1 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -206,12 +206,10 @@ module_param(writeBufSize, int, 0); MODULE_LICENSE("GPL"); -#ifdef MODULE static int sq_unit = -1; static int mixer_unit = -1; static int state_unit = -1; static int irq_installed; -#endif /* MODULE */ /* control over who can modify resources shared between play/record */ static fmode_t shared_resource_owner; @@ -391,9 +389,6 @@ static const struct file_operations mixer_fops = static void mixer_init(void) { -#ifndef MODULE - int mixer_unit; -#endif mixer_unit = register_sound_mixer(&mixer_fops, -1); if (mixer_unit < 0) return; @@ -1171,9 +1166,6 @@ static const struct file_operations sq_fops = static int sq_init(void) { const struct file_operations *fops = &sq_fops; -#ifndef MODULE - int sq_unit; -#endif sq_unit = register_sound_dsp(fops, -1); if (sq_unit < 0) { @@ -1366,9 +1358,6 @@ static const struct file_operations state_fops = { static int state_init(void) { -#ifndef MODULE - int state_unit; -#endif state_unit = register_sound_special(&state_fops, SND_DEV_STATUS); if (state_unit < 0) return state_unit ; @@ -1386,10 +1375,9 @@ static int state_init(void) int dmasound_init(void) { int res ; -#ifdef MODULE + if (irq_installed) return -EBUSY; -#endif /* Set up sound queue, /dev/audio and /dev/dsp. */ @@ -1408,9 +1396,7 @@ int dmasound_init(void) printk(KERN_ERR "DMA sound driver: Interrupt initialization failed\n"); return -ENODEV; } -#ifdef MODULE irq_installed = 1; -#endif printk(KERN_INFO "%s DMA sound driver rev %03d installed\n", dmasound.mach.name, (DMASOUND_CORE_REVISION<<4) + @@ -1424,8 +1410,6 @@ int dmasound_init(void) return 0; } -#ifdef MODULE - void dmasound_deinit(void) { if (irq_installed) { @@ -1444,8 +1428,6 @@ void dmasound_deinit(void) unregister_sound_dsp(sq_unit); } -#else /* !MODULE */ - static int dmasound_setup(char *str) { int ints[6], size; @@ -1489,8 +1471,6 @@ static int dmasound_setup(char *str) __setup("dmasound=", dmasound_setup); -#endif /* !MODULE */ - /* * Conversion tables */ @@ -1577,9 +1557,7 @@ char dmasound_alaw2dma8[] = { EXPORT_SYMBOL(dmasound); EXPORT_SYMBOL(dmasound_init); -#ifdef MODULE EXPORT_SYMBOL(dmasound_deinit); -#endif EXPORT_SYMBOL(dmasound_write_sq); EXPORT_SYMBOL(dmasound_catchRadius); #ifdef HAS_8BIT_TABLES From d52eee988597ac2a2c5d17d842946616d7d41070 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 6 Apr 2022 14:04:18 -0500 Subject: [PATCH 244/423] ALSA: hda: intel-dsp-config: update AlderLake PCI IDs Add missing AlderLake-PS and RaptorLake-S PCI IDs (already in HDaudio and SOF drivers), add comments and regroup by skew. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220406190418.245044-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 70fd8b13938e..8b0a16ba27d3 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -390,22 +390,36 @@ static const struct config_entry config_table[] = { /* Alder Lake */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE) + /* Alderlake-S */ { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x7ad0, }, + /* RaptorLake-S */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x7a50, + }, + /* Alderlake-P */ { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x51c8, }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, - .device = 0x51cc, - }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x51cd, }, + /* Alderlake-PS */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x51c9, + }, + /* Alderlake-M */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x51cc, + }, + /* Alderlake-N */ { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x54c8, From be8a096521ca1a252bf078b347f96ce94582612e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Mar 2022 13:13:41 +0200 Subject: [PATCH 245/423] x86,bpf: Avoid IBT objtool warning Clang can inline emit_indirect_jump() and then folds constants, which results in: | vmlinux.o: warning: objtool: emit_bpf_dispatcher()+0x6a4: relocation to !ENDBR: .text.__x86.indirect_thunk+0x40 | vmlinux.o: warning: objtool: emit_bpf_dispatcher()+0x67d: relocation to !ENDBR: .text.__x86.indirect_thunk+0x40 | vmlinux.o: warning: objtool: emit_bpf_tail_call_indirect()+0x386: relocation to !ENDBR: .text.__x86.indirect_thunk+0x20 | vmlinux.o: warning: objtool: emit_bpf_tail_call_indirect()+0x35d: relocation to !ENDBR: .text.__x86.indirect_thunk+0x20 Suppress the optimization such that it must emit a code reference to the __x86_indirect_thunk_array[] base. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Link: https://lkml.kernel.org/r/20220405075531.GB30877@worktop.programming.kicks-ass.net --- arch/x86/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8fe35ed11fd6..16b6efacf7c6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,6 +412,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else #endif From 334865b2915c33080624e0d06f1c3e917036472c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Mar 2022 13:21:45 -0700 Subject: [PATCH 246/423] x86/extable: Prefer local labels in .set directives Bernardo reported an error that Nathan bisected down to (x86_64) defconfig+LTO_CLANG_FULL+X86_PMEM_LEGACY. LTO vmlinux.o ld.lld: error: :1:13: redefinition of 'found' .set found, 0 ^ :29:1: while in macro instantiation extable_type_reg reg=%eax, type=(17 | ((0) << 16)) ^ This appears to be another LTO specific issue similar to what was folded into commit 4b5305decc84 ("x86/extable: Extend extable functionality"), where the `.set found, 0` in DEFINE_EXTABLE_TYPE_REG in arch/x86/include/asm/asm.h conflicts with the symbol for the static function `found` in arch/x86/kernel/pmem.c. Assembler .set directive declare symbols with global visibility, so the assembler may not rename such symbols in the event of a conflict. LTO could rename static functions if there was a conflict in C sources, but it cannot see into symbols defined in inline asm. The symbols are also retained in the symbol table, regardless of LTO. Give the symbols .L prefixes making them locally visible, so that they may be renamed for LTO to avoid conflicts, and to drop them from the symbol table regardless of LTO. Fixes: 4b5305decc84 ("x86/extable: Extend extable functionality") Reported-by: Bernardo Meurer Costa Debugged-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20220329202148.2379697-1-ndesaulniers@google.com --- arch/x86/include/asm/asm.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index c878fed3056f..fbcfec4dc4cc 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -154,24 +154,24 @@ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ - ".set found, 0\n" \ - ".set regnr, 0\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".set regnr, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".if (found != 1)\n" \ + ".if (.Lfound != 1)\n" \ ".error \"extable_type_reg: bad register argument\"\n" \ ".endif\n" \ ".endm\n" From 03e59b1e2f56245163b14c69e0a830c24b1a3a47 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 4 Apr 2022 13:49:02 +0200 Subject: [PATCH 247/423] mmc: renesas_sdhi: don't overwrite TAP settings when HS400 tuning is complete When HS400 tuning is complete and HS400 is going to be activated, we have to keep the current number of TAPs and should not overwrite them with a hardcoded value. This was probably a copy&paste mistake when upporting HS400 support from the BSP. Fixes: 26eb2607fa28 ("mmc: renesas_sdhi: add eMMC HS400 mode support") Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220404114902.12175-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 2a4d314aa027..ddb5ca2f559e 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -396,10 +396,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc) SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) | sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2)); - /* Set the sampling clock selection range of HS400 mode */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL, SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN | - 0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT); + sd_scc_read32(host, priv, + SH_MOBILE_SDHI_SCC_DTCNTL)); /* Avoid bad TAP */ if (bad_taps & BIT(priv->tap_set)) { From 59b18a1e65b7a2134814106d0860010e10babe18 Mon Sep 17 00:00:00 2001 From: Reto Buerki Date: Thu, 7 Apr 2022 13:06:47 +0200 Subject: [PATCH 248/423] x86/msi: Fix msi message data shadow struct The x86 MSI message data is 32 bits in total and is either in compatibility or remappable format, see Intel Virtualization Technology for Directed I/O, section 5.1.2. Fixes: 6285aa50736 ("x86/msi: Provide msi message shadow structs") Co-developed-by: Adrian-Ken Rueegsegger Signed-off-by: Adrian-Ken Rueegsegger Signed-off-by: Reto Buerki Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220407110647.67372-1-reet@codelabs.ch --- arch/x86/include/asm/msi.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index b85147d75626..d71c7e8b738d 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, /* Structs and defines for the X86 specific MSI message format */ typedef struct x86_msi_data { - u32 vector : 8, - delivery_mode : 3, - dest_mode_logical : 1, - reserved : 2, - active_low : 1, - is_level : 1; - - u32 dmar_subhandle; + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; } __attribute__ ((packed)) arch_msi_msg_data_t; #define arch_msi_msg_data x86_msi_data From 83a8441f8d8e2e47e9bf2aead3aca625ab95d5ad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Apr 2022 08:41:39 -0400 Subject: [PATCH 249/423] mm/huge_memory: Avoid calling pmd_page() on a non-leaf PMD Calling try_to_unmap() with TTU_SPLIT_HUGE_PMD and a folio that's not mapped by a PMD causes oopses on arm64 because we now call page_folio() on an invalid page. pmd_page() returns a valid page for non-leaf PMDs on some architectures, so this bug escaped testing before now. Fix this bug by delaying the call to pmd_page() until after we know the PMD is a leaf. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215804 Fixes: af28a988b313 ("mm/huge_memory: Convert __split_huge_pmd() to take a folio") Reported-by: Zorro Lang Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Zorro Lang --- mm/huge_memory.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2fe38212e07c..c468fee595ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2145,15 +2145,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * pmd against. Otherwise we can end up replacing wrong folio. */ VM_BUG_ON(freeze && !folio); - if (folio) { - VM_WARN_ON_ONCE(!folio_test_locked(folio)); - if (folio != page_folio(pmd_page(*pmd))) - goto out; - } + VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || - is_pmd_migration_entry(*pmd)) + is_pmd_migration_entry(*pmd)) { + if (folio && folio != page_folio(pmd_page(*pmd))) + goto out; __split_huge_pmd_locked(vma, pmd, range.start, freeze); + } out: spin_unlock(ptl); From ffe06786b54039edcecb51a54061ee8d81036a19 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 14:35:04 -0400 Subject: [PATCH 250/423] mm/migrate: Use a folio in alloc_migration_target() This removes an assumption that a large folio is HPAGE_PMD_ORDER as well as letting us remove the call to prep_transhuge_page() and a few hidden calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/migrate.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index de175e2fdba5..9894e90db006 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1520,10 +1520,11 @@ out: struct page *alloc_migration_target(struct page *page, unsigned long private) { + struct folio *folio = page_folio(page); struct migration_target_control *mtc; gfp_t gfp_mask; unsigned int order = 0; - struct page *new_page = NULL; + struct folio *new_folio = NULL; int nid; int zidx; @@ -1531,34 +1532,31 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) gfp_mask = mtc->gfp_mask; nid = mtc->nid; if (nid == NUMA_NO_NODE) - nid = page_to_nid(page); + nid = folio_nid(folio); - if (PageHuge(page)) { - struct hstate *h = page_hstate(compound_head(page)); + if (folio_test_hugetlb(folio)) { + struct hstate *h = page_hstate(&folio->page); gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); } - if (PageTransHuge(page)) { + if (folio_test_large(folio)) { /* * clear __GFP_RECLAIM to make the migration callback * consistent with regular THP allocations. */ gfp_mask &= ~__GFP_RECLAIM; gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; + order = folio_order(folio); } - zidx = zone_idx(page_zone(page)); + zidx = zone_idx(folio_zone(folio)); if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask); + new_folio = __folio_alloc(gfp_mask, order, nid, mtc->nmask); - if (new_page && PageTransHuge(new_page)) - prep_transhuge_page(new_page); - - return new_page; + return &new_folio->page; } #ifdef CONFIG_NUMA From c185e494ae0ceb126d89b8e3413ed0a1132e05d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 6 Jul 2021 10:50:39 -0400 Subject: [PATCH 251/423] mm/migrate: Use a folio in migrate_misplaced_transhuge_page() Unify alloc_misplaced_dst_page() and alloc_misplaced_dst_page_thp(). Removes an assumption that compound pages are HPAGE_PMD_ORDER. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/migrate.c | 56 ++++++++++++++-------------------------------------- 1 file changed, 15 insertions(+), 41 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9894e90db006..6c31ee1e1c9b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1997,32 +1997,20 @@ static struct page *alloc_misplaced_dst_page(struct page *page, unsigned long data) { int nid = (int) data; - struct page *newpage; + int order = compound_order(page); + gfp_t gfp = __GFP_THISNODE; + struct folio *new; - newpage = __alloc_pages_node(nid, - (GFP_HIGHUSER_MOVABLE | - __GFP_THISNODE | __GFP_NOMEMALLOC | - __GFP_NORETRY | __GFP_NOWARN) & - ~__GFP_RECLAIM, 0); + if (order > 0) + gfp |= GFP_TRANSHUGE_LIGHT; + else { + gfp |= GFP_HIGHUSER_MOVABLE | __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN; + gfp &= ~__GFP_RECLAIM; + } + new = __folio_alloc_node(gfp, order, nid); - return newpage; -} - -static struct page *alloc_misplaced_dst_page_thp(struct page *page, - unsigned long data) -{ - int nid = (int) data; - struct page *newpage; - - newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), - HPAGE_PMD_ORDER); - if (!newpage) - goto out; - - prep_transhuge_page(newpage); - -out: - return newpage; + return &new->page; } static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) @@ -2080,22 +2068,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int nr_remaining; unsigned int nr_succeeded; LIST_HEAD(migratepages); - new_page_t *new; - bool compound; int nr_pages = thp_nr_pages(page); - /* - * PTE mapped THP or HugeTLB page can't reach here so the page could - * be either base page or THP. And it must be head page if it is - * THP. - */ - compound = PageTransHuge(page); - - if (compound) - new = alloc_misplaced_dst_page_thp; - else - new = alloc_misplaced_dst_page; - /* * Don't migrate file pages that are mapped in multiple processes * with execute permissions as they are probably shared libraries. @@ -2116,9 +2090,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, goto out; list_add(&page->lru, &migratepages); - nr_remaining = migrate_pages(&migratepages, *new, NULL, node, - MIGRATE_ASYNC, MR_NUMA_MISPLACED, - &nr_succeeded); + nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, + NULL, node, MIGRATE_ASYNC, + MR_NUMA_MISPLACED, &nr_succeeded); if (nr_remaining) { if (!list_empty(&migratepages)) { list_del(&page->lru); From f584b68005ac782097d63a691740cb0dfed072ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 15:11:04 -0400 Subject: [PATCH 252/423] mm: Add vma_alloc_folio() This wrapper around alloc_pages_vma() calls prep_transhuge_page(), removing the obligation from the caller. This is in the same spirit as __folio_alloc(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- include/linux/gfp.h | 8 ++++++-- mm/mempolicy.c | 13 +++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 761f8f1885c7..3e3d36fc2109 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -613,9 +613,11 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); struct folio *folio_alloc(gfp_t gfp, unsigned order); -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, +struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); +struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, true) #else @@ -627,8 +629,10 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } -#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\ +#define alloc_pages_vma(gfp_mask, order, vma, addr, hugepage) \ alloc_pages(gfp_mask, order) +#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \ + folio_alloc(gfp, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a2516d31db6c..ec15f4f4b714 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2227,6 +2227,19 @@ out: } EXPORT_SYMBOL(alloc_pages_vma); +struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, bool hugepage) +{ + struct folio *folio; + + folio = (struct folio *)alloc_pages_vma(gfp, order, vma, addr, + hugepage); + if (folio && order > 1) + prep_transhuge_page(&folio->page); + + return folio; +} + /** * alloc_pages - Allocate pages. * @gfp: GFP flags. From ec4858e07ed62eceb60bac2ded3c0d6e2471c66b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 15:23:39 -0400 Subject: [PATCH 253/423] mm/mempolicy: Use vma_alloc_folio() in new_page() Simplify new_page() by unifying the THP and base page cases, and handle orders other than 0 and HPAGE_PMD_ORDER correctly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/mempolicy.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ec15f4f4b714..649bd3be8682 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1191,8 +1191,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, */ static struct page *new_page(struct page *page, unsigned long start) { + struct folio *dst, *src = page_folio(page); struct vm_area_struct *vma; unsigned long address; + gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL; vma = find_vma(current->mm, start); while (vma) { @@ -1202,24 +1204,19 @@ static struct page *new_page(struct page *page, unsigned long start) vma = vma->vm_next; } - if (PageHuge(page)) { - return alloc_huge_page_vma(page_hstate(compound_head(page)), + if (folio_test_hugetlb(src)) + return alloc_huge_page_vma(page_hstate(&src->page), vma, address); - } else if (PageTransHuge(page)) { - struct page *thp; - thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } + if (folio_test_large(src)) + gfp = GFP_TRANSHUGE; + /* - * if !vma, alloc_page_vma() will use task or system default policy + * if !vma, vma_alloc_folio() will use task or system default policy */ - return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL, - vma, address); + dst = vma_alloc_folio(gfp, folio_order(src), vma, address, + folio_test_large(src)); + return &dst->page; } #else From 98ea02597b9967c0817d29fee2f96d21b9e59ca5 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 7 Apr 2022 14:40:08 +0800 Subject: [PATCH 254/423] mm/rmap: Fix handling of hugetlbfs pages in page_vma_mapped_walk page_mapped_in_vma() sets nr_pages to 1, which is usually correct as we only want to know about the precise page and not about other pages in the folio. However, hugetlbfs does want to know about the entire hpage, and using nr_pages to get the size of the hpage is wrong. We could change page_mapped_in_vma() to special-case hugetlbfs pages, but it's better to ignore nr_pages in page_vma_mapped_walk() and get the size from the VMA instead. Fixes: 2aff7a4755bed ("mm: Convert page_vma_mapped_walk to work on PFNs") Signed-off-by: zhenwei pi Reviewed-by: Muchun Song Signed-off-by: Matthew Wilcox (Oracle) [edit commit message, use hstate directly] --- mm/page_vma_mapped.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 1187f9c1ec5b..14a5cda73dee 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -163,7 +163,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return not_found(pvmw); if (unlikely(is_vm_hugetlb_page(vma))) { - unsigned long size = pvmw->nr_pages * PAGE_SIZE; + struct hstate *hstate = hstate_vma(vma); + unsigned long size = huge_page_size(hstate); /* The only possible mapping was handled on last iteration */ if (pvmw->pte) return not_found(pvmw); @@ -173,8 +174,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (!pvmw->pte) return false; - pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm, - pvmw->pte); + pvmw->ptl = huge_pte_lockptr(hstate, mm, pvmw->pte); spin_lock(pvmw->ptl); if (!check_pte(pvmw)) return not_found(pvmw); From 4d5004451ab2218eab94a30e1841462c9316ba19 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Apr 2022 13:51:32 -0400 Subject: [PATCH 255/423] SUNRPC: Fix the svc_deferred_event trace class Fix a NULL deref crash that occurs when an svc_rqst is deferred while the sunrpc tracing subsystem is enabled. svc_revisit() sets dr->xprt to NULL, so it can't be relied upon in the tracepoint to provide the remote's address. Unfortunately we can't revert the "svc_deferred_class" hunk in commit ece200ddd54b ("sunrpc: Save remote presentation address in svc_xprt for trace events") because there is now a specific check of event format specifiers for unsafe dereferences. The warning that check emits is: event svc_defer_recv has unsafe dereference of argument 1 A "%pISpc" format specifier with a "struct sockaddr *" is indeed flagged by this check. Instead, take the brute-force approach used by the svcrdma_qp_error tracepoint. Convert the dr::addr field into a presentation address in the TP_fast_assign() arm of the trace event, and store that as a string. This fix can be backported to -stable kernels. In the meantime, commit c6ced22997ad ("tracing: Update print fmt check to handle new __get_sockaddr() macro") is now in v5.18, so this wonky fix can be replaced with __sockaddr() and friends properly during the v5.19 merge window. Fixes: ece200ddd54b ("sunrpc: Save remote presentation address in svc_xprt for trace events") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ab8ae1f6ba84..4eb706fa5825 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2017,17 +2017,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(const void *, dr) __field(u32, xid) - __string(addr, dr->xprt->xpt_remotebuf) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) ), TP_fast_assign( __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - __assign_str(addr, dr->xprt->xpt_remotebuf); + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", (struct sockaddr *)&dr->addr); ), - TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, __entry->xid) ); From 5106dd6e74ab6c94daac1c357094f11e6934b36f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 Apr 2022 17:18:43 -0600 Subject: [PATCH 256/423] io_uring: propagate issue_flags state down to file assignment We'll need this in a future patch, when we could be assigning the file after the prep stage. While at it, get rid of the io_file_get() helper, it just makes the code harder to read. Signed-off-by: Jens Axboe --- fs/io_uring.c | 82 +++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 969f65de9972..398128db9728 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1183,8 +1183,9 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, struct io_uring_rsrc_update2 *up, unsigned nr_args); static void io_clean_op(struct io_kiocb *req); -static struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed); +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned issue_flags); +static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -1314,13 +1315,20 @@ static void io_rsrc_refs_refill(struct io_ring_ctx *ctx) } static inline void io_req_set_rsrc_node(struct io_kiocb *req, - struct io_ring_ctx *ctx) + struct io_ring_ctx *ctx, + unsigned int issue_flags) { if (!req->fixed_rsrc_refs) { req->fixed_rsrc_refs = &ctx->rsrc_node->refs; - ctx->rsrc_cached_refs--; - if (unlikely(ctx->rsrc_cached_refs < 0)) - io_rsrc_refs_refill(ctx); + + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + lockdep_assert_held(&ctx->uring_lock); + ctx->rsrc_cached_refs--; + if (unlikely(ctx->rsrc_cached_refs < 0)) + io_rsrc_refs_refill(ctx); + } else { + percpu_ref_get(req->fixed_rsrc_refs); + } } } @@ -3330,7 +3338,8 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter return 0; } -static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) +static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, + unsigned int issue_flags) { struct io_mapped_ubuf *imu = req->imu; u16 index, buf_index = req->buf_index; @@ -3340,7 +3349,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) if (unlikely(buf_index >= ctx->nr_user_bufs)) return -EFAULT; - io_req_set_rsrc_node(req, ctx); + io_req_set_rsrc_node(req, ctx, issue_flags); index = array_index_nospec(buf_index, ctx->nr_user_bufs); imu = READ_ONCE(ctx->user_bufs[index]); req->imu = imu; @@ -3502,7 +3511,7 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req, ssize_t ret; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ret = io_import_fixed(req, rw, iter); + ret = io_import_fixed(req, rw, iter, issue_flags); if (ret) return ERR_PTR(ret); return NULL; @@ -4394,8 +4403,10 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; - in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); + else + in = io_file_get_normal(req, sp->splice_fd_in); if (!in) { ret = -EBADF; goto done; @@ -4434,8 +4445,10 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; - in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); + else + in = io_file_get_normal(req, sp->splice_fd_in); if (!in) { ret = -EBADF; goto done; @@ -5973,7 +5986,7 @@ static void io_poll_remove_entries(struct io_kiocb *req) * either spurious wakeup or multishot CQE is served. 0 when it's done with * the request, then the mask is stored in req->result. */ -static int io_poll_check_events(struct io_kiocb *req) +static int io_poll_check_events(struct io_kiocb *req, bool locked) { struct io_ring_ctx *ctx = req->ctx; struct io_poll_iocb *poll = io_poll_get_single(req); @@ -6030,7 +6043,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_poll_check_events(req); + ret = io_poll_check_events(req, *locked); if (ret > 0) return; @@ -6055,7 +6068,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_poll_check_events(req); + ret = io_poll_check_events(req, *locked); if (ret > 0) return; @@ -7460,30 +7473,36 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file file_slot->file_ptr = file_ptr; } -static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned int issue_flags) { - struct file *file; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = NULL; unsigned long file_ptr; + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_lock(&ctx->uring_lock); + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - return NULL; + goto out; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; file = (struct file *) (file_ptr & FFS_MASK); file_ptr &= ~FFS_MASK; /* mask in overlapping REQ_F and FFS bits */ req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT); - io_req_set_rsrc_node(req, ctx); + io_req_set_rsrc_node(req, ctx, 0); +out: + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_unlock(&ctx->uring_lock); return file; } -static struct file *io_file_get_normal(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) +static struct file *io_file_get_normal(struct io_kiocb *req, int fd) { struct file *file = fget(fd); - trace_io_uring_file_get(ctx, req, req->user_data, fd); + trace_io_uring_file_get(req->ctx, req, req->user_data, fd); /* we don't allow fixed io_uring files */ if (file && unlikely(file->f_op == &io_uring_fops)) @@ -7491,15 +7510,6 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx, return file; } -static inline struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed) -{ - if (fixed) - return io_file_get_fixed(ctx, req, fd); - else - return io_file_get_normal(ctx, req, fd); -} - static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) { struct io_kiocb *prev = req->timeout.prev; @@ -7749,8 +7759,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, blk_start_plug_nr_ios(&state->plug, state->submit_nr); } - req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE)); + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0); + else + req->file = io_file_get_normal(req, READ_ONCE(sqe->fd)); if (unlikely(!req->file)) return -EBADF; } From 6bf9c47a398911e0ab920e362115153596c80432 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Mar 2022 10:10:08 -0600 Subject: [PATCH 257/423] io_uring: defer file assignment If an application uses direct open or accept, it knows in advance what direct descriptor value it will get as it picks it itself. This allows combined requests such as: sqe = io_uring_get_sqe(ring); io_uring_prep_openat_direct(sqe, ..., file_slot); sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe,file_slot, buf, buf_size, 0); sqe->flags |= IOSQE_FIXED_FILE; io_uring_submit(ring); where we prepare both a file open and read, and only get a completion event for the read when both have completed successfully. Currently links are fully prepared before the head is issued, but that fails if the dependent link needs a file assigned that isn't valid until the head has completed. Conversely, if the same chain is performed but the fixed file slot is already valid, then we would be unexpectedly returning data from the old file slot rather than the newly opened one. Make sure we're consistent here. Allow deferral of file setup, which makes this documented case work. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io-wq.h | 1 + fs/io_uring.c | 39 +++++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index dbecd27656c7..04d374e65e54 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack) struct io_wq_work { struct io_wq_work_node list; unsigned flags; + int fd; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) diff --git a/fs/io_uring.c b/fs/io_uring.c index 398128db9728..bdc090fec29c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7240,6 +7240,23 @@ static void io_clean_op(struct io_kiocb *req) req->flags &= ~IO_REQ_CLEAN_FLAGS; } +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) +{ + if (req->file || !io_op_defs[req->opcode].needs_file) + return true; + + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, req->work.fd, issue_flags); + else + req->file = io_file_get_normal(req, req->work.fd); + if (req->file) + return true; + + req_set_fail(req); + req->result = -EBADF; + return false; +} + static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { const struct cred *creds = NULL; @@ -7250,6 +7267,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (!io_op_defs[req->opcode].audit_skip) audit_uring_entry(req->opcode); + if (unlikely(!io_assign_file(req, issue_flags))) + return -EBADF; switch (req->opcode) { case IORING_OP_NOP: @@ -7394,10 +7413,11 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) static void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + const struct io_op_def *def = &io_op_defs[req->opcode]; unsigned int issue_flags = IO_URING_F_UNLOCKED; bool needs_poll = false; struct io_kiocb *timeout; - int ret = 0; + int ret = 0, err = -ECANCELED; /* one will be dropped by ->io_free_work() after returning to io-wq */ if (!(req->flags & REQ_F_REFCOUNT)) @@ -7409,14 +7429,18 @@ static void io_wq_submit_work(struct io_wq_work *work) if (timeout) io_queue_linked_timeout(timeout); + if (!io_assign_file(req, issue_flags)) { + err = -EBADF; + work->flags |= IO_WQ_WORK_CANCEL; + } + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (work->flags & IO_WQ_WORK_CANCEL) { - io_req_task_queue_fail(req, -ECANCELED); + io_req_task_queue_fail(req, err); return; } if (req->flags & REQ_F_FORCE_ASYNC) { - const struct io_op_def *def = &io_op_defs[req->opcode]; bool opcode_poll = def->pollin || def->pollout; if (opcode_poll && file_can_poll(req->file)) { @@ -7749,6 +7773,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[opcode].needs_file) { struct io_submit_state *state = &ctx->submit_state; + req->work.fd = READ_ONCE(sqe->fd); + /* * Plug now if we have more than 2 IO left after this, and the * target is potentially a read/write to block based storage. @@ -7758,13 +7784,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, state->need_plug = false; blk_start_plug_nr_ios(&state->plug, state->submit_nr); } - - if (req->flags & REQ_F_FIXED_FILE) - req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0); - else - req->file = io_file_get_normal(req, READ_ONCE(sqe->fd)); - if (unlikely(!req->file)) - return -EBADF; } personality = READ_ONCE(sqe->personality); From d5361233e9ab920e135819f73dd8466355f1fddd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 31 Mar 2022 12:38:46 -0600 Subject: [PATCH 258/423] io_uring: drop the old style inflight file tracking io_uring tracks requests that are referencing an io_uring descriptor to be able to cancel without worrying about loops in the references. Since we now assign the file at execution time, the easier approach is to drop a potentially problematic reference before we punt the request. This eliminates the need to special case these types of files beyond just marking them as such, and simplifies cancelation quite a bit. This also fixes a recent issue where an async punted tee operation would with the io_uring descriptor as the output file would crash when attempting to get a reference to the file from the io-wq worker. We could have worked around that, but this is the much cleaner fix. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Reported-by: syzbot+c4b9303500a21750b250@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 85 ++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 58 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bdc090fec29c..ad0d99ffbf0a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -112,8 +112,7 @@ IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ - REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ - REQ_F_ASYNC_DATA) + REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) #define IO_TCTX_REFS_CACHE_NR (1U << 10) @@ -500,7 +499,6 @@ struct io_uring_task { const struct io_ring_ctx *last; struct io_wq *io_wq; struct percpu_counter inflight; - atomic_t inflight_tracked; atomic_t in_idle; spinlock_t task_lock; @@ -1186,6 +1184,8 @@ static void io_clean_op(struct io_kiocb *req); static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, unsigned issue_flags); static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); +static void io_drop_inflight_file(struct io_kiocb *req); +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -1433,29 +1433,9 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task, bool cancel_all) __must_hold(&req->ctx->timeout_lock) { - struct io_kiocb *req; - if (task && head->task != task) return false; - if (cancel_all) - return true; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; -} - -static bool io_match_linked(struct io_kiocb *head) -{ - struct io_kiocb *req; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; + return cancel_all; } /* @@ -1465,24 +1445,9 @@ static bool io_match_linked(struct io_kiocb *head) static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all) { - bool matched; - if (task && head->task != task) return false; - if (cancel_all) - return true; - - if (head->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = head->ctx; - - /* protect against races with linked timeouts */ - spin_lock_irq(&ctx->timeout_lock); - matched = io_match_linked(head); - spin_unlock_irq(&ctx->timeout_lock); - } else { - matched = io_match_linked(head); - } - return matched; + return cancel_all; } static inline bool req_has_async_data(struct io_kiocb *req) @@ -1645,14 +1610,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) return req->flags & REQ_F_FIXED_FILE; } -static inline void io_req_track_inflight(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_INFLIGHT)) { - req->flags |= REQ_F_INFLIGHT; - atomic_inc(¤t->io_uring->inflight_tracked); - } -} - static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { if (WARN_ON_ONCE(!req->link)) @@ -2563,6 +2520,8 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority) WARN_ON_ONCE(!tctx); + io_drop_inflight_file(req); + spin_lock_irqsave(&tctx->task_lock, flags); if (priority) wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list); @@ -6008,7 +5967,10 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) if (!req->result) { struct poll_table_struct pt = { ._key = req->cflags }; - req->result = vfs_poll(req->file, &pt) & req->cflags; + if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED))) + req->result = -EBADF; + else + req->result = vfs_poll(req->file, &pt) & req->cflags; } /* multishot, just fill an CQE and proceed */ @@ -7226,11 +7188,6 @@ static void io_clean_op(struct io_kiocb *req) kfree(req->apoll); req->apoll = NULL; } - if (req->flags & REQ_F_INFLIGHT) { - struct io_uring_task *tctx = req->task->io_uring; - - atomic_dec(&tctx->inflight_tracked); - } if (req->flags & REQ_F_CREDS) put_cred(req->creds); if (req->flags & REQ_F_ASYNC_DATA) { @@ -7522,6 +7479,19 @@ out: return file; } +/* + * Drop the file for requeue operations. Only used of req->file is the + * io_uring descriptor itself. + */ +static void io_drop_inflight_file(struct io_kiocb *req) +{ + if (unlikely(req->flags & REQ_F_INFLIGHT)) { + fput(req->file); + req->file = NULL; + req->flags &= ~REQ_F_INFLIGHT; + } +} + static struct file *io_file_get_normal(struct io_kiocb *req, int fd) { struct file *file = fget(fd); @@ -7529,8 +7499,8 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd) trace_io_uring_file_get(req->ctx, req, req->user_data, fd); /* we don't allow fixed io_uring files */ - if (file && unlikely(file->f_op == &io_uring_fops)) - io_req_track_inflight(req); + if (file && file->f_op == &io_uring_fops) + req->flags |= REQ_F_INFLIGHT; return file; } @@ -9437,7 +9407,6 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task, xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); atomic_set(&tctx->in_idle, 0); - atomic_set(&tctx->inflight_tracked, 0); task->io_uring = tctx; spin_lock_init(&tctx->task_lock); INIT_WQ_LIST(&tctx->task_list); @@ -10630,7 +10599,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) { if (tracked) - return atomic_read(&tctx->inflight_tracked); + return 0; return percpu_counter_sum(&tctx->inflight); } From cb318216732579da80202fe3e622a504e55b3a0f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Apr 2022 10:31:43 -0600 Subject: [PATCH 259/423] Revert "io_uring: Add support for napi_busy_poll" This reverts commit adc8682ec69012b68d5ab7123e246d2ad9a6f94b. There's some discussion on the API not being as good as it can be. Rather than ship something and be stuck with it forever, let's revert the NAPI support for now and work on getting something sorted out for the next kernel release instead. Link: https://lore.kernel.org/io-uring/b7bbc124-8502-0ee9-d4c8-7c41b4487264@kernel.dk/ Signed-off-by: Jens Axboe --- fs/io_uring.c | 232 +------------------------------------------------- 1 file changed, 1 insertion(+), 231 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ad0d99ffbf0a..60d6ac21519d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -411,11 +410,6 @@ struct io_ring_ctx { struct list_head sqd_list; unsigned long check_cq_overflow; -#ifdef CONFIG_NET_RX_BUSY_POLL - /* used to track busy poll napi_id */ - struct list_head napi_list; - spinlock_t napi_lock; /* napi_list lock */ -#endif struct { unsigned cached_cq_tail; @@ -1569,10 +1563,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_WQ_LIST(&ctx->locked_free_list); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); -#ifdef CONFIG_NET_RX_BUSY_POLL - INIT_LIST_HEAD(&ctx->napi_list); - spin_lock_init(&ctx->napi_lock); -#endif return ctx; err: kfree(ctx->dummy_ubuf); @@ -5730,108 +5720,6 @@ IO_NETOP_FN(send); IO_NETOP_FN(recv); #endif /* CONFIG_NET */ -#ifdef CONFIG_NET_RX_BUSY_POLL - -#define NAPI_TIMEOUT (60 * SEC_CONVERSION) - -struct napi_entry { - struct list_head list; - unsigned int napi_id; - unsigned long timeout; -}; - -/* - * Add busy poll NAPI ID from sk. - */ -static void io_add_napi(struct file *file, struct io_ring_ctx *ctx) -{ - unsigned int napi_id; - struct socket *sock; - struct sock *sk; - struct napi_entry *ne; - - if (!net_busy_loop_on()) - return; - - sock = sock_from_file(file); - if (!sock) - return; - - sk = sock->sk; - if (!sk) - return; - - napi_id = READ_ONCE(sk->sk_napi_id); - - /* Non-NAPI IDs can be rejected */ - if (napi_id < MIN_NAPI_ID) - return; - - spin_lock(&ctx->napi_lock); - list_for_each_entry(ne, &ctx->napi_list, list) { - if (ne->napi_id == napi_id) { - ne->timeout = jiffies + NAPI_TIMEOUT; - goto out; - } - } - - ne = kmalloc(sizeof(*ne), GFP_NOWAIT); - if (!ne) - goto out; - - ne->napi_id = napi_id; - ne->timeout = jiffies + NAPI_TIMEOUT; - list_add_tail(&ne->list, &ctx->napi_list); -out: - spin_unlock(&ctx->napi_lock); -} - -static inline void io_check_napi_entry_timeout(struct napi_entry *ne) -{ - if (time_after(jiffies, ne->timeout)) { - list_del(&ne->list); - kfree(ne); - } -} - -/* - * Busy poll if globally on and supporting sockets found - */ -static bool io_napi_busy_loop(struct list_head *napi_list) -{ - struct napi_entry *ne, *n; - - list_for_each_entry_safe(ne, n, napi_list, list) { - napi_busy_loop(ne->napi_id, NULL, NULL, true, - BUSY_POLL_BUDGET); - io_check_napi_entry_timeout(ne); - } - return !list_empty(napi_list); -} - -static void io_free_napi_list(struct io_ring_ctx *ctx) -{ - spin_lock(&ctx->napi_lock); - while (!list_empty(&ctx->napi_list)) { - struct napi_entry *ne = - list_first_entry(&ctx->napi_list, struct napi_entry, - list); - - list_del(&ne->list); - kfree(ne); - } - spin_unlock(&ctx->napi_lock); -} -#else -static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx) -{ -} - -static inline void io_free_napi_list(struct io_ring_ctx *ctx) -{ -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - struct io_poll_table { struct poll_table_struct pt; struct io_kiocb *req; @@ -5986,7 +5874,6 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) if (unlikely(!filled)) return -ECANCELED; io_cqring_ev_posted(ctx); - io_add_napi(req->file, ctx); } else if (req->result) { return 0; } @@ -6237,7 +6124,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, __io_poll_execute(req, mask, poll->events); return 0; } - io_add_napi(req->file, req->ctx); /* * Release ownership. If someone tried to queue a tw while it was @@ -8028,13 +7914,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) !(ctx->flags & IORING_SETUP_R_DISABLED)) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); -#ifdef CONFIG_NET_RX_BUSY_POLL - spin_lock(&ctx->napi_lock); - if (!list_empty(&ctx->napi_list) && - io_napi_busy_loop(&ctx->napi_list)) - ++ret; - spin_unlock(&ctx->napi_lock); -#endif + if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) wake_up(&ctx->sqo_sq_wait); if (creds) @@ -8172,9 +8052,6 @@ struct io_wait_queue { struct io_ring_ctx *ctx; unsigned cq_tail; unsigned nr_timeouts; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned busy_poll_to; -#endif }; static inline bool io_should_wake(struct io_wait_queue *iowq) @@ -8236,87 +8113,6 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, return 1; } -#ifdef CONFIG_NET_RX_BUSY_POLL -static void io_adjust_busy_loop_timeout(struct timespec64 *ts, - struct io_wait_queue *iowq) -{ - unsigned busy_poll_to = READ_ONCE(sysctl_net_busy_poll); - struct timespec64 pollto = ns_to_timespec64(1000 * (s64)busy_poll_to); - - if (timespec64_compare(ts, &pollto) > 0) { - *ts = timespec64_sub(*ts, pollto); - iowq->busy_poll_to = busy_poll_to; - } else { - u64 to = timespec64_to_ns(ts); - - do_div(to, 1000); - iowq->busy_poll_to = to; - ts->tv_sec = 0; - ts->tv_nsec = 0; - } -} - -static inline bool io_busy_loop_timeout(unsigned long start_time, - unsigned long bp_usec) -{ - if (bp_usec) { - unsigned long end_time = start_time + bp_usec; - unsigned long now = busy_loop_current_time(); - - return time_after(now, end_time); - } - return true; -} - -static bool io_busy_loop_end(void *p, unsigned long start_time) -{ - struct io_wait_queue *iowq = p; - - return signal_pending(current) || - io_should_wake(iowq) || - io_busy_loop_timeout(start_time, iowq->busy_poll_to); -} - -static void io_blocking_napi_busy_loop(struct list_head *napi_list, - struct io_wait_queue *iowq) -{ - unsigned long start_time = - list_is_singular(napi_list) ? 0 : - busy_loop_current_time(); - - do { - if (list_is_singular(napi_list)) { - struct napi_entry *ne = - list_first_entry(napi_list, - struct napi_entry, list); - - napi_busy_loop(ne->napi_id, io_busy_loop_end, iowq, - true, BUSY_POLL_BUDGET); - io_check_napi_entry_timeout(ne); - break; - } - } while (io_napi_busy_loop(napi_list) && - !io_busy_loop_end(iowq, start_time)); -} - -static void io_putback_napi_list(struct io_ring_ctx *ctx, - struct list_head *napi_list) -{ - struct napi_entry *cne, *lne; - - spin_lock(&ctx->napi_lock); - list_for_each_entry(cne, &ctx->napi_list, list) - list_for_each_entry(lne, napi_list, list) - if (cne->napi_id == lne->napi_id) { - list_del(&lne->list); - kfree(lne); - break; - } - list_splice(napi_list, &ctx->napi_list); - spin_unlock(&ctx->napi_lock); -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. @@ -8329,9 +8125,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, struct io_rings *rings = ctx->rings; ktime_t timeout = KTIME_MAX; int ret; -#ifdef CONFIG_NET_RX_BUSY_POLL - LIST_HEAD(local_napi_list); -#endif do { io_cqring_overflow_flush(ctx); @@ -8354,29 +8147,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } -#ifdef CONFIG_NET_RX_BUSY_POLL - iowq.busy_poll_to = 0; - if (!(ctx->flags & IORING_SETUP_SQPOLL)) { - spin_lock(&ctx->napi_lock); - list_splice_init(&ctx->napi_list, &local_napi_list); - spin_unlock(&ctx->napi_lock); - } -#endif if (uts) { struct timespec64 ts; if (get_timespec64(&ts, uts)) return -EFAULT; -#ifdef CONFIG_NET_RX_BUSY_POLL - if (!list_empty(&local_napi_list)) - io_adjust_busy_loop_timeout(&ts, &iowq); -#endif timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); } -#ifdef CONFIG_NET_RX_BUSY_POLL - else if (!list_empty(&local_napi_list)) - iowq.busy_poll_to = READ_ONCE(sysctl_net_busy_poll); -#endif init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; @@ -8386,12 +8163,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; trace_io_uring_cqring_wait(ctx, min_events); -#ifdef CONFIG_NET_RX_BUSY_POLL - if (iowq.busy_poll_to) - io_blocking_napi_busy_loop(&local_napi_list, &iowq); - if (!list_empty(&local_napi_list)) - io_putback_napi_list(ctx, &local_napi_list); -#endif do { /* if we can't even flush overflow, don't wait for more */ if (!io_cqring_overflow_flush(ctx)) { @@ -10176,7 +9947,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_req_caches_free(ctx); if (ctx->hash_map) io_wq_put_hash(ctx->hash_map); - io_free_napi_list(ctx); kfree(ctx->cancel_hash); kfree(ctx->dummy_ubuf); kfree(ctx->io_buffers); From 0f5e4b83b37a96e3643951588ed7176b9b187c0a Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 6 Apr 2022 13:55:33 +0200 Subject: [PATCH 260/423] io_uring: implement compat handling for IORING_REGISTER_IOWQ_AFF Similarly to the way it is done im mbind syscall. Cc: stable@vger.kernel.org # 5.14 Fixes: fe76421d1da1dcdb ("io_uring: allow user configurable IO thread CPU affinity") Signed-off-by: Eugene Syromiatnikov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 60d6ac21519d..a88eca3e0902 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11472,7 +11472,15 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, if (len > cpumask_size()) len = cpumask_size(); - if (copy_from_user(new_mask, arg, len)) { + if (in_compat_syscall()) { + ret = compat_get_bitmap(cpumask_bits(new_mask), + (const compat_ulong_t __user *)arg, + len * 8 /* CHAR_BIT */); + } else { + ret = copy_from_user(new_mask, arg, len); + } + + if (ret) { free_cpumask_var(new_mask); return -EFAULT; } From 34bb77184123ae401100a4d156584f12fa630e5c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Apr 2022 12:43:57 +0100 Subject: [PATCH 261/423] io_uring: nospec index for tags on files update Don't forget to array_index_nospec() for indexes before updating rsrc tags in __io_sqe_files_update(), just use already safe and precalculated index @i. Fixes: c3bdad0271834 ("io_uring: add generic rsrc update with tags") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a88eca3e0902..b517fd9c3f60 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9094,7 +9094,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - *io_get_tag_slot(data, up->offset + done) = tag; + *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); err = io_sqe_file_register(ctx, file, i); if (err) { From a07211e3001435fe8591b992464cd8d5e3c98c5a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Apr 2022 12:43:58 +0100 Subject: [PATCH 262/423] io_uring: don't touch scm_fp_list after queueing skb It's safer to not touch scm_fp_list after we queued an skb to which it was assigned, there might be races lurking if we screw subtle sync guarantees on the io_uring side. Fixes: 6b06314c47e14 ("io_uring: add file set registration") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b517fd9c3f60..7e672464dcb3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8631,8 +8631,12 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_queue_head(&sk->sk_receive_queue, skb); - for (i = 0; i < nr_files; i++) - fput(fpl->fp[i]); + for (i = 0; i < nr; i++) { + struct file *file = io_file_from_index(ctx, i + offset); + + if (file) + fput(file); + } } else { kfree_skb(skb); free_uid(fpl->user); From 8f0a24801bb44aa58496945aabb904c729176772 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Apr 2022 14:05:04 +0100 Subject: [PATCH 263/423] io_uring: zero tag on rsrc removal Automatically default rsrc tag in io_queue_rsrc_removal(), it's safer than leaving it there and relying on the rest of the code to behave and not use it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1cf262a50df17478ea25b22494dcc19f3a80301f.1649336342.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7e672464dcb3..d62079e9096c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8927,13 +8927,15 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, struct io_rsrc_node *node, void *rsrc) { + u64 *tag_slot = io_get_tag_slot(data, idx); struct io_rsrc_put *prsrc; prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); if (!prsrc) return -ENOMEM; - prsrc->tag = *io_get_tag_slot(data, idx); + prsrc->tag = *tag_slot; + *tag_slot = 0; prsrc->rsrc = rsrc; list_add(&prsrc->list, &node->rsrc_list); return 0; From 4cdd158be9d09223737df83136a1fb65269d809a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Apr 2022 14:05:05 +0100 Subject: [PATCH 264/423] io_uring: use nospec annotation for more indexes There are still several places that using pre array_index_nospec() indexes, fix them up. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b01ef5ee83f72ed35ad525912370b729f5d145f4.1649336342.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d62079e9096c..fafd1ca4780b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9004,7 +9004,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) bool needs_lock = issue_flags & IO_URING_F_UNLOCKED; struct io_fixed_file *file_slot; struct file *file; - int ret, i; + int ret; io_ring_submit_lock(ctx, needs_lock); ret = -ENXIO; @@ -9017,8 +9017,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) if (ret) goto out; - i = array_index_nospec(offset, ctx->nr_user_files); - file_slot = io_fixed_file_slot(&ctx->file_table, i); + offset = array_index_nospec(offset, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, offset); ret = -EBADF; if (!file_slot->file_ptr) goto out; @@ -9074,8 +9074,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (file_slot->file_ptr) { file = (struct file *)(file_slot->file_ptr & FFS_MASK); - err = io_queue_rsrc_removal(data, up->offset + done, - ctx->rsrc_node, file); + err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); if (err) break; file_slot->file_ptr = 0; @@ -9758,7 +9757,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, i = array_index_nospec(offset, ctx->nr_user_bufs); if (ctx->user_bufs[i] != ctx->dummy_ubuf) { - err = io_queue_rsrc_removal(ctx->buf_data, offset, + err = io_queue_rsrc_removal(ctx->buf_data, i, ctx->rsrc_node, ctx->user_bufs[i]); if (unlikely(err)) { io_buffer_unmap(ctx, &imu); From 830f1111d90e8770fcfad8bd5628e8ae6fecec06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Mar 2022 20:00:07 -0400 Subject: [PATCH 265/423] NFS: Replace readdir's use of xxhash() with hash_64() Both xxhash() and hash_64() appear to give similarly low collision rates with a standard linearly increasing readdir offset. They both give similarly higher collision rates when applied to ext4's offsets. So switch to using the standard hash_64(). Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 4 ---- fs/nfs/dir.c | 9 +++------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 47a53b3362b6..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -4,10 +4,6 @@ config NFS_FS depends on INET && FILE_LOCKING && MULTIUSER select LOCKD select SUNRPC - select CRYPTO - select CRYPTO_HASH - select XXHASH - select CRYPTO_XXHASH select NFS_ACL_SUPPORT if NFS_V3_ACL help Choose Y here if you want to access files residing on other diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0365063b85a2..c6b263b5faf1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "delegation.h" #include "iostat.h" @@ -350,10 +350,7 @@ out: * of directory cookies. Content is addressed by the value of the * cookie index of the first readdir entry in a page. * - * The xxhash algorithm is chosen because it is fast, and is supposed - * to result in a decent flat distribution of hashes. - * - * We then select only the first 18 bits to avoid issues with excessive + * We select only the first 18 bits to avoid issues with excessive * memory use for the page cache XArray. 18 bits should allow the caching * of 262144 pages of sequences of readdir entries. Since each page holds * 127 readdir entries for a typical 64-bit system, that works out to a @@ -363,7 +360,7 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie) { if (cookie == 0) return 0; - return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK; + return hash_64(cookie, 18); } static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, From f00432063db1a0db484e85193eccc6845435b80e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Apr 2022 15:58:11 -0400 Subject: [PATCH 266/423] SUNRPC: Ensure we flush any closed sockets before xs_xprt_free() We must ensure that all sockets are closed before we call xprt_free() and release the reference to the net namespace. The problem is that calling fput() will defer closing the socket until delayed_fput() gets called. Let's fix the situation by allowing rpciod and the transport teardown code (which runs on the system wq) to call __fput_sync(), and directly close the socket. Reported-by: Felix Fu Acked-by: Al Viro Fixes: a73881c96d73 ("SUNRPC: Fix an Oops in udp_poll()") Cc: stable@vger.kernel.org # 5.1.x: 3be232f11a3c: SUNRPC: Prevent immediate close+reconnect Cc: stable@vger.kernel.org # 5.1.x: 89f42494f92f: SUNRPC: Don't call connect() more than once on a TCP socket Cc: stable@vger.kernel.org # 5.1.x Signed-off-by: Trond Myklebust --- fs/file_table.c | 1 + include/trace/events/sunrpc.h | 1 - net/sunrpc/xprt.c | 7 +------ net/sunrpc/xprtsock.c | 16 +++++++++++++--- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 7d2e692b66a9..ada8fe814db9 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -412,6 +412,7 @@ void __fput_sync(struct file *file) } EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ac33892da411..a4848c7bab80 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1004,7 +1004,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); -DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); DECLARE_EVENT_CLASS(rpc_xprt_event, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 73344ffb2692..ad62eba540a4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -930,12 +930,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - trace_xprt_disconnect_cleanup(xprt); - xprt->ops->close(xprt); - } - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9b75891b3cc0..c6a13893e308 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -879,7 +879,7 @@ static int xs_local_send_request(struct rpc_rqst *req) /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } @@ -915,7 +915,7 @@ static int xs_local_send_request(struct rpc_rqst *req) -status); fallthrough; case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -1185,6 +1185,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. + */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1208,7 +1218,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } From dcc7977c7fdd0b59809cf7420ae1d5f5b5bd16ad Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 1 Apr 2022 10:59:05 +0800 Subject: [PATCH 267/423] NFSv4.2: Fix missing removal of SLAB_ACCOUNT on kmem_cache allocation The commit 5c60e89e71f8 ("NFSv4.2: Fix up an invalid combination of memory allocation flags") has stripped GFP_KERNEL_ACCOUNT down to GFP_KERNEL, however, it forgot to remove SLAB_ACCOUNT from kmem_cache allocation. It means that memory is still limited by kmemcg. This patch also fix a NULL pointer reference issue [1] reported by NeilBrown. Link: https://lore.kernel.org/all/164870069595.25542.17292003658915487357@noble.neil.brown.name/ [1] Fixes: 5c60e89e71f8 ("NFSv4.2: Fix up an invalid combination of memory allocation flags") Fixes: 5abc1e37afa0 ("mm: list_lru: allocate list_lru_one only when needed") Reported-by: NeilBrown Signed-off-by: Muchun Song Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index ad3405c64b9e..e7b34f7e0614 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -997,7 +997,7 @@ int __init nfs4_xattr_cache_init(void) nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache", sizeof(struct nfs4_xattr_cache), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT), + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), nfs4_xattr_cache_init_once); if (nfs4_xattr_cache_cachep == NULL) return -ENOMEM; From d3c15033b240767d0287f1c4a529cbbe2d5ded8a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 23:18:57 -0400 Subject: [PATCH 268/423] SUNRPC: Handle ENOMEM in call_transmit_status() Both call_transmit() and call_bc_transmit() can now return ENOMEM, so let's make sure that we handle the errors gracefully. Fixes: 0472e4766049 ("SUNRPC: Convert socket page send code to use iov_iter()") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3c7407104d54..07328f1d3885 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2200,6 +2200,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2283,6 +2284,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; From 9d82819d5b065348ce623f196bf601028e22ed00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Apr 2022 09:50:19 -0400 Subject: [PATCH 269/423] SUNRPC: Handle low memory situations in call_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to handle ENFILE, ENOBUFS, and ENOMEM, because xprt_wake_pending_tasks() can be called with any one of these due to socket creation failures. Fixes: b61d59fffd3e ("SUNRPC: xs_tcp_connect_worker{4,6}: merge common code") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 07328f1d3885..6757b0fa5367 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2367,6 +2367,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; From 68b78dcdf93a845d68e34918d17c125924240584 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:33:19 -0400 Subject: [PATCH 270/423] NFSv4/pnfs: Handle RPC allocation errors in nfs4_proc_layoutget If rpc_run_task() fails due to an allocation error, then bail out early. Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e3f5b380cefe..16106f805ffa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9615,6 +9615,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return ERR_CAST(task); status = rpc_wait_for_completion_task(task); if (status != 0) From 88dee0cc93adcd83db9d089c1163dc88edafd1c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:34:35 -0400 Subject: [PATCH 271/423] NFS: Ensure rpc_run_task() cannot fail in nfs_async_rename() Ensure the call to rpc_run_task() cannot fail by preallocating the rpc_task. Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 5fa11e1aca4c..6f325e10056c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -347,6 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); + task_setup_data.task = &data->task; task_setup_data.callback_data = data; data->cred = get_current_cred(); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 49ba486aea5f..2863e5a69c6a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1694,6 +1694,7 @@ struct nfs_unlinkdata { struct nfs_renamedata { struct nfs_renameargs args; struct nfs_renameres res; + struct rpc_task task; const struct cred *cred; struct inode *old_dir; struct dentry *old_dentry; From 25cf32ad5dba79385f6e7de9008dcb75556c42d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:36:19 -0400 Subject: [PATCH 272/423] SUNRPC: Handle allocation failure in rpc_new_task() If the call to rpc_alloc_task() fails, then ensure that the calldata is released, and that rpc_run_task() and rpc_run_bc_task() bail out early. Reported-by: NeilBrown Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 +++++++ net/sunrpc/sched.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6757b0fa5367..af0174d7ce5a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1127,6 +1127,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) struct rpc_task *task; task = rpc_new_task(task_setup_data); + if (IS_ERR(task)) + return task; if (!RPC_IS_ASYNC(task)) task->tk_flags |= RPC_TASK_CRED_NOREF; @@ -1227,6 +1229,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); + if (IS_ERR(task)) { + xprt_free_bc_request(req); + return task; + } + xprt_init_bc_request(req, task); task->tk_action = call_bc_encode; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b258b87a3ec2..7f70c1e608b7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1128,6 +1128,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) if (task == NULL) { task = rpc_alloc_task(); + if (task == NULL) { + rpc_release_calldata(setup_data->callback_ops, + setup_data->callback_data); + return ERR_PTR(-ENOMEM); + } flags = RPC_TASK_DYNAMIC; } From b056fa070814897be32d83b079dbc311375588e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Apr 2022 14:10:23 -0400 Subject: [PATCH 273/423] SUNRPC: svc_tcp_sendmsg() should handle errors from xdr_alloc_bvec() The allocation is done with GFP_KERNEL, but it could still fail in a low memory situation. Fixes: 4a85a6a3320b ("SUNRPC: Handle TCP socket sends with kernel_sendpage() again") Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 478f857cdaed..6ea3d87e1147 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1096,7 +1096,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, int ret; *sentp = 0; - xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (ret < 0) + return ret; ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) From ff053dbbaffec45c85e5bfe43306d26694a6433f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:51:58 -0400 Subject: [PATCH 274/423] SUNRPC: Move the call to xprt_send_pagedata() out of xprt_sock_sendmsg() The client and server have different requirements for their memory allocation, so move the allocation of the send buffer out of the socket send code that is common to both. Reported-by: NeilBrown Fixes: b2648015d452 ("SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent") Signed-off-by: Trond Myklebust --- net/sunrpc/socklib.c | 6 ------ net/sunrpc/svcsock.c | 9 ++++++--- net/sunrpc/xprtsock.c | 15 +++++++++++++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 05b38bf68316..71ba4cf513bc 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -221,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg, static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) { - int err; - - err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); - if (err < 0) - return err; - iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr), xdr->page_len + xdr->page_base); return xprt_sendmsg(sock, msg, base + xdr->page_base); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6ea3d87e1147..cc35ec433400 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -579,15 +579,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + goto out_unlock; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); } + xdr_free_bvec(xdr); trace_svcsock_udp_send(xprt, err); - +out_unlock: mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c6a13893e308..8ab64ea46870 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -825,9 +825,14 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) static int xs_stream_prepare_request(struct rpc_rqst *req) { + gfp_t gfp = rpc_task_gfp_mask(); + int ret; + + ret = xdr_alloc_bvec(&req->rq_snd_buf, gfp); + if (ret < 0) + return ret; xdr_free_bvec(&req->rq_rcv_buf); - return xdr_alloc_bvec( - &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + return xdr_alloc_bvec(&req->rq_rcv_buf, gfp); } /* @@ -956,6 +961,9 @@ static int xs_udp_send_request(struct rpc_rqst *req) if (!xprt_request_get_cong(xprt, req)) return -EBADSLT; + status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (status < 0) + return status; req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent); @@ -2554,6 +2562,9 @@ static int bc_sendto(struct rpc_rqst *req) int err; req->rq_xtime = ktime_get(); + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (err < 0) + return err; err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) From b71597edfaade119157ded98991bac7160be80c2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 8 Apr 2022 10:00:42 +0200 Subject: [PATCH 275/423] mmc: core: improve API to make clear mmc_hw_reset is for cards To make it unambiguous that mmc_hw_reset() is for cards and not for controllers, we make the function argument mmc_card instead of mmc_host. Also, all users are converted. Suggested-by: Ulf Hansson Signed-off-by: Wolfram Sang Acked-by: Kalle Valo Link: https://lore.kernel.org/r/20220408080045.6497-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 2 +- drivers/mmc/core/core.c | 5 +++-- drivers/mmc/core/mmc_test.c | 3 +-- drivers/net/wireless/ath/ath10k/sdio.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- drivers/net/wireless/ti/wlcore/sdio.c | 2 +- include/linux/mmc/core.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index db99882c95d8..506dc900f5c7 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -993,7 +993,7 @@ static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host, return -EEXIST; md->reset_done |= type; - err = mmc_hw_reset(host); + err = mmc_hw_reset(host->card); /* Ensure we switch back to the correct partition */ if (err) { struct mmc_blk_data *main_md = diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 368f10405e13..c6ae16d40766 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1995,7 +1995,7 @@ static void mmc_hw_reset_for_init(struct mmc_host *host) /** * mmc_hw_reset - reset the card in hardware - * @host: MMC host to which the card is attached + * @card: card to be reset * * Hard reset the card. This function is only for upper layers, like the * block layer or card drivers. You cannot use it in host drivers (struct @@ -2003,8 +2003,9 @@ static void mmc_hw_reset_for_init(struct mmc_host *host) * * Return: 0 on success, -errno on failure */ -int mmc_hw_reset(struct mmc_host *host) +int mmc_hw_reset(struct mmc_card *card) { + struct mmc_host *host = card->host; int ret; ret = host->bus_ops->hw_reset(host); diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index e6a2fd2c6d5c..8d9bceeff986 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -2325,10 +2325,9 @@ static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test) static int mmc_test_reset(struct mmc_test_card *test) { struct mmc_card *card = test->card; - struct mmc_host *host = card->host; int err; - err = mmc_hw_reset(host); + err = mmc_hw_reset(card); if (!err) { /* * Reset will re-enable the card's command queue, but tests diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 63e1c2d783c5..73693c66cef1 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -1633,7 +1633,7 @@ static void ath10k_sdio_hif_power_down(struct ath10k *ar) return; } - ret = mmc_hw_reset(ar_sdio->func->card->host); + ret = mmc_hw_reset(ar_sdio->func->card); if (ret) ath10k_warn(ar, "unable to reset sdio: %d\n", ret); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index ba3c159111d3..55285cad527f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4165,7 +4165,7 @@ static int brcmf_sdio_bus_reset(struct device *dev) /* reset the adapter */ sdio_claim_host(sdiodev->func1); - mmc_hw_reset(sdiodev->func1->card->host); + mmc_hw_reset(sdiodev->func1->card); sdio_release_host(sdiodev->func1); brcmf_bus_change_state(sdiodev->bus_if, BRCMF_BUS_DOWN); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index bde9e4bbfffe..4f3238d2a171 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -2639,7 +2639,7 @@ static void mwifiex_sdio_card_reset_work(struct mwifiex_adapter *adapter) /* Run a HW reset of the SDIO interface. */ sdio_claim_host(func); - ret = mmc_hw_reset(func->card->host); + ret = mmc_hw_reset(func->card); sdio_release_host(func); switch (ret) { diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 72fc41ac83c0..9140b0163474 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -146,7 +146,7 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue) * To guarantee that the SDIO card is power cycled, as required to make * the FW programming to succeed, let's do a brute force HW reset. */ - mmc_hw_reset(card->host); + mmc_hw_reset(card); sdio_enable_func(func); sdio_release_host(func); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 71101d1ec825..de5c64bbdb72 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -175,7 +175,7 @@ void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq); int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, int retries); -int mmc_hw_reset(struct mmc_host *host); +int mmc_hw_reset(struct mmc_card *card); int mmc_sw_reset(struct mmc_host *host); void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card); From 71ff461c3f41f6465434b9e980c01782763e7ad8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 31 Mar 2022 09:23:01 +0300 Subject: [PATCH 276/423] iommu/omap: Fix regression in probe for NULL pointer dereference Commit 3f6634d997db ("iommu: Use right way to retrieve iommu_ops") started triggering a NULL pointer dereference for some omap variants: __iommu_probe_device from probe_iommu_group+0x2c/0x38 probe_iommu_group from bus_for_each_dev+0x74/0xbc bus_for_each_dev from bus_iommu_probe+0x34/0x2e8 bus_iommu_probe from bus_set_iommu+0x80/0xc8 bus_set_iommu from omap_iommu_init+0x88/0xcc omap_iommu_init from do_one_initcall+0x44/0x24 This is caused by omap iommu probe returning 0 instead of ERR_PTR(-ENODEV) as noted by Jason Gunthorpe . Looks like the regression already happened with an earlier commit 6785eb9105e3 ("iommu/omap: Convert to probe/release_device() call-backs") that changed the function return type and missed converting one place. Cc: Drew Fustini Cc: Lu Baolu Cc: Suman Anna Suggested-by: Jason Gunthorpe Fixes: 6785eb9105e3 ("iommu/omap: Convert to probe/release_device() call-backs") Fixes: 3f6634d997db ("iommu: Use right way to retrieve iommu_ops") Signed-off-by: Tony Lindgren Tested-by: Drew Fustini Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220331062301.24269-1-tony@atomide.com Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 4aab631ef517..d9cf2820c02e 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1661,7 +1661,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev) num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", sizeof(phandle)); if (num_iommus < 0) - return 0; + return ERR_PTR(-ENODEV); arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL); if (!arch_data) From 31a099dbd91e69fcab55eef4be15ed7a8c984918 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 7 Apr 2022 15:33:20 +0800 Subject: [PATCH 277/423] arm64: patch_text: Fixup last cpu should be master These patch_text implementations are using stop_machine_cpuslocked infrastructure with atomic cpu_count. The original idea: When the master CPU patch_text, the others should wait for it. But current implementation is using the first CPU as master, which couldn't guarantee the remaining CPUs are waiting. This patch changes the last CPU as the master to solve the potential risk. Fixes: ae16480785de ("arm64: introduce interfaces to hotpatch kernel and module code") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Reviewed-by: Catalin Marinas Reviewed-by: Masami Hiramatsu Cc: Link: https://lore.kernel.org/r/20220407073323.743224-2-guoren@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/patching.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 771f543464e0..33e0fabc0b79 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); From 83bea32ac7ed37bbda58733de61fc9369513f9f9 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Thu, 7 Apr 2022 18:11:28 +0900 Subject: [PATCH 278/423] arm64: Add part number for Arm Cortex-A78AE Add the MIDR part number info for the Arm Cortex-A78AE[1] and add it to spectre-BHB affected list[2]. [1]: https://developer.arm.com/Processors/Cortex-A78AE [2]: https://developer.arm.com/Arm%20Security%20Center/Spectre-BHB Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Cc: James Morse Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20220407091128.8700-1-chanho61.park@samsung.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/proton-pack.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 232b439cbaf3..ff8f4511df71 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5777929d35bf..40be3a7c2c53 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), From 2610bd72efe4b376febd477425769f647152a3a8 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 5 Apr 2022 16:53:00 +0900 Subject: [PATCH 279/423] dt-bindings: net: ave: Clean up clocks, resets, and their names using compatible string Instead of "oneOf:" choices, use "allOf:" and "if:" to define clocks, resets, and their names that can be taken by the compatible string. The order of clock-names and reset-names doesn't change here. Signed-off-by: Kunihiko Hayashi Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- .../bindings/net/socionext,uniphier-ave4.yaml | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index e602761f7b14..f257520b9a7e 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -13,9 +13,6 @@ description: | This describes the devicetree bindings for AVE ethernet controller implemented on Socionext UniPhier SoCs. -allOf: - - $ref: ethernet-controller.yaml# - properties: compatible: enum: @@ -44,25 +41,13 @@ properties: minItems: 1 maxItems: 4 - clock-names: - oneOf: - - items: # for Pro4 - - const: gio - - const: ether - - const: ether-gb - - const: ether-phy - - const: ether # for others + clock-names: true resets: minItems: 1 maxItems: 2 - reset-names: - oneOf: - - items: # for Pro4 - - const: gio - - const: ether - - const: ether # for others + reset-names: true socionext,syscon-phy-mode: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -78,6 +63,42 @@ properties: $ref: mdio.yaml# unevaluatedProperties: false +allOf: + - $ref: ethernet-controller.yaml# + - if: + properties: + compatible: + contains: + const: socionext,uniphier-pro4-ave4 + then: + properties: + clocks: + minItems: 4 + maxItems: 4 + clock-names: + items: + - const: gio + - const: ether + - const: ether-gb + - const: ether-phy + resets: + minItems: 2 + maxItems: 2 + reset-names: + items: + - const: gio + - const: ether + else: + properties: + clocks: + maxItems: 1 + clock-names: + const: ether + resets: + maxItems: 1 + reset-names: + const: ether + required: - compatible - reg From 5a80059d88046b0a87e957565363ba3ee57600bb Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 5 Apr 2022 16:53:01 +0900 Subject: [PATCH 280/423] dt-bindings: net: ave: Use unevaluatedProperties This refers common bindings, so this is preferred for unevaluatedProperties instead of additionalProperties. Signed-off-by: Kunihiko Hayashi Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/socionext,uniphier-ave4.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index f257520b9a7e..b0ebcef6801c 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -111,7 +111,7 @@ required: - reset-names - mdio -additionalProperties: false +unevaluatedProperties: false examples: - | From 2105f700b53c24aa48b65c15652acc386044d26a Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 6 Apr 2022 14:22:41 +0300 Subject: [PATCH 281/423] net/sched: flower: fix parsing of ethertype following VLAN header A tc flower filter matching TCA_FLOWER_KEY_VLAN_ETH_TYPE is expected to match the L2 ethertype following the first VLAN header, as confirmed by linked discussion with the maintainer. However, such rule also matches packets that have additional second VLAN header, even though filter has both eth_type and vlan_ethtype set to "ipv4". Looking at the code this seems to be mostly an artifact of the way flower uses flow dissector. First, even though looking at the uAPI eth_type and vlan_ethtype appear like a distinct fields, in flower they are all mapped to the same key->basic.n_proto. Second, flow dissector skips following VLAN header as no keys for FLOW_DISSECTOR_KEY_CVLAN are set and eventually assigns the value of n_proto to last parsed header. With these, such filters ignore any headers present between first VLAN header and first "non magic" header (ipv4 in this case) that doesn't result FLOW_DISSECT_RET_PROTO_AGAIN. Fix the issue by extending flow dissector VLAN key structure with new 'vlan_eth_type' field that matches first ethertype following previously parsed VLAN header. Modify flower classifier to set the new flow_dissector_key_vlan->vlan_eth_type with value obtained from TCA_FLOWER_KEY_VLAN_ETH_TYPE/TCA_FLOWER_KEY_CVLAN_ETH_TYPE uAPIs. Link: https://lore.kernel.org/all/Yjhgi48BpTGh6dig@nanopsycho/ Fixes: 9399ae9a6cb2 ("net_sched: flower: Add vlan support") Fixes: d64efd0926ba ("net/sched: flower: Add supprt for matching on QinQ vlan headers") Signed-off-by: Vlad Buslov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 2 ++ net/core/flow_dissector.c | 1 + net/sched/cls_flower.c | 18 +++++++++++++----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index aa33e1092e2c..9f65f1bfbd24 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { __be16 vlan_tci; }; __be16 vlan_tpid; + __be16 vlan_eth_type; + u16 padding; }; struct flow_dissector_mpls_lse { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 03b6e649c428..9bd887610c18 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1183,6 +1183,7 @@ proto_again: VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } key_vlan->vlan_tpid = saved_vlan_tpid; + key_vlan->vlan_eth_type = proto; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c80fc49c0da1..ed5e6f08e74a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1013,6 +1013,7 @@ static int fl_set_key_mpls(struct nlattr **tb, static void fl_set_key_vlan(struct nlattr **tb, __be16 ethertype, int vlan_id_key, int vlan_prio_key, + int vlan_next_eth_type_key, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) { @@ -1031,6 +1032,11 @@ static void fl_set_key_vlan(struct nlattr **tb, } key_val->vlan_tpid = ethertype; key_mask->vlan_tpid = cpu_to_be16(~0); + if (tb[vlan_next_eth_type_key]) { + key_val->vlan_eth_type = + nla_get_be16(tb[vlan_next_eth_type_key]); + key_mask->vlan_eth_type = cpu_to_be16(~0); + } } static void fl_set_key_flag(u32 flower_key, u32 flower_mask, @@ -1602,8 +1608,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (eth_type_vlan(ethertype)) { fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, - &mask->vlan); + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); @@ -1611,6 +1618,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_CVLAN_ID, TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, &key->cvlan, &mask->cvlan); fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, @@ -3002,13 +3010,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (mask->basic.n_proto) { - if (mask->cvlan.vlan_tpid) { + if (mask->cvlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, key->basic.n_proto)) goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { + } else if (mask->vlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) + key->vlan.vlan_eth_type)) goto nla_put_failure; } } From 908b768f9a8ffca2ef69f3145e23a6a259f99ac3 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 8 Apr 2022 11:32:00 +0300 Subject: [PATCH 282/423] MAINTAINERS: Fix reviewer info for a few ROHM ICs The email backend used by ROHM keeps labeling patches as spam. Additionally, there have been reports of some emails been completely dropped. Finally also the email list (or shared inbox) linux-power@fi.rohmeurope.com inadvertly stopped working and has not been reviwed during the past few weeks. Remove no longer working list 'linux-power' list-entry and switch my email to use the personal gmail account instead of the company account. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/Yk/zAHusOdf4+h06@dc73szyh141qn5ck3nwqy-3.rev.dnainternet.fi Signed-off-by: Mark Brown --- MAINTAINERS | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..3af36d852c38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5715,7 +5715,7 @@ W: http://lanana.org/docs/device-list/index.html DEVICE RESOURCE MANAGEMENT HELPERS M: Hans de Goede -R: Matti Vaittinen +R: Matti Vaittinen S: Maintained F: include/linux/devm-helpers.h @@ -11208,7 +11208,7 @@ F: scripts/spdxcheck.py LINEAR RANGES HELPERS M: Mark Brown -R: Matti Vaittinen +R: Matti Vaittinen F: lib/linear_ranges.c F: lib/test_linear_ranges.c F: include/linux/linear_range.h @@ -17011,8 +17011,7 @@ S: Odd Fixes F: drivers/tty/serial/rp2.* ROHM BD99954 CHARGER IC -R: Matti Vaittinen -L: linux-power@fi.rohmeurope.com +R: Matti Vaittinen S: Supported F: drivers/power/supply/bd99954-charger.c F: drivers/power/supply/bd99954-charger.h @@ -17035,8 +17034,7 @@ F: drivers/regulator/bd9571mwv-regulator.c F: include/linux/mfd/bd9571mwv.h ROHM POWER MANAGEMENT IC DEVICE DRIVERS -R: Matti Vaittinen -L: linux-power@fi.rohmeurope.com +R: Matti Vaittinen S: Supported F: drivers/clk/clk-bd718x7.c F: drivers/gpio/gpio-bd71815.c @@ -21118,7 +21116,7 @@ F: include/linux/regulator/ K: regulator_get_optional VOLTAGE AND CURRENT REGULATOR IRQ HELPERS -R: Matti Vaittinen +R: Matti Vaittinen F: drivers/regulator/irq_helpers.c VRF From 2cd1881b9821be68d1eb748c96311258b16af225 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 6 Apr 2022 16:54:20 +0300 Subject: [PATCH 283/423] bonding: Update layer2 and layer2+3 hash formula documentation When using layer2 or layer2+3 hash, only the 5th byte of the MAC addresses is used. Signed-off-by: Gal Pressman Signed-off-by: David S. Miller --- Documentation/networking/bonding.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 525e6842dd33..43be3782e5df 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -894,7 +894,7 @@ xmit_hash_policy Uses XOR of hardware MAC addresses and packet type ID field to generate the hash. The formula is - hash = source MAC XOR destination MAC XOR packet type ID + hash = source MAC[5] XOR destination MAC[5] XOR packet type ID slave number = hash modulo slave count This algorithm will place all traffic to a particular @@ -910,7 +910,7 @@ xmit_hash_policy Uses XOR of hardware MAC addresses and IP addresses to generate the hash. The formula is - hash = source MAC XOR destination MAC XOR packet type ID + hash = source MAC[5] XOR destination MAC[5] XOR packet type ID hash = hash XOR source IP XOR destination IP hash = hash XOR (hash RSHIFT 16) hash = hash XOR (hash RSHIFT 8) From 726e2c5929de841fdcef4e2bf995680688ae1b87 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 6 Apr 2022 16:18:54 +0200 Subject: [PATCH 284/423] veth: Ensure eth header is in skb's linear part After feeding a decapsulated packet to a veth device with act_mirred, skb_headlen() may be 0. But veth_xmit() calls __dev_forward_skb(), which expects at least ETH_HLEN byte of linear data (as __dev_forward_skb2() calls eth_type_trans(), which pulls ETH_HLEN bytes unconditionally). Use pskb_may_pull() to ensure veth_xmit() respects this constraint. kernel BUG at include/linux/skbuff.h:2328! RIP: 0010:eth_type_trans+0xcf/0x140 Call Trace: __dev_forward_skb2+0xe3/0x160 veth_xmit+0x6e/0x250 [veth] dev_hard_start_xmit+0xc7/0x200 __dev_queue_xmit+0x47f/0x520 ? skb_ensure_writable+0x85/0xa0 ? skb_mpls_pop+0x98/0x1c0 tcf_mirred_act+0x442/0x47e [act_mirred] tcf_action_exec+0x86/0x140 fl_classify+0x1d8/0x1e0 [cls_flower] ? dma_pte_clear_level+0x129/0x1a0 ? dma_pte_clear_level+0x129/0x1a0 ? prb_fill_curr_block+0x2f/0xc0 ? skb_copy_bits+0x11a/0x220 __tcf_classify+0x58/0x110 tcf_classify_ingress+0x6b/0x140 __netif_receive_skb_core.constprop.0+0x47d/0xfd0 ? __iommu_dma_unmap_swiotlb+0x44/0x90 __netif_receive_skb_one_core+0x3d/0xa0 netif_receive_skb+0x116/0x170 be_process_rx+0x22f/0x330 [be2net] be_poll+0x13c/0x370 [be2net] __napi_poll+0x2a/0x170 net_rx_action+0x22f/0x2f0 __do_softirq+0xca/0x2a8 __irq_exit_rcu+0xc1/0xe0 common_interrupt+0x83/0xa0 Fixes: e314dbdc1c0d ("[NET]: Virtual ethernet device driver.") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1b5714926d81..eb0121a64d6d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -320,7 +320,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); rcv = rcu_dereference(priv->peer); - if (unlikely(!rcv)) { + if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { kfree_skb(skb); goto drop; } From 1b808993e19447731e823b1313ee4e8da7fd92a0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Apr 2022 14:15:21 -0700 Subject: [PATCH 285/423] flow_dissector: fix false-positive __read_overflow2_field() warning Bounds checking is unhappy that we try to copy both Ethernet addresses but pass pointer to the first one. Luckily destination address is the first field so pass the pointer to the entire header, whatever. Signed-off-by: Jakub Kicinski Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9bd887610c18..6f7ec72016dc 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1032,7 +1032,7 @@ bool __skb_flow_dissect(const struct net *net, key_eth_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS, target_container); - memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); + memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs)); } proto_again: From 7cea5560bf656b84f9ed01c0cc829d4eecd0640b Mon Sep 17 00:00:00 2001 From: Hongbin Wang Date: Wed, 6 Apr 2022 22:46:22 -0400 Subject: [PATCH 286/423] vxlan: fix error return code in vxlan_fdb_append When kmalloc and dst_cache_init failed, should return ENOMEM rather than ENOBUFS. Signed-off-by: Hongbin Wang Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index de97ff98d36e..8a5e3a6d32d7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -651,11 +651,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) - return -ENOBUFS; + return -ENOMEM; if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { kfree(rd); - return -ENOBUFS; + return -ENOMEM; } rd->remote_ip = *ip; From 98c27add5d96485db731a92dac31567b0486cae8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Apr 2022 23:16:57 +0200 Subject: [PATCH 287/423] ALSA: usb-audio: Cap upper limits of buffer/period bytes for implicit fb In the implicit feedback mode, some parameters are tied between both playback and capture streams. One of the tied parameters is the period size, and this can be a problem if the device has different number of channels to both streams. Assume that an application opens a playback stream that has an implicit feedback from a capture stream, and it allocates up to the max period and buffer size as much as possible. When the capture device supports only more channels than the playback, the minimum period and buffer sizes become larger than the sizes the playback stream took. That is, the minimum size will be over the max size the driver limits, and PCM core sees as if no available configuration is found, returning -EINVAL mercilessly. For avoiding this problem, we have to look through the counter part of audioformat list for each sync ep, and checks the channels. If more channels are found there, we reduce the max period and buffer sizes accordingly. You may wonder that the patch adds only the evaluation of channels between streams, and what about other parameters? Both the format and the rate are tied in the implicit fb mode, hence they are always identical. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215792 Fixes: 5a6c3e11c9c9 ("ALSA: usb-audio: Add hw constraint for implicit fb sync") Cc: Link: https://lore.kernel.org/r/20220407211657.15087-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index cec6e91afea2..6a460225f2e3 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -659,6 +659,9 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) #define hwc_debug(fmt, args...) do { } while(0) #endif +#define MAX_BUFFER_BYTES (1024 * 1024) +#define MAX_PERIOD_BYTES (512 * 1024) + static const struct snd_pcm_hardware snd_usb_hardware = { .info = SNDRV_PCM_INFO_MMAP | @@ -669,9 +672,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = SNDRV_PCM_INFO_PAUSE, .channels_min = 1, .channels_max = 256, - .buffer_bytes_max = 1024 * 1024, + .buffer_bytes_max = MAX_BUFFER_BYTES, .period_bytes_min = 64, - .period_bytes_max = 512 * 1024, + .period_bytes_max = MAX_PERIOD_BYTES, .periods_min = 2, .periods_max = 1024, }; @@ -971,6 +974,78 @@ static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params, ep->cur_buffer_periods); } +/* get the adjusted max buffer (or period) bytes that can fit with the + * paired format for implicit fb + */ +static unsigned int +get_adjusted_max_bytes(struct snd_usb_substream *subs, + struct snd_usb_substream *pair, + struct snd_pcm_hw_params *params, + unsigned int max_bytes, + bool reverse_map) +{ + const struct audioformat *fp, *pp; + unsigned int rmax = 0, r; + + list_for_each_entry(fp, &subs->fmt_list, list) { + if (!fp->implicit_fb) + continue; + if (!reverse_map && + !hw_check_valid_format(subs, params, fp)) + continue; + list_for_each_entry(pp, &pair->fmt_list, list) { + if (pp->iface != fp->sync_iface || + pp->altsetting != fp->sync_altsetting || + pp->ep_idx != fp->sync_ep_idx) + continue; + if (reverse_map && + !hw_check_valid_format(pair, params, pp)) + break; + if (!reverse_map && pp->channels > fp->channels) + r = max_bytes * fp->channels / pp->channels; + else if (reverse_map && pp->channels < fp->channels) + r = max_bytes * pp->channels / fp->channels; + else + r = max_bytes; + rmax = max(rmax, r); + break; + } + } + return rmax; +} + +/* Reduce the period or buffer bytes depending on the paired substream; + * when a paired configuration for implicit fb has a higher number of channels, + * we need to reduce the max size accordingly, otherwise it may become unusable + */ +static int hw_rule_bytes_implicit_fb(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + struct snd_usb_substream *pair; + struct snd_interval *it; + unsigned int max_bytes; + unsigned int rmax; + + pair = &subs->stream->substream[!subs->direction]; + if (!pair->ep_num) + return 0; + + if (rule->var == SNDRV_PCM_HW_PARAM_PERIOD_BYTES) + max_bytes = MAX_PERIOD_BYTES; + else + max_bytes = MAX_BUFFER_BYTES; + + rmax = get_adjusted_max_bytes(subs, pair, params, max_bytes, false); + if (!rmax) + rmax = get_adjusted_max_bytes(pair, subs, params, max_bytes, true); + if (!rmax) + return 0; + + it = hw_param_interval(params, rule->var); + return apply_hw_params_minmax(it, 0, rmax); +} + /* * set up the runtime hardware information. */ @@ -1085,6 +1160,16 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, + hw_rule_bytes_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_BUFFER_BYTES, -1); + if (err < 0) + return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, + hw_rule_bytes_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_PERIOD_BYTES, -1); + if (err < 0) + return err; list_for_each_entry(fp, &subs->fmt_list, list) { if (fp->implicit_fb) { From fee2ec8cceb33b8886bc5894fb07e0b2e34148af Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Apr 2022 23:27:40 +0200 Subject: [PATCH 288/423] ALSA: usb-audio: Increase max buffer size The current limit of max buffer size 1MB seems too small for modern devices with lots of channels and high sample rates. Let's make bigger, 4MB. Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20220407212740.17920-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 6a460225f2e3..37ee6df8b15a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -659,7 +659,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) #define hwc_debug(fmt, args...) do { } while(0) #endif -#define MAX_BUFFER_BYTES (1024 * 1024) +#define MAX_BUFFER_BYTES (4 * 1024 * 1024) #define MAX_PERIOD_BYTES (512 * 1024) static const struct snd_pcm_hardware snd_usb_hardware = From d02b4dd84e1a90f7f1444d027c0289bf355b0d5a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:15 +0200 Subject: [PATCH 289/423] perf/imx_ddr: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: In file included from :0:0: In function ‘ddr_perf_counter_enable’, inlined from ‘ddr_perf_irq_handler’ at drivers/perf/fsl_imx8_ddr_perf.c:651:2: ././include/linux/compiler_types.h:352:38: error: call to ‘__compiletime_assert_729’ \ declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ... See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Frank Li Cc: Will Deacon Cc: Mark Rutland Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: linux-arm-kernel@lists.infradead.org Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220405151517.29753-10-bp@alien8.de Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 94ebc1ecace7..b1b2a55de77f 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -29,7 +29,7 @@ #define CNTL_OVER_MASK 0xFFFFFFFE #define CNTL_CSV_SHIFT 24 -#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT) +#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT) #define EVENT_CYCLES_ID 0 #define EVENT_CYCLES_COUNTER 0 From d7442f512b71fc63a99c8a801422dde4fbbf9f93 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 4 Apr 2022 18:15:09 +0200 Subject: [PATCH 290/423] ice: arfs: fix use-after-free when freeing @rx_cpu_rmap The CI testing bots triggered the following splat: [ 718.203054] BUG: KASAN: use-after-free in free_irq_cpu_rmap+0x53/0x80 [ 718.206349] Read of size 4 at addr ffff8881bd127e00 by task sh/20834 [ 718.212852] CPU: 28 PID: 20834 Comm: sh Kdump: loaded Tainted: G S W IOE 5.17.0-rc8_nextqueue-devqueue-02643-g23f3121aca93 #1 [ 718.219695] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0012.070720200218 07/07/2020 [ 718.223418] Call Trace: [ 718.227139] [ 718.230783] dump_stack_lvl+0x33/0x42 [ 718.234431] print_address_description.constprop.9+0x21/0x170 [ 718.238177] ? free_irq_cpu_rmap+0x53/0x80 [ 718.241885] ? free_irq_cpu_rmap+0x53/0x80 [ 718.245539] kasan_report.cold.18+0x7f/0x11b [ 718.249197] ? free_irq_cpu_rmap+0x53/0x80 [ 718.252852] free_irq_cpu_rmap+0x53/0x80 [ 718.256471] ice_free_cpu_rx_rmap.part.11+0x37/0x50 [ice] [ 718.260174] ice_remove_arfs+0x5f/0x70 [ice] [ 718.263810] ice_rebuild_arfs+0x3b/0x70 [ice] [ 718.267419] ice_rebuild+0x39c/0xb60 [ice] [ 718.270974] ? asm_sysvec_apic_timer_interrupt+0x12/0x20 [ 718.274472] ? ice_init_phy_user_cfg+0x360/0x360 [ice] [ 718.278033] ? delay_tsc+0x4a/0xb0 [ 718.281513] ? preempt_count_sub+0x14/0xc0 [ 718.284984] ? delay_tsc+0x8f/0xb0 [ 718.288463] ice_do_reset+0x92/0xf0 [ice] [ 718.292014] ice_pci_err_resume+0x91/0xf0 [ice] [ 718.295561] pci_reset_function+0x53/0x80 <...> [ 718.393035] Allocated by task 690: [ 718.433497] Freed by task 20834: [ 718.495688] Last potentially related work creation: [ 718.568966] The buggy address belongs to the object at ffff8881bd127e00 which belongs to the cache kmalloc-96 of size 96 [ 718.574085] The buggy address is located 0 bytes inside of 96-byte region [ffff8881bd127e00, ffff8881bd127e60) [ 718.579265] The buggy address belongs to the page: [ 718.598905] Memory state around the buggy address: [ 718.601809] ffff8881bd127d00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 718.604796] ffff8881bd127d80: 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc fc [ 718.607794] >ffff8881bd127e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 718.610811] ^ [ 718.613819] ffff8881bd127e80: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc [ 718.617107] ffff8881bd127f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc This is due to that free_irq_cpu_rmap() is always being called *after* (devm_)free_irq() and thus it tries to work with IRQ descs already freed. For example, on device reset the driver frees the rmap right before allocating a new one (the splat above). Make rmap creation and freeing function symmetrical with {request,free}_irq() calls i.e. do that on ifup/ifdown instead of device probe/remove/resume. These operations can be performed independently from the actual device aRFS configuration. Also, make sure ice_vsi_free_irq() clears IRQ affinity notifiers only when aRFS is disabled -- otherwise, CPU rmap sets and clears its own and they must not be touched manually. Fixes: 28bf26724fdb0 ("ice: Implement aRFS") Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Signed-off-by: Alexander Lobakin Tested-by: Ivan Vecera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 9 ++------- drivers/net/ethernet/intel/ice/ice_lib.c | 5 ++++- drivers/net/ethernet/intel/ice/ice_main.c | 18 ++++++++---------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 5daade32ea62..fba178e07600 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -577,7 +577,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { struct net_device *netdev; - if (!vsi || vsi->type != ICE_VSI_PF || !vsi->arfs_fltr_list) + if (!vsi || vsi->type != ICE_VSI_PF) return; netdev = vsi->netdev; @@ -599,7 +599,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) int base_idx, i; if (!vsi || vsi->type != ICE_VSI_PF) - return -EINVAL; + return 0; pf = vsi->back; netdev = vsi->netdev; @@ -636,7 +636,6 @@ void ice_remove_arfs(struct ice_pf *pf) if (!pf_vsi) return; - ice_free_cpu_rx_rmap(pf_vsi); ice_clear_arfs(pf_vsi); } @@ -653,9 +652,5 @@ void ice_rebuild_arfs(struct ice_pf *pf) return; ice_remove_arfs(pf); - if (ice_set_cpu_rx_rmap(pf_vsi)) { - dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n"); - return; - } ice_init_arfs(pf_vsi); } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 2774cbd5b12a..6d19c58ccacd 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2689,6 +2689,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) return; vsi->irqs_ready = false; + ice_free_cpu_rx_rmap(vsi); + ice_for_each_q_vector(vsi, i) { u16 vector = i + base; int irq_num; @@ -2702,7 +2704,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) continue; /* clear the affinity notifier in the IRQ descriptor */ - irq_set_affinity_notifier(irq_num, NULL); + if (!IS_ENABLED(CONFIG_RFS_ACCEL)) + irq_set_affinity_notifier(irq_num, NULL); /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d768925785ca..5b1198859da7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2510,6 +2510,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } + err = ice_set_cpu_rx_rmap(vsi); + if (err) { + netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n", + vsi->vsi_num, ERR_PTR(err)); + goto free_q_irqs; + } + vsi->irqs_ready = true; return 0; @@ -3692,20 +3699,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf) */ ice_napi_add(vsi); - status = ice_set_cpu_rx_rmap(vsi); - if (status) { - dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", - vsi->vsi_num, status); - goto unroll_napi_add; - } status = ice_init_mac_fltr(pf); if (status) - goto free_cpu_rx_map; + goto unroll_napi_add; return 0; -free_cpu_rx_map: - ice_free_cpu_rx_rmap(vsi); unroll_napi_add: ice_tc_indir_block_unregister(vsi); unroll_cfg_netdev: @@ -5167,7 +5166,6 @@ static int __maybe_unused ice_suspend(struct device *dev) continue; ice_vsi_free_q_vectors(pf->vsi[v]); } - ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf); pci_save_state(pdev); From 7d59706dbef8de83b3662026766507bc494223d7 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 24 Mar 2022 14:19:15 +0100 Subject: [PATCH 291/423] Revert "iavf: Fix deadlock occurrence during resetting VF interface" This change caused a regression with resetting while changing network namespaces. By clearing the IFF_UP flag, the kernel now thinks it has fully closed the device. This reverts commit 0cc318d2e8408bc0ffb4662a0c3e5e57005ac6ff. Fixes: 0cc318d2e840 ("iavf: Fix deadlock occurrence during resetting VF interface") Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 190590d32faf..7dfcf78b57fb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2871,7 +2871,6 @@ continue_reset: running = adapter->state == __IAVF_RUNNING; if (running) { - netdev->flags &= ~IFF_UP; netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); adapter->link_up = false; @@ -2988,7 +2987,7 @@ continue_reset: * to __IAVF_RUNNING */ iavf_up_complete(adapter); - netdev->flags |= IFF_UP; + iavf_irq_enable(adapter, true); } else { iavf_change_state(adapter, __IAVF_DOWN); @@ -3004,10 +3003,8 @@ continue_reset: reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (running) { + if (running) iavf_change_state(adapter, __IAVF_RUNNING); - netdev->flags |= IFF_UP; - } dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } From 2bbac98d0930e8161b1957dc0ec99de39ade1b3c Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Fri, 8 Apr 2022 09:35:23 -0400 Subject: [PATCH 292/423] RDMA/hfi1: Fix use-after-free bug for mm struct Under certain conditions, such as MPI_Abort, the hfi1 cleanup code may represent the last reference held on the task mm. hfi1_mmu_rb_unregister() then drops the last reference and the mm is freed before the final use in hfi1_release_user_pages(). A new task may allocate the mm structure while it is still being used, resulting in problems. One manifestation is corruption of the mmap_sem counter leading to a hang in down_write(). Another is corruption of an mm struct that is in use by another task. Fixes: 3d2a9d642512 ("IB/hfi1: Ensure correct mm is used at all times") Link: https://lore.kernel.org/r/20220408133523.122165.72975.stgit@awfm-01.cornelisnetworks.com Cc: Signed-off-by: Douglas Miller Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mmu_rb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 876cc78a22cc..7333646021bb 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -80,6 +80,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) unsigned long flags; struct list_head del_list; + /* Prevent freeing of mm until we are completely finished. */ + mmgrab(handler->mn.mm); + /* Unregister first so we don't get any more notifications. */ mmu_notifier_unregister(&handler->mn, handler->mn.mm); @@ -102,6 +105,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) do_remove(handler, &del_list); + /* Now the mm may be freed. */ + mmdrop(handler->mn.mm); + kfree(handler); } From e8cf229ebe5eb31eecee86268223530a872872c2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 3 Apr 2022 20:19:46 -0700 Subject: [PATCH 293/423] tools/testing/nvdimm: Fix security_init() symbol collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with the new perf-event support in the nvdimm core, the nfit_test mock module stops compiling. Rename its security_init() to nfit_security_init(). tools/testing/nvdimm/test/nfit.c:1845:13: error: conflicting types for ‘security_init’; have ‘void(struct nfit_test *)’ 1845 | static void security_init(struct nfit_test *t) | ^~~~~~~~~~~~~ In file included from ./include/linux/perf_event.h:61, from ./include/linux/nd.h:11, from ./drivers/nvdimm/nd-core.h:11, from tools/testing/nvdimm/test/nfit.c:19: Fixes: 9a61d0838cd0 ("drivers/nvdimm: Add nvdimm pmu structure") Cc: Kajol Jain Reviewed-by: Kajol Jain Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/164904238610.1330275.1889212115373993727.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 65dbdda3a054..1da76ccde448 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1842,7 +1842,7 @@ static int nfit_test_dimm_init(struct nfit_test *t) return 0; } -static void security_init(struct nfit_test *t) +static void nfit_security_init(struct nfit_test *t) { int i; @@ -1938,7 +1938,7 @@ static int nfit_test0_alloc(struct nfit_test *t) if (nfit_test_dimm_init(t)) return -ENOMEM; smart_init(t); - security_init(t); + nfit_security_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } From d28820419ca332f856cdf8bef0cafed79c29ed05 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 8 Apr 2022 12:58:44 -0700 Subject: [PATCH 294/423] cxl/pci: Drop shadowed variable 0day reports that wait_for_media_ready() declares an @rc variable twice. >> drivers/cxl/pci.c:439:7: warning: Local variable 'rc' shadows outer variable [shadowVariable] int rc; ^ drivers/cxl/pci.c:431:6: note: Shadowed declaration int rc, i; ^ drivers/cxl/pci.c:439:7: note: Shadow variable int rc; ^ Cc: Randy Dunlap Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active") Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reported-by: kernel test robot Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/164944636936.455177.14136200464724208233.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8a7267d116b7..3f2182d66829 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -436,7 +436,6 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds) for (i = mbox_ready_timeout; i; i--) { u32 temp; - int rc; rc = pci_read_config_dword( pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); From e677edbcabee849bfdd43f1602bccbecf736a646 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Apr 2022 11:08:58 -0600 Subject: [PATCH 295/423] io_uring: fix race between timeout flush and removal io_flush_timeouts() assumes the timeout isn't in progress of triggering or being removed/canceled, so it unconditionally removes it from the timeout list and attempts to cancel it. Leave it on the list and let the normal timeout cancelation take care of it. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fafd1ca4780b..659f8ecba5b7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1736,12 +1736,11 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) __must_hold(&ctx->completion_lock) { u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + struct io_kiocb *req, *tmp; spin_lock_irq(&ctx->timeout_lock); - while (!list_empty(&ctx->timeout_list)) { + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { u32 events_needed, events_got; - struct io_kiocb *req = list_first_entry(&ctx->timeout_list, - struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; @@ -1758,7 +1757,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) if (events_got < events_needed) break; - list_del_init(&req->timeout.list); io_kill_timeout(req, 0); } ctx->cq_last_tm_flush = seq; @@ -6628,6 +6626,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) return -EINVAL; + INIT_LIST_HEAD(&req->timeout.list); data->mode = io_translate_timeout_mode(flags); hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); From 74befa447e6839cdd90ed541159ec783726946f9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Apr 2022 19:55:38 +0300 Subject: [PATCH 296/423] net: mdio: don't defer probe forever if PHY IRQ provider is missing When a driver for an interrupt controller is missing, of_irq_get() returns -EPROBE_DEFER ad infinitum, causing fwnode_mdiobus_phy_device_register(), and ultimately, the entire of_mdiobus_register() call, to fail. In turn, any phy_connect() call towards a PHY on this MDIO bus will also fail. This is not what is expected to happen, because the PHY library falls back to poll mode when of_irq_get() returns a hard error code, and the MDIO bus, PHY and attached Ethernet controller work fine, albeit suboptimally, when the PHY library polls for link status. However, -EPROBE_DEFER has special handling given the assumption that at some point probe deferral will stop, and the driver for the supplier will kick in and create the IRQ domain. Reasons for which the interrupt controller may be missing: - It is not yet written. This may happen if a more recent DT blob (with an interrupt-parent for the PHY) is used to boot an old kernel where the driver didn't exist, and that kernel worked with the vintage-correct DT blob using poll mode. - It is compiled out. Behavior is the same as above. - It is compiled as a module. The kernel will wait for a number of seconds specified in the "deferred_probe_timeout" boot parameter for user space to load the required module. The current default is 0, which times out at the end of initcalls. It is possible that this might cause regressions unless users adjust this boot parameter. The proposed solution is to use the driver_deferred_probe_check_state() helper function provided by the driver core, which gives up after some -EPROBE_DEFER attempts, taking "deferred_probe_timeout" into consideration. The return code is changed from -EPROBE_DEFER into -ENODEV or -ETIMEDOUT, depending on whether the kernel is compiled with support for modules or not. Fixes: 66bdede495c7 ("of_mdio: Fix broken PHY IRQ in case of probe deferral") Suggested-by: Robin Murphy Signed-off-by: Vladimir Oltean Acked-by: Greg Kroah-Hartman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220407165538.4084809-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/base/dd.c | 1 + drivers/net/mdio/fwnode_mdio.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index af6bea56f4e2..3fc3b5940bb3 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -296,6 +296,7 @@ int driver_deferred_probe_check_state(struct device *dev) return -EPROBE_DEFER; } +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); static void deferred_probe_timeout_work_func(struct work_struct *work) { diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1becb1a731f6..1c1584fca632 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, int rc; rc = fwnode_irq_get(child, 0); + /* Don't wait forever if the IRQ provider doesn't become available, + * just fall back to poll mode + */ + if (rc == -EPROBE_DEFER) + rc = driver_deferred_probe_check_state(&phy->mdio.dev); if (rc == -EPROBE_DEFER) return rc; From d452088cdfd5a4ad9d96d847d2273fe958d6339b Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 7 Apr 2022 10:07:03 +0300 Subject: [PATCH 297/423] mlxsw: i2c: Fix initialization error flow Add mutex_destroy() call in driver initialization error flow. Fixes: 6882b0aee180f ("mlxsw: Introduce support for I2C bus") Signed-off-by: Vadim Pasternak Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20220407070703.2421076-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 939b692ffc33..ce843ea91464 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, return 0; errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); i2c_set_clientdata(client, NULL); return err; From e2d88f9ce678cd33763826ae2f0412f181251314 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 7 Apr 2022 09:24:22 -0400 Subject: [PATCH 298/423] sctp: use the correct skb for security_sctp_assoc_request Yi Chen reported an unexpected sctp connection abort, and it occurred when COOKIE_ECHO is bundled with DATA Fragment by SCTP HW GSO. As the IP header is included in chunk->head_skb instead of chunk->skb, it failed to check IP header version in security_sctp_assoc_request(). According to Ondrej, SELinux only looks at IP header (address and IPsec options) and XFRM state data, and these are all included in head_skb for SCTP HW GSO packets. So fix it by using head_skb when calling security_sctp_assoc_request() in processing COOKIE_ECHO. v1->v2: - As Ondrej noticed, chunk->head_skb should also be used for security_sctp_assoc_established() in sctp_sf_do_5_1E_ca(). Fixes: e215dab1c490 ("security: call security_sctp_assoc_request in sctp_sf_do_5_1D_ce") Reported-by: Yi Chen Signed-off-by: Xin Long Reviewed-by: Ondrej Mosnacek Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/71becb489e51284edf0c11fc15246f4ed4cef5b6.1649337862.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f342bc12735..52edee1322fc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -781,7 +781,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, } } - if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -932,7 +932,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, /* Set peer label for connection. */ if (security_sctp_assoc_established((struct sctp_association *)asoc, - chunk->skb)) + chunk->head_skb ?: chunk->skb)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Verify that the chunk length for the COOKIE-ACK is OK. @@ -2262,7 +2262,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( } /* Update socket peer label if first association. */ - if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } From e65812fd22eba32f11abe28cb377cbd64cfb1ba0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 7 Apr 2022 11:29:23 -0300 Subject: [PATCH 299/423] net/sched: fix initialization order when updating chain 0 head Currently, when inserting a new filter that needs to sit at the head of chain 0, it will first update the heads pointer on all devices using the (shared) block, and only then complete the initialization of the new element so that it has a "next" element. This can lead to a situation that the chain 0 head is propagated to another CPU before the "next" initialization is done. When this race condition is triggered, packets being matched on that CPU will simply miss all other filters, and will flow through the stack as if there were no other filters installed. If the system is using OVS + TC, such packets will get handled by vswitchd via upcall, which results in much higher latency and reordering. For other applications it may result in packet drops. This is reproducible with a tc only setup, but it varies from system to system. It could be reproduced with a shared block amongst 10 veth tunnels, and an ingress filter mirroring packets to another veth. That's because using the last added veth tunnel to the shared block to do the actual traffic, it makes the race window bigger and easier to trigger. The fix is rather simple, to just initialize the next pointer of the new filter instance (tp) before propagating the head change. The fixes tag is pointing to the original code though this issue should only be observed when using it unlocked. Fixes: 2190d1d0944f ("net: sched: introduce helpers to work with filter chains") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Vlad Buslov Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/b97d5f4eaffeeb9d058155bcab63347527261abf.1649341369.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2957f8f5cea7..f0699f39afdb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1672,10 +1672,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; From ea5dc046127e857a7873ae55fd57c866e9e86fb2 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 30 Mar 2022 17:47:59 +0800 Subject: [PATCH 300/423] cachefiles: unmark inode in use in error path Unmark inode in use if error encountered. If the in-use flag leakage occurs in cachefiles_open_file(), Cachefiles will complain "Inode already in use" when later another cookie with the same index key is looked up. If the in-use flag leakage occurs in cachefiles_create_tmpfile(), though the "Inode already in use" warning won't be triggered, fix the leakage anyway. Reported-by: Gao Xiang Fixes: 1f08c925e7a3 ("cachefiles: Implement backing file wrangling") Signed-off-by: Jeffle Xu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-March/006615.html # v1 Link: https://listman.redhat.com/archives/linux-cachefs/2022-March/006618.html # v2 --- fs/cachefiles/namei.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f256c8aff7bb..ca9f3e4ec4b3 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -57,6 +57,16 @@ static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object, trace_cachefiles_mark_inactive(object, inode); } +static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object, + struct dentry *dentry) +{ + struct inode *inode = d_backing_inode(dentry); + + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, dentry); + inode_unlock(inode); +} + /* * Unmark a backing inode and tell cachefilesd that there's something that can * be culled. @@ -68,9 +78,7 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, struct inode *inode = file_inode(file); if (inode) { - inode_lock(inode); - __cachefiles_unmark_inode_in_use(object, file->f_path.dentry); - inode_unlock(inode); + cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry); if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { atomic_long_add(inode->i_blocks, &cache->b_released); @@ -484,7 +492,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) object, d_backing_inode(path.dentry), ret, cachefiles_trace_trunc_error); file = ERR_PTR(ret); - goto out_dput; + goto out_unuse; } } @@ -494,15 +502,20 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry), PTR_ERR(file), cachefiles_trace_open_error); - goto out_dput; + goto out_unuse; } if (unlikely(!file->f_op->read_iter) || unlikely(!file->f_op->write_iter)) { fput(file); pr_notice("Cache does not support read_iter and write_iter\n"); file = ERR_PTR(-EINVAL); + goto out_unuse; } + goto out_dput; + +out_unuse: + cachefiles_do_unmark_inode_in_use(object, path.dentry); out_dput: dput(path.dentry); out: @@ -590,14 +603,16 @@ static bool cachefiles_open_file(struct cachefiles_object *object, check_failed: fscache_cookie_lookup_negative(object->cookie); cachefiles_unmark_inode_in_use(object, file); - if (ret == -ESTALE) { - fput(file); - dput(dentry); + fput(file); + dput(dentry); + if (ret == -ESTALE) return cachefiles_create_file(object); - } + return false; + error_fput: fput(file); error: + cachefiles_do_unmark_inode_in_use(object, dentry); dput(dentry); return false; } From 7b2f6c306601240635c72caa61f682e74d4591b2 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 5 Apr 2022 09:46:49 -0400 Subject: [PATCH 301/423] cachefiles: Fix KASAN slab-out-of-bounds in cachefiles_set_volume_xattr Use the actual length of volume coherency data when setting the xattr to avoid the following KASAN report. BUG: KASAN: slab-out-of-bounds in cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] Write of size 4 at addr ffff888101e02af4 by task kworker/6:0/1347 CPU: 6 PID: 1347 Comm: kworker/6:0 Kdump: loaded Not tainted 5.18.0-rc1-nfs-fscache-netfs+ #13 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 Workqueue: events fscache_create_volume_work [fscache] Call Trace: dump_stack_lvl+0x45/0x5a print_report.cold+0x5e/0x5db ? __lock_text_start+0x8/0x8 ? cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] kasan_report+0xab/0x120 ? cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] kasan_check_range+0xf5/0x1d0 memcpy+0x39/0x60 cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] cachefiles_acquire_volume+0x2be/0x500 [cachefiles] ? __cachefiles_free_volume+0x90/0x90 [cachefiles] fscache_create_volume_work+0x68/0x160 [fscache] process_one_work+0x3b7/0x6a0 worker_thread+0x2c4/0x650 ? process_one_work+0x6a0/0x6a0 kthread+0x16c/0x1a0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Allocated by task 1347: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 cachefiles_set_volume_xattr+0x76/0x350 [cachefiles] cachefiles_acquire_volume+0x2be/0x500 [cachefiles] fscache_create_volume_work+0x68/0x160 [fscache] process_one_work+0x3b7/0x6a0 worker_thread+0x2c4/0x650 kthread+0x16c/0x1a0 ret_from_fork+0x22/0x30 The buggy address belongs to the object at ffff888101e02af0 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 4 bytes inside of 8-byte region [ffff888101e02af0, ffff888101e02af8) The buggy address belongs to the physical page: page:00000000a2292d70 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x101e02 flags: 0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff) raw: 0017ffffc0000200 0000000000000000 dead000000000001 ffff888100042280 raw: 0000000000000000 0000000080660066 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888101e02980: fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc ffff888101e02a00: 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 >ffff888101e02a80: fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 04 fc ^ ffff888101e02b00: fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc ffff888101e02b80: fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc ================================================================== Fixes: 413a4a6b0b55 "cachefiles: Fix volume coherency attribute" Signed-off-by: Dave Wysochanski Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/20220405134649.6579-1-dwysocha@redhat.com/ # v1 Link: https://lore.kernel.org/r/20220405142810.8208-1-dwysocha@redhat.com/ # Incorrect v2 --- fs/cachefiles/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 35465109d9c4..00b087c14995 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -203,7 +203,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) if (!buf) return false; buf->reserved = cpu_to_be32(0); - memcpy(buf->data, p, len); + memcpy(buf->data, p, volume->vcookie->coherency_len); ret = cachefiles_inject_write_error(); if (ret == 0) From c54eead2a66914a36b4a9ea5bbeb95a768307cba Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 7 Apr 2022 18:28:32 +0800 Subject: [PATCH 302/423] docs: filesystems: caching/backend-api.rst: correct two relinquish APIs use 1. cache backend is using fscache_relinquish_cache() rather than fscache_relinquish_cookie() to reset the cache cookie. 2. No fscache_cache_relinquish() helper currently, it should be fscache_relinquish_cache(). Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006703.html # v1 Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006704.html # v2 --- Documentation/filesystems/caching/backend-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst index be793c49a772..d7b2df5fd607 100644 --- a/Documentation/filesystems/caching/backend-api.rst +++ b/Documentation/filesystems/caching/backend-api.rst @@ -73,7 +73,7 @@ busy. If successful, the cache backend can then start setting up the cache. In the event that the initialisation fails, the cache backend should call:: - void fscache_relinquish_cookie(struct fscache_cache *cache); + void fscache_relinquish_cache(struct fscache_cache *cache); to reset and discard the cookie. @@ -125,7 +125,7 @@ outstanding accesses on the volume to complete before returning. When the the cache is completely withdrawn, fscache should be notified by calling:: - void fscache_cache_relinquish(struct fscache_cache *cache); + void fscache_relinquish_cache(struct fscache_cache *cache); to clear fields in the cookie and discard the caller's ref on it. From 5d3d5b9645b53691b2eee7607cf995bcfae46dd0 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 7 Apr 2022 19:02:39 +0800 Subject: [PATCH 303/423] docs: filesystems: caching/backend-api.rst: fix an object withdrawn API There's no fscache_are_objects_withdrawn() helper at all to test if cookie withdrawal is completed currently. The cache backend is using fscache_wait_for_objects() to wait all objects to be withdrawn. Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006705.html # v1 --- Documentation/filesystems/caching/backend-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst index d7b2df5fd607..d7507becf674 100644 --- a/Documentation/filesystems/caching/backend-api.rst +++ b/Documentation/filesystems/caching/backend-api.rst @@ -110,9 +110,9 @@ to withdraw them, calling:: on the cookie that each object belongs to. This schedules the specified cookie for withdrawal. This gets offloaded to a workqueue. The cache backend can -test for completion by calling:: +wait for completion by calling:: - bool fscache_are_objects_withdrawn(struct fscache_cookie *cache); + void fscache_wait_for_objects(struct fscache_cache *cache); Once all the cookies are withdrawn, a cache backend can withdraw all the volumes, calling:: From 2c547f299827c12244d613eb2ee3616d88f56088 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Wed, 6 Apr 2022 11:50:17 +0800 Subject: [PATCH 304/423] fscache: Remove the cookie parameter from fscache_clear_page_bits() The cookie is not used at all, remove it and update the usage in io.c and afs/write.c (which is the only user outside of fscache currently) at the same time. [DH: Amended the documentation also] Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006659.html --- .../filesystems/caching/netfs-api.rst | 23 +++++++++---------- fs/afs/write.c | 3 +-- fs/fscache/io.c | 5 ++-- include/linux/fscache.h | 4 +--- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/caching/netfs-api.rst b/Documentation/filesystems/caching/netfs-api.rst index 5066113acad5..7308d76a29dc 100644 --- a/Documentation/filesystems/caching/netfs-api.rst +++ b/Documentation/filesystems/caching/netfs-api.rst @@ -404,22 +404,21 @@ schedule a write of that region:: And if an error occurs before that point is reached, the marks can be removed by calling:: - void fscache_clear_page_bits(struct fscache_cookie *cookie, - struct address_space *mapping, + void fscache_clear_page_bits(struct address_space *mapping, loff_t start, size_t len, bool caching) -In both of these functions, the cookie representing the cache object to be -written to and a pointer to the mapping to which the source pages are attached -are passed in; start and len indicate the size of the region that's going to be -written (it doesn't have to align to page boundaries necessarily, but it does -have to align to DIO boundaries on the backing filesystem). The caching -parameter indicates if caching should be skipped, and if false, the functions -do nothing. +In these functions, a pointer to the mapping to which the source pages are +attached is passed in and start and len indicate the size of the region that's +going to be written (it doesn't have to align to page boundaries necessarily, +but it does have to align to DIO boundaries on the backing filesystem). The +caching parameter indicates if caching should be skipped, and if false, the +functions do nothing. -The write function takes some additional parameters: i_size indicates the size -of the netfs file and term_func indicates an optional completion function, to -which term_func_priv will be passed, along with the error or amount written. +The write function takes some additional parameters: the cookie representing +the cache object to be written to, i_size indicates the size of the netfs file +and term_func indicates an optional completion function, to which +term_func_priv will be passed, along with the error or amount written. Note that the write function will always run asynchronously and will unmark all the pages upon completion before calling term_func. diff --git a/fs/afs/write.c b/fs/afs/write.c index 6bcf1475511b..4763132ca57e 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -616,8 +616,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping, _debug("write discard %x @%llx [%llx]", len, start, i_size); /* The dirty region was entirely beyond the EOF. */ - fscache_clear_page_bits(afs_vnode_cache(vnode), - mapping, start, len, caching); + fscache_clear_page_bits(mapping, start, len, caching); afs_pages_written_back(vnode, start, len); ret = 0; } diff --git a/fs/fscache/io.c b/fs/fscache/io.c index c8c7fe9e9a6e..3af3b08a9bb3 100644 --- a/fs/fscache/io.c +++ b/fs/fscache/io.c @@ -235,8 +235,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, { struct fscache_write_request *wreq = priv; - fscache_clear_page_bits(fscache_cres_cookie(&wreq->cache_resources), - wreq->mapping, wreq->start, wreq->len, + fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, wreq->set_bits); if (wreq->term_func) @@ -296,7 +295,7 @@ abandon_end: abandon_free: kfree(wreq); abandon: - fscache_clear_page_bits(cookie, mapping, start, len, cond); + fscache_clear_page_bits(mapping, start, len, cond); if (term_func) term_func(term_func_priv, ret, false); } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6727fb0db619..e25539072463 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -573,7 +573,6 @@ int fscache_write(struct netfs_cache_resources *cres, /** * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages - * @cookie: The cookie representing the cache object * @mapping: The netfs inode to use as the source * @start: The start position in @mapping * @len: The amount of data to unlock @@ -582,8 +581,7 @@ int fscache_write(struct netfs_cache_resources *cres, * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's * waiting. */ -static inline void fscache_clear_page_bits(struct fscache_cookie *cookie, - struct address_space *mapping, +static inline void fscache_clear_page_bits(struct address_space *mapping, loff_t start, size_t len, bool caching) { From 19517e53740ec671c335f05089abe1f0720103c7 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Sat, 2 Apr 2022 12:47:43 +0800 Subject: [PATCH 305/423] fscache: Move fscache_cookies_seq_ops specific code under CONFIG_PROC_FS fscache_cookies_seq_ops is only used in proc.c that is compiled under enabled CONFIG_PROC_FS, so move related code under this config. The same case exsits in internal.h. Also, make fscache_lru_cookie_timeout static due to no user outside of cookie.c. Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006649.html # v1 --- fs/fscache/cookie.c | 4 +++- fs/fscache/internal.h | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 9bb1ab5fe5ed..9d3cf0111709 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -30,7 +30,7 @@ static DEFINE_SPINLOCK(fscache_cookie_lru_lock); DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out); static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker); static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD"; -unsigned int fscache_lru_cookie_timeout = 10 * HZ; +static unsigned int fscache_lru_cookie_timeout = 10 * HZ; void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) { @@ -1069,6 +1069,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, } EXPORT_SYMBOL(__fscache_invalidate); +#ifdef CONFIG_PROC_FS /* * Generate a list of extant cookies in /proc/fs/fscache/cookies */ @@ -1145,3 +1146,4 @@ const struct seq_operations fscache_cookies_seq_ops = { .stop = fscache_cookies_seq_stop, .show = fscache_cookies_seq_show, }; +#endif diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index ed1c9ed737f2..1336f517e9b1 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -56,7 +56,9 @@ static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache, * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; +#ifdef CONFIG_PROC_FS extern const struct seq_operations fscache_cookies_seq_ops; +#endif extern struct timer_list fscache_cookie_lru_timer; extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix); @@ -137,7 +139,9 @@ int fscache_stats_show(struct seq_file *m, void *v); /* * volume.c */ +#ifdef CONFIG_PROC_FS extern const struct seq_operations fscache_volumes_seq_ops; +#endif struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, enum fscache_volume_trace where); From b3c958c20a61fb8514fa16e3edcb421703600ee0 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Fri, 1 Apr 2022 14:37:15 +0800 Subject: [PATCH 306/423] fscache: Use wrapper fscache_set_cache_state() directly when relinquishing We already have the wrapper function to set cache state. Signed-off-by: Yue Hu Signed-off-by: David Howells Reviewed-by: Jeffle Xu cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006648.html # v1 --- fs/fscache/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 2749933852a9..d645f8b302a2 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -214,7 +214,7 @@ void fscache_relinquish_cache(struct fscache_cache *cache) cache->ops = NULL; cache->cache_priv = NULL; - smp_store_release(&cache->state, FSCACHE_CACHE_IS_NOT_PRESENT); + fscache_set_cache_state(cache, FSCACHE_CACHE_IS_NOT_PRESENT); fscache_put_cache(cache, where); } EXPORT_SYMBOL(fscache_relinquish_cache); From 61132ceeda723d2c48cbc2610ca3213a7fcb083b Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 31 Mar 2022 19:57:18 +0800 Subject: [PATCH 307/423] fscache: remove FSCACHE_OLD_API Kconfig option Commit 01491a756578 ("fscache, cachefiles: Disable configuration") added the FSCACHE_OLD_API configuration when rewritten. Now, it's not used any more. Remove it. Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-March/006647.html # v1 --- fs/fscache/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 76316c4a3fb7..b313a978ae0a 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -38,6 +38,3 @@ config FSCACHE_DEBUG enabled by setting bits in /sys/modules/fscache/parameter/debug. See Documentation/filesystems/caching/fscache.rst for more information. - -config FSCACHE_OLD_API - bool From a04cd1600b831a16625b45226b90a292c8f6e8d9 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 8 Apr 2022 13:08:52 -0700 Subject: [PATCH 308/423] mm: migrate: use thp_order instead of HPAGE_PMD_ORDER for new page allocation. Fix a VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages) crash. With folios support, it is possible to have other than HPAGE_PMD_ORDER THPs, in the form of folios, in the system. Use thp_order() to correctly determine the source page order during migration. Link: https://lkml.kernel.org/r/20220404165325.1883267-1-zi.yan@sent.com Link: https://lore.kernel.org/linux-mm/20220404132908.GA785673@u2004/ Fixes: d68eccad3706 ("mm/filemap: Allow large folios to be added to the page cache") Reported-by: Naoya Horiguchi Signed-off-by: Zi Yan Cc: Matthew Wilcox Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- mm/migrate.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a2516d31db6c..358b7c11426d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1209,7 +1209,7 @@ static struct page *new_page(struct page *page, unsigned long start) struct page *thp; thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); + thp_order(page)); if (!thp) return NULL; prep_transhuge_page(thp); diff --git a/mm/migrate.c b/mm/migrate.c index de175e2fdba5..79e4b36f709a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1547,7 +1547,7 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) */ gfp_mask &= ~__GFP_RECLAIM; gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; + order = thp_order(page); } zidx = zone_idx(page_zone(page)); if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) From 66f133ceab7456c789f70a242991ed1b27ba1c3d Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 8 Apr 2022 13:08:55 -0700 Subject: [PATCH 309/423] highmem: fix checks in __kmap_local_sched_{in,out} When CONFIG_DEBUG_KMAP_LOCAL is enabled __kmap_local_sched_{in,out} check that even slots in the tsk->kmap_ctrl.pteval are unmapped. The slots are initialized with 0 value, but the check is done with pte_none. 0 pte however does not necessarily mean that pte_none will return true. e.g. on xtensa it returns false, resulting in the following runtime warnings: WARNING: CPU: 0 PID: 101 at mm/highmem.c:627 __kmap_local_sched_out+0x51/0x108 CPU: 0 PID: 101 Comm: touch Not tainted 5.17.0-rc7-00010-gd3a1cdde80d2-dirty #13 Call Trace: dump_stack+0xc/0x40 __warn+0x8f/0x174 warn_slowpath_fmt+0x48/0xac __kmap_local_sched_out+0x51/0x108 __schedule+0x71a/0x9c4 preempt_schedule_irq+0xa0/0xe0 common_exception_return+0x5c/0x93 do_wp_page+0x30e/0x330 handle_mm_fault+0xa70/0xc3c do_page_fault+0x1d8/0x3c4 common_exception+0x7f/0x7f WARNING: CPU: 0 PID: 101 at mm/highmem.c:664 __kmap_local_sched_in+0x50/0xe0 CPU: 0 PID: 101 Comm: touch Tainted: G W 5.17.0-rc7-00010-gd3a1cdde80d2-dirty #13 Call Trace: dump_stack+0xc/0x40 __warn+0x8f/0x174 warn_slowpath_fmt+0x48/0xac __kmap_local_sched_in+0x50/0xe0 finish_task_switch$isra$0+0x1ce/0x2f8 __schedule+0x86e/0x9c4 preempt_schedule_irq+0xa0/0xe0 common_exception_return+0x5c/0x93 do_wp_page+0x30e/0x330 handle_mm_fault+0xa70/0xc3c do_page_fault+0x1d8/0x3c4 common_exception+0x7f/0x7f Fix it by replacing !pte_none(pteval) with pte_val(pteval) != 0. Link: https://lkml.kernel.org/r/20220403235159.3498065-1-jcmvbkbc@gmail.com Fixes: 5fbda3ecd14a ("sched: highmem: Store local kmaps in task struct") Signed-off-by: Max Filippov Reviewed-by: Thomas Gleixner Cc: "Peter Zijlstra (Intel)" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/highmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/highmem.c b/mm/highmem.c index 0cc0c4da7ed9..1a692997fac4 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -624,7 +624,7 @@ void __kmap_local_sched_out(void) /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { - WARN_ON_ONCE(!pte_none(pteval)); + WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) @@ -661,7 +661,7 @@ void __kmap_local_sched_in(void) /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { - WARN_ON_ONCE(!pte_none(pteval)); + WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) From eafc0a02391b7b36617b36c97c4b5d6832cf5e24 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Fri, 8 Apr 2022 13:08:58 -0700 Subject: [PATCH 310/423] lz4: fix LZ4_decompress_safe_partial read out of bound When partialDecoding, it is EOF if we've either filled the output buffer or can't proceed with reading an offset for following match. In some extreme corner cases when compressed data is suitably corrupted, UAF will occur. As reported by KASAN [1], LZ4_decompress_safe_partial may lead to read out of bound problem during decoding. lz4 upstream has fixed it [2] and this issue has been disscussed here [3] before. current decompression routine was ported from lz4 v1.8.3, bumping lib/lz4 to v1.9.+ is certainly a huge work to be done later, so, we'd better fix it first. [1] https://lore.kernel.org/all/000000000000830d1205cf7f0477@google.com/ [2] https://github.com/lz4/lz4/commit/c5d6f8a8be3927c0bec91bcc58667a6cfad244ad# [3] https://lore.kernel.org/all/CC666AE8-4CA4-4951-B6FB-A2EFDE3AC03B@fb.com/ Link: https://lkml.kernel.org/r/20211111105048.2006070-1-guoxuenan@huawei.com Reported-by: syzbot+63d688f1d899c588fb71@syzkaller.appspotmail.com Signed-off-by: Guo Xuenan Reviewed-by: Nick Terrell Acked-by: Gao Xiang Cc: Yann Collet Cc: Chengyang Fan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lz4/lz4_decompress.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 926f4823d5ea..fd1728d94bab 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic( ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - if (!partialDecoding || (cpy == oend)) + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) break; } else { /* may overwrite up to WILDCOPYLENGTH beyond cpy */ From a431dbbc540532b7465eae4fc8b56a85a9fc7d17 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 8 Apr 2022 13:09:01 -0700 Subject: [PATCH 311/423] mm/sparsemem: fix 'mem_section' will never be NULL gcc 12 warning The gcc 12 compiler reports a "'mem_section' will never be NULL" warning on the following code: static inline struct mem_section *__nr_to_section(unsigned long nr) { #ifdef CONFIG_SPARSEMEM_EXTREME if (!mem_section) return NULL; #endif if (!mem_section[SECTION_NR_TO_ROOT(nr)]) return NULL; : It happens with CONFIG_SPARSEMEM_EXTREME off. The mem_section definition is #ifdef CONFIG_SPARSEMEM_EXTREME extern struct mem_section **mem_section; #else extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif In the !CONFIG_SPARSEMEM_EXTREME case, mem_section is a static 2-dimensional array and so the check "!mem_section[SECTION_NR_TO_ROOT(nr)]" doesn't make sense. Fix this warning by moving the "!mem_section[SECTION_NR_TO_ROOT(nr)]" check up inside the CONFIG_SPARSEMEM_EXTREME block and adding an explicit NR_SECTION_ROOTS check to make sure that there is no out-of-bound array access. Link: https://lkml.kernel.org/r/20220331180246.2746210-1-longman@redhat.com Fixes: 3e347261a80b ("sparsemem extreme implementation") Signed-off-by: Waiman Long Reported-by: Justin Forbes Cc: "Kirill A . Shutemov" Cc: Ingo Molnar Cc: Rafael Aquini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 962b14d403e8..46ffab808f03 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1397,13 +1397,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) static inline struct mem_section *__nr_to_section(unsigned long nr) { + unsigned long root = SECTION_NR_TO_ROOT(nr); + + if (unlikely(root >= NR_SECTION_ROOTS)) + return NULL; + #ifdef CONFIG_SPARSEMEM_EXTREME - if (!mem_section) + if (!mem_section || !mem_section[root]) return NULL; #endif - if (!mem_section[SECTION_NR_TO_ROOT(nr)]) - return NULL; - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; + return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); From 01e67e04c28170c47700c2c226d732bbfedb1ad0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Apr 2022 13:09:04 -0700 Subject: [PATCH 312/423] mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0) If an mremap() syscall with old_size=0 ends up in move_page_tables(), it will call invalidate_range_start()/invalidate_range_end() unnecessarily, i.e. with an empty range. This causes a WARN in KVM's mmu_notifier. In the past, empty ranges have been diagnosed to be off-by-one bugs, hence the WARNing. Given the low (so far) number of unique reports, the benefits of detecting more buggy callers seem to outweigh the cost of having to fix cases such as this one, where userspace is doing something silly. In this particular case, an early return from move_page_tables() is enough to fix the issue. Link: https://lkml.kernel.org/r/20220329173155.172439-1-pbonzini@redhat.com Reported-by: syzbot+6bde52d89cfdf9f61425@syzkaller.appspotmail.com Signed-off-by: Paolo Bonzini Cc: Sean Christopherson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/mremap.c b/mm/mremap.c index 9d76da79594d..303d3290b938 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -486,6 +486,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma, pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; + if (!len) + return 0; + old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); From 4ad099559b00ac01c3726e5c95dc3108ef47d03e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 8 Apr 2022 13:09:07 -0700 Subject: [PATCH 313/423] mm/mempolicy: fix mpol_new leak in shared_policy_replace If mpol_new is allocated but not used in restart loop, mpol_new will be freed via mpol_put before returning to the caller. But refcnt is not initialized yet, so mpol_put could not do the right things and might leak the unused mpol_new. This would happen if mempolicy was updated on the shared shmem file while the sp->lock has been dropped during the memory allocation. This issue could be triggered easily with the below code snippet if there are many processes doing the below work at the same time: shmid = shmget((key_t)5566, 1024 * PAGE_SIZE, 0666|IPC_CREAT); shm = shmat(shmid, 0, 0); loop many times { mbind(shm, 1024 * PAGE_SIZE, MPOL_LOCAL, mask, maxnode, 0); mbind(shm + 128 * PAGE_SIZE, 128 * PAGE_SIZE, MPOL_DEFAULT, mask, maxnode, 0); } Link: https://lkml.kernel.org/r/20220329111416.27954-1-linmiaohe@huawei.com Fixes: 42288fe366c4 ("mm: mempolicy: Convert shared_policy mutex to spinlock") Signed-off-by: Miaohe Lin Acked-by: Michal Hocko Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: [3.8] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 358b7c11426d..88a74bc4cba5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2733,6 +2733,7 @@ alloc_new: mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; + atomic_set(&mpol_new->refcnt, 1); goto restart; } From 0347b2b95c3e1474c4b10f53df7ff2e841fda147 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 8 Apr 2022 13:09:10 -0700 Subject: [PATCH 314/423] mailmap: update Vasily Averin's email address I'm moving to a @linux.dev account. Map my old addresses. Link: https://lkml.kernel.org/r/737c7c2b-cdab-63ee-be90-cb33316c9657@linux.dev Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index b9d358217586..93458154ce7d 100644 --- a/.mailmap +++ b/.mailmap @@ -391,6 +391,10 @@ Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Valdis Kletnieks +Vasily Averin +Vasily Averin +Vasily Averin +Vasily Averin Vinod Koul Vinod Koul Vinod Koul From b33e1044475afffaaabe51d35837aa10c09ba9ae Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 8 Apr 2022 13:09:13 -0700 Subject: [PATCH 315/423] mm/list_lru.c: revert "mm/list_lru: optimize memcg_reparent_list_lru_node()" Commit 405cc51fc104 ("mm/list_lru: optimize memcg_reparent_list_lru_node()") has subtle races which are proving ugly to fix. Revert the original optimization. If quantitative testing indicates that we have a significant problem here then other implementations can be looked at. Fixes: 405cc51fc104 ("mm/list_lru: optimize memcg_reparent_list_lru_node()") Acked-by: Shakeel Butt Reviewed-by: Muchun Song Acked-by: Michal Hocko Cc: Waiman Long Cc: Roman Gushchin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/list_lru.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index c669d87001a6..ba76428ceece 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -394,12 +394,6 @@ static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid, int dst_idx = dst_memcg->kmemcg_id; struct list_lru_one *src, *dst; - /* - * If there is no lru entry in this nlru, we can skip it immediately. - */ - if (!READ_ONCE(nlru->nr_items)) - return; - /* * Since list_lru_{add,del} may be called under an IRQ-safe lock, * we have to use IRQ-safe primitives here to avoid deadlock. From 4071a1b9e24ee394b7492bff7542707ee9ad986d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 8 Apr 2022 13:09:16 -0700 Subject: [PATCH 316/423] MAINTAINERS: add Tom as clang reviewer I have been helping with build breaks and other clang things and would like to help with the reviews. Link: https://lkml.kernel.org/r/20220407175715.3378998-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Nathan Chancellor Acked-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3af36d852c38..17156b0a8925 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4791,6 +4791,7 @@ F: .clang-format CLANG/LLVM BUILD SUPPORT M: Nathan Chancellor M: Nick Desaulniers +R: Tom Rix L: llvm@lists.linux.dev S: Supported W: https://clangbuiltlinux.github.io/ From e6934e4048c91502efcb21da92b7ae37cd8fa741 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 8 Apr 2022 12:15:21 +0200 Subject: [PATCH 317/423] net: dsa: felix: suppress -EPROBE_DEFER errors The DSA master might not have been probed yet in which case the probe of the felix switch fails with -EPROBE_DEFER: [ 4.435305] mscc_felix 0000:00:00.5: Failed to register DSA switch: -517 It is not an error. Use dev_err_probe() to demote this particular error to a debug message. Fixes: 56051948773e ("net: dsa: ocelot: add driver for Felix switch family") Signed-off-by: Michael Walle Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220408101521.281886-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 8d382b27e625..52a8566071ed 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -2316,7 +2316,7 @@ static int felix_pci_probe(struct pci_dev *pdev, err = dsa_register_switch(ds); if (err) { - dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err); + dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n"); goto err_register_ds; } From 8d3a6c37d50d5a0504c126c932cc749e6dd9c78f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 8 Apr 2022 10:22:04 +0800 Subject: [PATCH 318/423] net: atlantic: Avoid out-of-bounds indexing UBSAN warnings are observed on atlantic driver: [ 294.432996] UBSAN: array-index-out-of-bounds in /build/linux-Qow4fL/linux-5.15.0/drivers/net/ethernet/aquantia/atlantic/aq_nic.c:484:48 [ 294.433695] index 8 is out of range for type 'aq_vec_s *[8]' The ring is dereferenced right before breaking out the loop, to prevent that from happening, only use the index in the loop to fix the issue. BugLink: https://bugs.launchpad.net/bugs/1958770 Tested-by: Mario Limonciello Signed-off-by: Kai-Heng Feng Reviewed-by: Igor Russkikh Link: https://lore.kernel.org/r/20220408022204.16815-1-kai.heng.feng@canonical.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/aquantia/atlantic/aq_nic.c | 8 +++---- .../net/ethernet/aquantia/atlantic/aq_vec.c | 24 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 33f1a1377588..24d715c28a35 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -486,8 +486,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_vec_start(aq_vec); if (err < 0) goto err_exit; @@ -517,8 +517,8 @@ int aq_nic_start(struct aq_nic_s *self) mod_timer(&self->polling_timer, jiffies + AQ_CFG_POLLING_TIMER_INTERVAL); } else { - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_pci_func_alloc_irq(self, i, self->ndev->name, aq_vec_isr, aq_vec, aq_vec_get_affinity_mask(aq_vec)); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f4774cf051c9..6ab1f3212d24 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (!self) { err = -EINVAL; } else { - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp); ring[AQ_VEC_RX_ID].stats.rx.polls++; u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp); @@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops, self->aq_hw_ops = aq_hw_ops; self->aq_hw = aq_hw; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX); if (err < 0) goto err_exit; @@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self) unsigned int i = 0U; int err = 0; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw, &ring[AQ_VEC_TX_ID]); if (err < 0) @@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self) struct aq_ring_s *ring = NULL; unsigned int i = 0U; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw, &ring[AQ_VEC_TX_ID]); @@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]); } @@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_free(&ring[AQ_VEC_TX_ID]); if (i < self->rx_rings) aq_ring_free(&ring[AQ_VEC_RX_ID]); From 8c3ce496bd612bd21679e445f75fcabb6be997b2 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sat, 9 Apr 2022 09:15:33 +0530 Subject: [PATCH 319/423] RISC-V: KVM: Don't clear hgatp CSR in kvm_arch_vcpu_put() We might have RISC-V systems (such as QEMU) where VMID is not part of the TLB entry tag so these systems will have to flush all TLB entries upon any change in hgatp.VMID. Currently, we zero-out hgatp CSR in kvm_arch_vcpu_put() and we re-program hgatp CSR in kvm_arch_vcpu_load(). For above described systems, this will flush all TLB entries whenever VCPU exits to user-space hence reducing performance. This patch fixes above described performance issue by not clearing hgatp CSR in kvm_arch_vcpu_put(). Fixes: 34bde9d8b9e6 ("RISC-V: KVM: Implement VCPU world-switch") Cc: stable@vger.kernel.org Signed-off-by: Anup Patel Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 624166004e36..6785aef4cbd4 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -653,8 +653,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); - csr_write(CSR_HGATP, 0); - csr->vsstatus = csr_read(CSR_VSSTATUS); csr->vsie = csr_read(CSR_VSIE); csr->vstvec = csr_read(CSR_VSTVEC); From fac3725364397f9a40a101f089b86ea655a58d06 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sat, 9 Apr 2022 09:15:44 +0530 Subject: [PATCH 320/423] KVM: selftests: riscv: Set PTE A and D bits in VS-stage page table Supporting hardware updates of PTE A and D bits is optional for any RISC-V implementation so current software strategy is to always set these bits in both G-stage (hypervisor) and VS-stage (guest kernel). If PTE A and D bits are not set by software (hypervisor or guest) then RISC-V implementations not supporting hardware updates of these bits will cause traps even for perfectly valid PTEs. Based on above explanation, the VS-stage page table created by various KVM selftest applications is not correct because PTE A and D bits are not set. This patch fixes VS-stage page table programming of PTE A and D bits for KVM selftests. Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V 64-bit") Signed-off-by: Anup Patel Tested-by: Mayuresh Chitale Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/include/riscv/processor.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index dc284c6bdbc3..eca5c622efd2 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, #define PGTBL_PTE_WRITE_SHIFT 2 #define PGTBL_PTE_READ_MASK 0x0000000000000002ULL #define PGTBL_PTE_READ_SHIFT 1 -#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \ +#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \ + PGTBL_PTE_DIRTY_MASK | \ + PGTBL_PTE_EXECUTE_MASK | \ PGTBL_PTE_WRITE_MASK | \ PGTBL_PTE_READ_MASK) #define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL From ebdef0de2dbc40e697adaa6b3408130f7a7b8351 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sat, 9 Apr 2022 09:15:51 +0530 Subject: [PATCH 321/423] KVM: selftests: riscv: Fix alignment of the guest_hang() function The guest_hang() function is used as the default exception handler for various KVM selftests applications by setting it's address in the vstvec CSR. The vstvec CSR requires exception handler base address to be at least 4-byte aligned so this patch fixes alignment of the guest_hang() function. Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V 64-bit") Signed-off-by: Anup Patel Tested-by: Mayuresh Chitale Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d377f2603d98..3961487a4870 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); } -static void guest_hang(void) +static void __aligned(16) guest_hang(void) { while (1) ; From 4054eee9290248bf66c5eacb58879c9aaad37f71 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 9 Apr 2022 09:16:00 +0530 Subject: [PATCH 322/423] RISC-V: KVM: include missing hwcap.h into vcpu_fp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vcpu_fp uses the riscv_isa_extension mechanism which gets defined in hwcap.h but doesn't include that head file. While it seems to work in most cases, in certain conditions this can lead to build failures like ../arch/riscv/kvm/vcpu_fp.c: In function ‘kvm_riscv_vcpu_fp_reset’: ../arch/riscv/kvm/vcpu_fp.c:22:13: error: implicit declaration of function ‘riscv_isa_extension_available’ [-Werror=implicit-function-declaration] 22 | if (riscv_isa_extension_available(&isa, f) || | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../arch/riscv/kvm/vcpu_fp.c:22:49: error: ‘f’ undeclared (first use in this function) 22 | if (riscv_isa_extension_available(&isa, f) || Fix this by simply including the necessary header. Fixes: 0a86512dc113 ("RISC-V: KVM: Factor-out FP virtualization into separate sources") Signed-off-by: Heiko Stuebner Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_fp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 4449a976e5a6..d4308c512007 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) From 940442deea98b3280061095dd811e6136f1b41f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 09:12:55 -0300 Subject: [PATCH 323/423] tools include UAPI: Sync linux/vhost.h with the kernel sources To get the changes in: b04d910af330b55e ("vdpa: support exposing the count of vqs to userspace") a61280ddddaa45f9 ("vdpa: support exposing the config size to userspace") Silencing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h $ diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h --- tools/include/uapi/linux/vhost.h 2021-07-15 16:17:01.840818309 -0300 +++ include/uapi/linux/vhost.h 2022-04-02 18:55:05.702522387 -0300 @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2022-04-04 14:52:25.036375145 -0300 +++ after 2022-04-04 14:52:31.906549976 -0300 @@ -38,4 +38,6 @@ [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", [0x78] = "VDPA_GET_IOVA_RANGE", + [0x79] = "VDPA_GET_CONFIG_SIZE", + [0x80] = "VDPA_GET_VQS_COUNT", }; $ Cc: Longpeng Cc: Michael S. Tsirkin Link: https://lore.kernel.org/lkml/YksxoFcOARk%2Fldev@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index c998860d7bbc..5d99e7c242a2 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif From 541f695cbcb6932c22638b06e0cbe1d56177e2e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Apr 2022 17:28:48 -0300 Subject: [PATCH 324/423] tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts Just like its done for ldopts and for both in tools/perf/Makefile.config. Using `` to initialize PERL_EMBED_CCOPTS somehow precludes using: $(filter-out SOMETHING_TO_FILTER,$(PERL_EMBED_CCOPTS)) And we need to do it to allow for building with versions of clang where some gcc options selected by distros are not available. Tested-by: Sedat Dilek # Debian/Selfmade LLVM-14 (x86-64) Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Link: http://lore.kernel.org/lkml/YktYX2OnLtyobRYD@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1480910c792e..90774b60d31b 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -217,7 +217,7 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) $(OUTPUT)test-libperl.bin: From 41caff459a5b956b3e23ba9ca759dd0629ad3dda Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Apr 2022 10:33:21 -0300 Subject: [PATCH 325/423] tools build: Filter out options and warnings not supported by clang These make the feature check fail when using clang, so remove them just like is done in tools/perf/Makefile.config to build perf itself. Adding -Wno-compound-token-split-by-macro to tools/perf/Makefile.config when building with clang is also necessary to avoid these warnings turned into errors (-Werror): CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o In file included from util/scripting-engines/trace-event-perl.c:35: In file included from /usr/lib64/perl5/CORE/perl.h:4085: In file included from /usr/lib64/perl5/CORE/hv.h:659: In file included from /usr/lib64/perl5/CORE/hv_func.h:34: In file included from /usr/lib64/perl5/CORE/sbox32_hash.h:4: /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: error: '(' and '{' tokens introducing statement expression appear in different macro expansion contexts [-Werror,-Wcompound-token-split-by-macro] ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:80:38: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' #define ZAPHOD32_SCRAMBLE32(v,prime) STMT_START { \ ^~~~~~~~~~ /usr/lib64/perl5/CORE/perl.h:737:29: note: expanded from macro 'STMT_START' # define STMT_START (void)( /* gcc supports "({ STATEMENTS; })" */ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: note: '{' token is here ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:80:49: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' #define ZAPHOD32_SCRAMBLE32(v,prime) STMT_START { \ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: error: '}' and ')' tokens terminating statement expression appear in different macro expansion contexts [-Werror,-Wcompound-token-split-by-macro] ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:87:41: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' v ^= (v>>23); \ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: note: ')' token is here ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:88:3: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' } STMT_END ^~~~~~~~ /usr/lib64/perl5/CORE/perl.h:738:21: note: expanded from macro 'STMT_END' # define STMT_END ) ^ Please refer to the discussion on the Link: tag below, where Nathan clarifies the situation: acme> And then get to the problems at the end of this message, which seem acme> similar to the problem described here: acme> acme> From Nathan Chancellor <> acme> Subject [PATCH] mwifiex: Remove unnecessary braces from HostCmd_SET_SEQ_NO_BSS_INFO acme> acme> https://lkml.org/lkml/2020/9/1/135 acme> acme> So perhaps in this case its better to disable that acme> -Werror,-Wcompound-token-split-by-macro when building with clang? Yes, I think that is probably the best solution. As far as I can tell, at least in this file and context, the warning appears harmless, as the "create a GNU C statement expression from two different macros" is very much intentional, based on the presence of PERL_USE_GCC_BRACE_GROUPS. The warning is fixed in upstream Perl by just avoiding creating GNU C statement expressions using STMT_START and STMT_END: https://github.com/Perl/perl5/issues/18780 https://github.com/Perl/perl5/pull/18984 If I am reading the source code correctly, an alternative to disabling the warning would be specifying -DPERL_GCC_BRACE_GROUPS_FORBIDDEN but it seems like that might end up impacting more than just this site, according to the issue discussion above. Based-on-a-patch-by: Sedat Dilek Tested-by: Sedat Dilek # Debian/Selfmade LLVM-14 (x86-64) Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Link: http://lore.kernel.org/lkml/YkxWcYzph5pC1EK8@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 7 +++++++ tools/perf/Makefile.config | 3 +++ 2 files changed, 10 insertions(+) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 90774b60d31b..de66e1cc0734 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -220,6 +220,13 @@ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 96ad944ca6a8..5b5ba475a5c0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -790,6 +790,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif From dd6e1fe91cdd52774ca642d1da75b58a86356b56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2022 10:08:07 -0300 Subject: [PATCH 326/423] perf python: Fix probing for some clang command line options The clang compiler complains about some options even without a source file being available, while others require one, so use the simple tools/build/feature/test-hello.c file. Then check for the "is not supported" string in its output, in addition to the "unknown argument" already being looked for. This was noticed when building with clang-13 where -ffat-lto-objects isn't supported and since we were looking just for "unknown argument" and not providing a source code to clang, was mistakenly assumed as being available and not being filtered to set of command line options provided to clang, leading to a build failure. Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Sedat Dilek Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 483f05004e68..6156bb87ee3e 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,14 @@ -from os import getenv +from os import getenv, path from subprocess import Popen, PIPE from re import sub cc = getenv("CC") cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() +src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] if cc_is_clang: from distutils.sysconfig import get_config_vars From 3a8a0475861a443f02e3a9b57d044fe2a0a99291 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Apr 2022 11:04:20 -0300 Subject: [PATCH 327/423] perf build: Don't use -ffat-lto-objects in the python feature test when building with clang-13 Using -ffat-lto-objects in the python feature test when building with clang-13 results in: clang-13: error: optimization flag '-ffat-lto-objects' is not supported [-Werror,-Wignored-optimization-argument] error: command '/usr/sbin/clang' failed with exit code 1 cp: cannot stat '/tmp/build/perf/python_ext_build/lib/perf*.so': No such file or directory make[2]: *** [Makefile.perf:639: /tmp/build/perf/python/perf.so] Error 1 Noticed when building on a docker.io/library/archlinux:base container. Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Sedat Dilek Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +++ tools/perf/util/setup.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 5b5ba475a5c0..f3bf9297bcc0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + ifeq ($(CC_NO_CLANG), 0) + PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS)) + endif endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 6156bb87ee3e..c255a2c90cd6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,6 +25,8 @@ if cc_is_clang: vars[var] = sub("-fstack-protector-strong", "", vars[var]) if not clang_has_option("-fno-semantic-interposition"): vars[var] = sub("-fno-semantic-interposition", "", vars[var]) + if not clang_has_option("-ffat-lto-objects"): + vars[var] = sub("-ffat-lto-objects", "", vars[var]) from distutils.core import setup, Extension From 290fa68bdc4588637849adb8534301a1e62beee2 Mon Sep 17 00:00:00 2001 From: Chengdong Li Date: Fri, 8 Apr 2022 16:47:48 +0800 Subject: [PATCH 328/423] perf test tsc: Fix error message when not supported By default `perf test tsc` does not return the error message when the child process detected kernel does not support it. Instead, the child process prints an error message to stderr, unfortunately stderr is redirected to /dev/null when verbose <= 0. This patch does: - return TEST_SKIP to the parent process instead of TEST_OK when perf_read_tsc_conversion() is not supported. - Add a new subtest of testing if TSC is supported on current architecture by moving exist code to a separate function. It avoids two places in test__perf_time_to_tsc() that return TEST_SKIP by doing this. - Extend the test suite definition to contain above two subtests. Current test_suite and test_case structs do not support printing skip reason when the number of subtest less than 1. To print skip reason, it is necessary to extend current test suite definition. Reviewed-by: Adrian Hunter Signed-off-by: Chengdong Li Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: likexu@tencent.com Link: https://lore.kernel.org/r/20220408084748.43707-1-chengdongli@tencent.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 36 +++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index d12d0ad81801..cc6df49a65a1 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -47,6 +47,17 @@ } \ } +static int test__tsc_is_supported(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + if (!TSC_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture\n"); + return TEST_SKIP; + } + + return TEST_OK; +} + /** * test__perf_time_to_tsc - test converting perf time to TSC. * @@ -70,7 +81,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su struct perf_cpu_map *cpus = NULL; struct evlist *evlist = NULL; struct evsel *evsel = NULL; - int err = -1, ret, i; + int err = TEST_FAIL, ret, i; const char *comm1, *comm2; struct perf_tsc_conversion tc; struct perf_event_mmap_page *pc; @@ -79,10 +90,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su u64 test_time, comm1_time = 0, comm2_time = 0; struct mmap *md; - if (!TSC_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); @@ -124,8 +131,8 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su ret = perf_read_tsc_conversion(pc, &tc); if (ret) { if (ret == -EOPNOTSUPP) { - fprintf(stderr, " (not supported)"); - return 0; + pr_debug("perf_read_tsc_conversion is not supported in current kernel\n"); + err = TEST_SKIP; } goto out_err; } @@ -191,7 +198,7 @@ next_event: test_tsc >= comm2_tsc) goto out_err; - err = 0; + err = TEST_OK; out_err: evlist__delete(evlist); @@ -200,4 +207,15 @@ out_err: return err; } -DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc); +static struct test_case time_to_tsc_tests[] = { + TEST_CASE_REASON("TSC support", tsc_is_supported, + "This architecture does not support"), + TEST_CASE_REASON("Perf time to TSC", perf_time_to_tsc, + "perf_read_tsc_conversion is not supported"), + { .name = NULL, } +}; + +struct test_suite suite__perf_time_to_tsc = { + .desc = "Convert perf time to TSC", + .test_cases = time_to_tsc_tests, +}; From 278aaba2c555a54e62aec40e04defaa9fffcc1c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 9 Apr 2022 11:48:15 -0300 Subject: [PATCH 329/423] tools headers arm64: Sync arm64's cputype.h with the kernel sources To get the changes in: 83bea32ac7ed37bb ("arm64: Add part number for Arm Cortex-A78AE") That addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/asm/cputype.h' differs from latest version at 'arch/arm64/include/asm/cputype.h' diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h Cc: Ali Saidi Cc: Andrew Kilroy Cc: Chanho Park Cc: German Gomez Cc: James Clark Cc: John Garry Cc: Leo Yan Cc: Will Deacon Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9afcc6467a09..e09d6908a21d 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) From fa7095c5c3240bb2ecbc77f8b69be9b1d9e2cf60 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 6 Apr 2022 15:56:51 +0100 Subject: [PATCH 330/423] perf unwind: Don't show unwind error messages when augmenting frame pointer stack Commit Fixes: b9f6fbb3b2c29736 ("perf arm64: Inject missing frames when using 'perf record --call-graph=fp'") intended to add a 'best effort' DWARF unwind that improved the frame pointer stack in most scenarios. It's expected that the unwind will fail sometimes, but this shouldn't be reported as an error. It only works when the return address can be determined from the contents of the link register alone. Fix the error shown when the unwinder requires extra registers by adding a new flag that suppresses error messages. This flag is not set in the normal --call-graph=dwarf unwind mode so that behavior is not changed. Fixes: b9f6fbb3b2c29736 ("perf arm64: Inject missing frames when using 'perf record --call-graph=fp'") Reported-by: John Garry Signed-off-by: James Clark Tested-by: John Garry Cc: Alexander Shishkin Cc: Alexandre Truong Cc: German Gomez Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220406145651.1392529-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 2 +- .../perf/util/arm64-frame-pointer-unwind-support.c | 2 +- tools/perf/util/machine.c | 2 +- tools/perf/util/unwind-libdw.c | 10 +++++++--- tools/perf/util/unwind-libdw.h | 1 + tools/perf/util/unwind-libunwind-local.c | 10 +++++++--- tools/perf/util/unwind-libunwind.c | 6 ++++-- tools/perf/util/unwind.h | 13 ++++++++++--- 8 files changed, 32 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2dab2d262060..afdca7f2959f 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr } err = unwind__get_entries(unwind_entry, &cnt, thread, - &sample, MAX_STACK); + &sample, MAX_STACK, false); if (err) pr_debug("unwind failed\n"); else if (cnt != MAX_STACK) { diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 2242a885fbd7..4940be4a0569 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; } - ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); sample->user_regs = old_regs; if (ret || entries.length != 2) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b80048546451..95391236f5f6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2987,7 +2987,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; return unwind__get_entries(unwind_entry, cursor, - thread, sample, max_stack); + thread, sample, max_stack, false); } int thread__resolve_callchain(struct thread *thread, diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index a74b517f7497..94aa40f6e348 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg) bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg) report_module(pc, ui); if (!dwfl_frame_pc(state, &pc, &isactivation)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, - int max_stack) + int max_stack, + bool best_effort) { struct unwind_info *ui, ui_buf = { .sample = data, @@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, .cb = cb, .arg = arg, .max_stack = max_stack, + .best_effort = best_effort }; Dwarf_Word ip; int err = -EINVAL, i; diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 0cbd2650e280..8c88bc4f2304 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -20,6 +20,7 @@ struct unwind_info { void *arg; int max_stack; int idx; + bool best_effort; struct unwind_entry entries[]; }; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 71a353349181..41e29fc7648a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -96,6 +96,7 @@ struct unwind_info { struct perf_sample *sample; struct machine *machine; struct thread *thread; + bool best_effort; }; #define dw_read(ptr, type, end) ({ \ @@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as, ret = perf_reg_value(&val, &ui->sample->user_regs, id); if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); + if (!ui->best_effort) + pr_err("unwind: can't read reg %d\n", regnum); return ret; } @@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, return -1; ret = unw_init_remote(&c, addr_space, ui); - if (ret) + if (ret && !ui->best_effort) display_error(ret); while (!ret && (unw_step(&c) > 0) && i < max_stack) { @@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { struct unwind_info ui = { .sample = data, .thread = thread, .machine = thread->maps->machine, + .best_effort = best_effort }; if (!data->user_regs.regs) diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e89a5479b361..509c287ee762 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { if (thread->maps->unwind_libunwind_ops) - return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, + max_stack, best_effort); return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ab8ad469c8de..b2a03fa5289b 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -23,13 +23,19 @@ struct unwind_libunwind_ops { void (*finish_access)(struct maps *maps); int (*get_entries)(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, bool best_effort); }; #ifdef HAVE_DWARF_UNWIND_SUPPORT +/* + * When best_effort is set, don't report errors and fail silently. This could + * be expanded in the future to be more permissive about things other than + * error messages. + */ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, + bool best_effort); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT #ifndef LIBUNWIND__ARCH_REG_ID @@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, struct thread *thread __maybe_unused, struct perf_sample *data __maybe_unused, - int max_stack __maybe_unused) + int max_stack __maybe_unused, + bool best_effort __maybe_unused) { return 0; } From ffab487052054162b3b6c9c6005777ec6cfcea05 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 8 Apr 2022 15:40:56 +0100 Subject: [PATCH 331/423] perf: arm-spe: Fix perf report --mem-mode Since commit bb30acae4c4dacfa ("perf report: Bail out --mem-mode if mem info is not available") "perf mem report" and "perf report --mem-mode" don't allow opening the file unless one of the events has PERF_SAMPLE_DATA_SRC set. SPE doesn't have this set even though synthetic memory data is generated after it is decoded. Fix this issue by setting DATA_SRC on SPE events. This has no effect on the data collected because the SPE driver doesn't do anything with that flag and doesn't generate samples. Fixes: bb30acae4c4dacfa ("perf report: Bail out --mem-mode if mem info is not available") Signed-off-by: James Clark Tested-by: Leo Yan Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: German Gomez Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Ravi Bangoria Cc: Will Deacon Link: https://lore.kernel.org/r/20220408144056.1955535-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/arm-spe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 86e2e926aa0e..af4d63af8072 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, arm_spe_set_timestamp(itr, arm_spe_evsel); } + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) From aeee9dc53ce405d2161f9915f553114e94e5b677 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 Apr 2022 16:26:25 +0300 Subject: [PATCH 332/423] perf tools: Fix perf's libperf_print callback eprintf() does not expect va_list as the type of the 4th parameter. Use veprintf() because it does. Signed-off-by: Adrian Hunter Fixes: 428dab813a56ce94 ("libperf: Merge libperf_set_print() into libperf_init()") Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220408132625.2451452-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2f6b67189b42..6aae7b6c376b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -434,7 +434,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) From c9c2a427dd9fe0e73eceacafb42d54f3c4535693 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 6 Apr 2022 23:21:10 +0530 Subject: [PATCH 333/423] perf bench: Fix futex bench to correct usage of affinity for machines with #CPUs > 1K The 'perf bench futex' testcase fails on systems with more than 1K CPUs. Testcase: perf bench futex all Failure snippet: <<>>Running futex/hash benchmark... perf: pthread_create: No such file or directory <<>> All the futex benchmarks (ie hash, lock-api, requeue, wake, wake-parallel), pthread_create is invoked in respective bench_futex_* function. Though the logs shows direct failure from pthread_create, strace logs showed that actual failure is from "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. Patch addresses this by fixing all the futex benchmarks to use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size, and CPU_SET_S to set the mask. Reported-by: Disha Goel Reviewed-by: Srikar Dronamraju Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 26 +++++++++++++++++++------- tools/perf/bench/futex-lock-pi.c | 21 ++++++++++++++++----- tools/perf/bench/futex-requeue.c | 21 ++++++++++++++++----- tools/perf/bench/futex-wake-parallel.c | 21 ++++++++++++++++----- tools/perf/bench/futex-wake.c | 22 ++++++++++++++++------ 5 files changed, 83 insertions(+), 28 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index dbcecec4eeda..f05db4cf983d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -122,12 +122,14 @@ static void print_summary(void) int bench_futex_hash(int argc, const char **argv) { int ret = 0; - cpu_set_t cpuset; + cpu_set_t *cpuset; struct sigaction act; unsigned int i; pthread_attr_t thread_attr; struct worker *worker = NULL; struct perf_cpu_map *cpu; + int nrcpus; + size_t size; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv) threads_starting = params.nthreads; pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); + + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); - + } ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, (void *)(struct worker *) &worker[i]); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); pthread_attr_destroy(&thread_attr); pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6fc9a3d55c1f..0abb3f7ee24f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -120,11 +120,17 @@ static void *workerfn(void *arg) static void create_threads(struct worker *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; @@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, } else worker[i].futex = &global_futex; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } int bench_futex_lock_pi(int argc, const char **argv) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2f59d5d1c509..b6faabfafb8e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 861deb934745..e47f46a3a47e 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void print_run(struct thread_data *waking_worker, unsigned int run_num) diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index cfda48bef1d7..201a3555f09a 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -97,22 +97,32 @@ static void print_summary(void) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; - + size_t size; + int nrcpus = perf_cpu_map__nr(cpu); threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, From 299687e18a06aa648c8d4ebb025b322ac83fe7dd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 6 Apr 2022 23:21:11 +0530 Subject: [PATCH 334/423] perf bench: Fix epoll bench to correct usage of affinity for machines with #CPUs > 1K The 'perf bench epoll' testcase fails on systems with more than 1K CPUs. Testcase: perf bench epoll all Result snippet: <<>> Run summary [PID 106497]: 1399 threads monitoring on 64 file-descriptors for 8 secs. perf: pthread_create: No such file or directory <<>> In epoll benchmarks (ctl, wait) pthread_create is invoked in do_threads from respective bench_epoll_* function. Though the logs shows direct failure from pthread_create, the actual failure is from "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. Patch addresses this by fixing all the epoll benchmarks to use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size, and CPU_SET_S to set the mask. Reported-by: Disha Goel Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-3-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/epoll-ctl.c | 25 +++++++++++++++++++------ tools/perf/bench/epoll-wait.c | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 134612bde0cb..4256dc5d6236 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0; + int nrcpus; + size_t size; if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) init_fdmaps(w, 50); if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 37de970c9743..2728b0140853 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -291,9 +291,11 @@ static void print_summary(void) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0, events = EPOLLIN; + int nrcpus; + size_t size; if (oneshot) events |= EPOLLONESHOT; @@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) } if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); From 3ae87d2f25c0e998da2721ce332e2b80d3d53c39 Mon Sep 17 00:00:00 2001 From: Piotr Chmura Date: Thu, 31 Mar 2022 17:55:50 +0200 Subject: [PATCH 335/423] media: si2157: unknown chip version Si2147-A30 ROM 0x50 Fix firmware file names assignment in si2157 tuner, allow for running devices without firmware files needed. modprobe gives error: unknown chip version Si2147-A30 ROM 0x50 Device initialization is interrupted. Caused by: 1. table si2157_tuners has swapped fields rom_id and required vs struct si2157_tuner_info. 2. both firmware file names can be null for devices with required == false - device uses build-in firmware in this case Tested on this device: m07ca:1871 AVerMedia Technologies, Inc. TD310 DVB-T/T2/C dongle [mchehab: fix mangled patch] Link: https://bugzilla.kernel.org/show_bug.cgi?id=215726 Link: https://lore.kernel.org/lkml/5f660108-8812-383c-83e4-29ee0558d623@leemhuis.info/ Link: https://lore.kernel.org/linux-media/c4bcaff8-fbad-969e-ad47-e2c487ac02a1@gmail.com Fixes: 1c35ba3bf972 ("media: si2157: use a different namespace for firmware") Cc: stable@vger.kernel.org # 5.17.x Signed-off-by: Piotr Chmura Tested-by: Robert Schlabbach Signed-off-by: Mauro Carvalho Chehab --- drivers/media/tuners/si2157.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index 47029746b89e..0de587b412d4 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -77,16 +77,16 @@ err_mutex_unlock: } static const struct si2157_tuner_info si2157_tuners[] = { - { SI2141, false, 0x60, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2141, false, 0x61, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2146, false, 0x11, SI2146_11_FIRMWARE, NULL }, - { SI2147, false, 0x50, SI2147_50_FIRMWARE, NULL }, - { SI2148, true, 0x32, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2148, true, 0x33, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2157, false, 0x50, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, - { SI2158, false, 0x50, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2158, false, 0x51, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2177, false, 0x50, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2141, 0x60, false, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2141, 0x61, false, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2146, 0x11, false, SI2146_11_FIRMWARE, NULL }, + { SI2147, 0x50, false, SI2147_50_FIRMWARE, NULL }, + { SI2148, 0x32, true, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2148, 0x33, true, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2157, 0x50, false, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2158, 0x50, false, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2158, 0x51, false, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2177, 0x50, false, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, }; static int si2157_load_firmware(struct dvb_frontend *fe, @@ -178,7 +178,7 @@ static int si2157_find_and_load_firmware(struct dvb_frontend *fe) } } - if (!fw_name && !fw_alt_name) { + if (required && !fw_name && !fw_alt_name) { dev_err(&client->dev, "unknown chip version Si21%d-%c%c%c ROM 0x%02x\n", part_id, cmd.args[1], cmd.args[3], cmd.args[4], rom_id); From bc21e74d4775f883ae1f542c1f1dc7205b15d925 Mon Sep 17 00:00:00 2001 From: Denis Nikitin Date: Tue, 29 Mar 2022 20:11:30 -0700 Subject: [PATCH 336/423] perf session: Remap buf if there is no space for event If a perf event doesn't fit into remaining buffer space return NULL to remap buf and fetch the event again. Keep the logic to error out on inadequate input from fuzzing. This fixes perf failing on ChromeOS (with 32b userspace): $ perf report -v -i perf.data ... prefetch_event: head=0x1fffff8 event->header_size=0x30, mmap_size=0x2000000: fuzzed or compressed perf.data? Error: failed to process sample Fixes: 57fc032ad643ffd0 ("perf session: Avoid infinite loop when seeing invalid header.size") Reviewed-by: James Clark Signed-off-by: Denis Nikitin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220330031130.2152327-1-denik@chromium.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b8dfe603e50..45a30040ec8d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2095,6 +2095,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -2107,15 +2108,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. */ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } From 0ff26efe92844aa3910eff8951739d44a5ab6493 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 5 Apr 2022 00:15:40 +0200 Subject: [PATCH 337/423] perf docs: Add perf-iostat link to manpages Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220404221541.30312-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 9c330cdfa973..71ebdf8125de 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], linkperf:perf-evlist[1], linkperf:perf-ftrace[1], linkperf:perf-help[1], linkperf:perf-inject[1], -linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1], linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], linkperf:perf-script[1], linkperf:perf-test[1], From 3e6b43beb7b56ac6fd376c84f06d90ded73a2788 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 5 Apr 2022 00:15:41 +0200 Subject: [PATCH 338/423] perf tools: Add external commands to list-cmds The `perf --list-cmds` output prints only internal commands, although there is no reason for that from users' perspective. Adding the external commands to commands array with NULL function pointer allows printing all perf commands while not changing the logic of command handler selection. Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220404221541.30312-2-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 6aae7b6c376b..0170cb0819d6 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,6 +55,7 @@ struct cmd_struct { }; static struct cmd_struct commands[] = { + { "archive", NULL, 0 }, { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "config", cmd_config, 0 }, @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "iostat", NULL, 0 }, { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, @@ -360,6 +362,8 @@ static void handle_internal_command(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands+i; + if (p->fn == NULL) + continue; if (strcmp(p->cmd, cmd)) continue; exit(run_builtin(p, argc, argv)); From 940a445a904088eac715dd985c01847311a42459 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 7 Apr 2022 16:04:59 -0700 Subject: [PATCH 339/423] perf annotate: Drop objdump stderr to avoid getting stuck waiting for stdout output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If objdump writes to stderr it can block waiting for it to be read. As perf doesn't read stderr then progress stops with perf waiting for stdout output. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Dave Marchevsky Cc: Denis Nikitin Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Lexi Shao Cc: Li Huafei Cc: Mark Rutland Cc: Martin Liška Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Remi Bernon Cc: Riccardo Mancini Cc: Song Liu Cc: Stephane Eranian Cc: Thomas Richter Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20220407230503.1265036-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e4c641b240df..82cc396ef516 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2047,6 +2047,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) objdump_process.argv = objdump_argv; objdump_process.out = -1; objdump_process.err = -1; + objdump_process.no_stderr = 1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; From dbc2b1764734857d68425468ffa8486e97ab89df Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:14 +0200 Subject: [PATCH 340/423] mt76: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/net/wireless/mediatek/mt76/mt76x2/pci.c: In function ‘mt76x2e_probe’: ././include/linux/compiler_types.h:352:38: error: call to ‘__compiletime_assert_946’ \ declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Felix Fietkau Cc: Lorenzo Bianconi Cc: Ryder Lee Cc: Shayne Chen Cc: Sean Wang Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220405151517.29753-9-bp@alien8.de --- drivers/net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 8a22ee581674..df85ebc6e1df 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id) mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9); /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */ - mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf); + mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf); /* RG_SSUSB_CDR_BR_PE1D = 0x3 */ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3); From 6fb3a5868b2117611f41e421e10e6a8c2a13039a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 18:55:37 +0200 Subject: [PATCH 341/423] brcmfmac: sdio: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c: In function ‘brcmf_sdio_drivestrengthinit’: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:3798:2: error: case label does not reduce to an integer constant case SDIOD_DRVSTR_KEY(BRCM_CC_43143_CHIP_ID, 17): ^~~~ drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:3809:2: error: case label does not reduce to an integer constant case SDIOD_DRVSTR_KEY(BRCM_CC_43362_CHIP_ID, 13): ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: brcm80211-dev-list.pdl@broadcom.com Cc: netdev@vger.kernel.org Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/Ykx0iRlvtBnKqtbG@zn.tnic --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index ba3c159111d3..d78ccc223709 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype { BRCMF_SDIO_FT_SUB, }; -#define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu)) +#define SDIOD_DRVSTR_KEY(chip, pmu) (((unsigned int)(chip) << 16) | (pmu)) /* SDIO Pad drive strength to select value mappings */ struct sdiod_drive_str { From 5a6b06f5927c940fa44026695779c30b7536474c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 4 Apr 2022 22:48:00 +0200 Subject: [PATCH 342/423] ath9k: Fix usage of driver-private space in tx_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ieee80211_tx_info_clear_status() helper also clears the rate counts and the driver-private part of struct ieee80211_tx_info, so using it breaks quite a few other things. So back out of using it, and instead define a ath-internal helper that only clears the area between the status_driver_data and the rates info. Combined with moving the ath_frame_info struct to status_driver_data, this avoids clearing anything we shouldn't be, and so we can keep the existing code for handling the rate information. While fixing this I also noticed that the setting of tx_info->status.rates[tx_rateindex].count on hardware underrun errors was always immediately overridden by the normal setting of the same fields, so rearrange the code so that the underrun detection actually takes effect. The new helper could be generalised to a 'memset_between()' helper, but leave it as a driver-internal helper for now since this needs to go to stable. Cc: stable@vger.kernel.org Reported-by: Peter Seiderer Fixes: 037250f0a45c ("ath9k: Properly clear TX status area before reporting to mac80211") Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Peter Seiderer Tested-by: Peter Seiderer Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220404204800.2681133-1-toke@toke.dk --- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/ath9k/xmit.c | 30 ++++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 98090e40e1cf..e2791d45f5f5 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) continue; txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); - fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0]; if (fi->keyix == keyix) return true; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index cbcf96ac303e..db83cc4ba810 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); BUILD_BUG_ON(sizeof(struct ath_frame_info) > - sizeof(tx_info->rate_driver_data)); - return (struct ath_frame_info *) &tx_info->rate_driver_data[0]; + sizeof(tx_info->status.status_driver_data)); + return (struct ath_frame_info *) &tx_info->status.status_driver_data[0]; } static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno) @@ -2542,6 +2542,16 @@ skip_tx_complete: spin_unlock_irqrestore(&sc->tx.txbuflock, flags); } +static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info) +{ + void *ptr = &tx_info->status; + + memset(ptr + sizeof(tx_info->status.rates), 0, + sizeof(tx_info->status) - + sizeof(tx_info->status.rates) - + sizeof(tx_info->status.status_driver_data)); +} + static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok) @@ -2553,7 +2563,7 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; - ieee80211_tx_info_clear_status(tx_info); + ath_clear_tx_status(tx_info); if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2569,6 +2579,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.ampdu_len = nframes; tx_info->status.ampdu_ack_len = nframes - nbad; + tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; + + for (i = tx_rateindex + 1; i < hw->max_rates; i++) { + tx_info->status.rates[i].count = 0; + tx_info->status.rates[i].idx = -1; + } + if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 && (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) { /* @@ -2590,13 +2607,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.rates[tx_rateindex].count = hw->max_rate_tries; } - - for (i = tx_rateindex + 1; i < hw->max_rates; i++) { - tx_info->status.rates[i].count = 0; - tx_info->status.rates[i].idx = -1; - } - - tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) From ce522ba9ef7e2d9fb22a39eb3371c0c64e2a433e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Apr 2022 14:21:36 -1000 Subject: [PATCH 343/423] Linux 5.18-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8c7de9a72ea2..29e273d3f8cc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Superb Owl # *DOCUMENTATION* From 2f7a26abb8241a0208c68d22815aa247c5ddacab Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sat, 9 Apr 2022 03:26:55 +0200 Subject: [PATCH 344/423] ALSA: pcm: Test for "silence" field in struct "pcm_format_data" Syzbot reports "KASAN: null-ptr-deref Write in snd_pcm_format_set_silence".[1] It is due to missing validation of the "silence" field of struct "pcm_format_data" in "pcm_formats" array. Add a test for valid "pat" and, if it is not so, return -EINVAL. [1] https://lore.kernel.org/lkml/000000000000d188ef05dc2c7279@google.com/ Reported-and-tested-by: syzbot+205eb15961852c2c5974@syzkaller.appspotmail.com Signed-off-by: Fabio M. De Francesco Cc: Link: https://lore.kernel.org/r/20220409012655.9399-1-fmdefrancesco@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c index 4866aed97aac..5588b6a1ee8b 100644 --- a/sound/core/pcm_misc.c +++ b/sound/core/pcm_misc.c @@ -433,7 +433,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int return 0; width = pcm_formats[(INT)format].phys; /* physical width */ pat = pcm_formats[(INT)format].silence; - if (! width) + if (!width || !pat) return -EINVAL; /* signed or 1 byte data */ if (pcm_formats[(INT)format].signd == 1 || width <= 8) { From 264fb03497ec1c7841bba872571bcd11beed57a7 Mon Sep 17 00:00:00 2001 From: Tao Jin Date: Sat, 9 Apr 2022 18:44:24 -0400 Subject: [PATCH 345/423] ALSA: hda/realtek: add quirk for Lenovo Thinkpad X12 speakers For this specific device on Lenovo Thinkpad X12 tablet, the verbs were dumped by qemu running a guest OS that init this codec properly. After studying the dump, it turns out that the same quirk used by the other Lenovo devices can be reused. The patch was tested working against the mainline kernel. Cc: Signed-off-by: Tao Jin Link: https://lore.kernel.org/r/CO6PR03MB6241CD73310B37858FE64C85E1E89@CO6PR03MB6241.namprd03.prod.outlook.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 61df440fdb61..44aed1a54845 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9270,6 +9270,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), From b2cd2cde7d690b760bcdd675380ff37c3e1aa38d Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Thu, 7 Apr 2022 10:16:10 +0530 Subject: [PATCH 346/423] net: phy: LAN87xx: remove genphy_softreset in config_aneg When the T1 phy master/slave state is changed, at the end of config_aneg function genphy_softreset is called. After the reset all the registers configured during the config_init are restored to default value. To avoid this, removed the genphy_softreset call. v1->v2 ------ Added the author in cc Fixes: 8a1b415d70b7 ("net: phy: added ethtool master-slave configuration support") Signed-off-by: Arun Ramadoss Signed-off-by: David S. Miller --- drivers/net/phy/microchip_t1.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index 389df3f4293c..3f79bbbe62d3 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -706,7 +706,6 @@ static int lan87xx_read_status(struct phy_device *phydev) static int lan87xx_config_aneg(struct phy_device *phydev) { u16 ctl = 0; - int rc; switch (phydev->master_slave_set) { case MASTER_SLAVE_CFG_MASTER_FORCE: @@ -722,11 +721,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) return -EOPNOTSUPP; } - rc = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); - if (rc == 1) - rc = genphy_soft_reset(phydev); - - return rc; + return phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); } static struct phy_driver microchip_t1_phy_driver[] = { From a6aaa00324240967272b451bfa772547bd576ee6 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Thu, 7 Apr 2022 08:25:21 -0500 Subject: [PATCH 347/423] net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link When using a fixed-link, the altr_tse_pcs driver crashes due to null-pointer dereference as no phy_device is provided to tse_pcs_fix_mac_speed function. Fix this by adding a check for phy_dev before calling the tse_pcs_fix_mac_speed() function. Also clean up the tse_pcs_fix_mac_speed function a bit. There is no need to check for splitter_base and sgmii_adapter_base because the driver will fail if these 2 variables are not derived from the device tree. Fixes: fb3bbdb85989 ("net: ethernet: Add TSE PCS support to dwmac-socfpga") Signed-off-by: Dinh Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c | 8 -------- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h | 4 ++++ drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 13 +++++-------- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871..00f6d347eaf7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -57,10 +57,6 @@ #define TSE_PCS_USE_SGMII_ENA BIT(0) #define TSE_PCS_IF_USE_SGMII 0x03 -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 -#define SGMII_ADAPTER_ENABLE 0x0000 - #define AUTONEGO_LINK_TIMER 20 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) @@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, unsigned int speed) { void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; u32 val; - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - pcs->autoneg = phy_dev->autoneg; if (phy_dev->autoneg == AUTONEG_ENABLE) { diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h index 442812c0a4bd..694ac25ef426 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h @@ -10,6 +10,10 @@ #include #include +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct tse_pcs { struct device *dev; void __iomem *tse_pcs_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index b7c2579c963b..ac9e6c7a33b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -18,9 +18,6 @@ #include "altr_tse_pcs.h" -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -62,16 +59,14 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base; void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; u32 val; - if ((tse_pcs_base) && (sgmii_adapter_base)) - writew(SGMII_ADAPTER_DISABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + writew(SGMII_ADAPTER_DISABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); if (splitter_base) { val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG); @@ -93,7 +88,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (tse_pcs_base && sgmii_adapter_base) + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + if (phy_dev) tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); } From e8a64bbaaad1f6548cec5508297bc6d45e8ab69e Mon Sep 17 00:00:00 2001 From: Benedikt Spranger Date: Fri, 8 Apr 2022 11:47:45 +0200 Subject: [PATCH 348/423] net/sched: taprio: Check if socket flags are valid A user may set the SO_TXTIME socket option to ensure a packet is send at a given time. The taprio scheduler has to confirm, that it is allowed to send a packet at that given time, by a check against the packet time schedule. The scheduler drop the packet, if the gates are closed at the given send time. The check, if SO_TXTIME is set, may fail since sk_flags are part of an union and the union is used otherwise. This happen, if a socket is not a full socket, like a request socket for example. Add a check to verify, if the union is used for sk_flags. Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode") Signed-off-by: Benedikt Spranger Reviewed-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 377f896bdedc..b9c71a304d39 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -417,7 +417,8 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, { struct taprio_sched *q = qdisc_priv(sch); - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. */ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { From 6624bb34b4eb19f715db9908cca00122748765d7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Apr 2022 11:42:03 +0200 Subject: [PATCH 349/423] nl80211: correctly check NL80211_ATTR_REG_ALPHA2 size We need this to be at least two bytes, so we can access alpha2[0] and alpha2[1]. It may be three in case some userspace used NUL-termination since it was NLA_STRING (and we also push it out with NUL-termination). Cc: stable@vger.kernel.org Reported-by: Lee Jones Link: https://lore.kernel.org/r/20220411114201.fd4a31f06541.Ie7ff4be2cf348d8cc28ed0d626fc54becf7ea799@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ee1c2b6b6971..21e808fcb676 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -528,7 +528,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, - [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ + [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, From a5199b5626cd6913cf8776a835bc63d40e0686ad Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 11 Apr 2022 14:37:51 +0530 Subject: [PATCH 350/423] cfg80211: hold bss_lock while updating nontrans_list Synchronize additions to nontrans_list of transmitting BSS with bss_lock to avoid races. Also when cfg80211_add_nontrans_list() fails __cfg80211_unlink_bss() needs bss_lock to be held (has lockdep assert on bss_lock). So protect the whole block with bss_lock to avoid races and warnings. Found during code review. Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Rameshkumar Sundaram Link: https://lore.kernel.org/r/1649668071-9370-1-git-send-email-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b2fdac96bab0..4a6d86432910 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2018,11 +2018,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); From fb4bccd863ccccd36ad000601856609e259a1859 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 6 Apr 2022 10:56:59 -0700 Subject: [PATCH 351/423] mac80211: fix ht_capa printout in debugfs Don't use sizeof(pointer) when calculating scnprintf offset. Fixes: 01f84f0ed3b4 ("mac80211: reduce stack usage in debugfs") Signed-off-by: Ben Greear Link: https://lore.kernel.org/r/20220406175659.20611-1-greearb@candelatech.com [correct the Fixes tag] Signed-off-by: Johannes Berg --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9479f2787ea7..88d9cc945a21 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -441,7 +441,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, #define PRINT_HT_CAP(_cond, _str) \ do { \ if (_cond) \ - p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ + p += scnprintf(p, bufsz + buf - p, "\t" _str "\n"); \ } while (0) char *buf, *p; int i; From 05ae2fba821c4d122ab4ba3e52144e21586c4010 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 9 Apr 2022 13:20:19 +0200 Subject: [PATCH 352/423] netfilter: nft_socket: make cgroup match work in input too cgroupv2 helper function ignores the already-looked up sk and uses skb->sk instead. Just pass sk from the calling function instead; this will make cgroup matching work for udp and tcp in input even when edemux did not set skb->sk already. Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2") Signed-off-by: Florian Westphal Tested-by: Topi Miettinen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index bd3792f080ed..6d9e8e0a3a7d 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -37,12 +37,11 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt, #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool -nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level) +nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { - struct sock *sk = skb_to_full_sk(pkt->skb); struct cgroup *cgrp; - if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) + if (!sk_fullsock(sk)) return false; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); @@ -109,7 +108,7 @@ static void nft_socket_eval(const struct nft_expr *expr, break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: - if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) { + if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; return; } From 1a7eb80d170c28be2928433702256fe2a0bd1e0f Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Fri, 8 Apr 2022 09:49:41 +0000 Subject: [PATCH 353/423] dpaa_eth: Fix missing of_node_put in dpaa_get_ts_info() Both of of_get_parent() and of_parse_phandle() return node pointer with refcount incremented, use of_node_put() on it to decrease refcount when done. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 763d2c7b5fb1..5750f9a56393 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev, info->phc_index = -1; fman_node = of_get_parent(mac_node); - if (fman_node) + if (fman_node) { ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0); + of_node_put(fman_node); + } - if (ptp_node) + if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); + } if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); From e3fa461d8b0e185b7da8a101fe94dfe6dd500ac0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 8 Apr 2022 16:03:42 +0200 Subject: [PATCH 354/423] ipv6: fix panic when forwarding a pkt with no in6 dev kongweibin reported a kernel panic in ip6_forward() when input interface has no in6 dev associated. The following tc commands were used to reproduce this panic: tc qdisc del dev vxlan100 root tc qdisc add dev vxlan100 root netem corrupt 5% CC: stable@vger.kernel.org Fixes: ccd27f05ae7b ("ipv6: fix 'disable_policy' for fwd packets") Reported-by: kongweibin Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e23f058166af..fa63ef2bd99c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!net->ipv6.devconf_all->disable_policy && - !idev->cnf.disable_policy && + (!idev || !idev->cnf.disable_policy) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; From 0c8b6641c8410930e2a2f4a437ac3f987fbf9404 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 6 Apr 2022 14:37:13 +0800 Subject: [PATCH 355/423] selftests: kvm: add tsc_scaling_sync to .gitignore The tsc_scaling_sync's binary should be present in the .gitignore file for the git to ignore it. Signed-off-by: Like Xu Message-Id: <20220406063715.55625-3-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 573d93a1d61f..0b0e4402bba6 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -34,6 +34,7 @@ /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test +/x86_64/tsc_scaling_sync /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/userspace_io_test From af105c9cc9ec8fdc087827a98d4b9dc10d61c358 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 6 Apr 2022 14:37:15 +0800 Subject: [PATCH 356/423] Documentation: KVM: Add SPDX-License-Identifier tag +new file mode 100644 +WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 +#27: FILE: Documentation/virt/kvm/x86/errata.rst:1: Opportunistically update all other non-added KVM documents and remove a new extra blank line at EOF for x86/errata.rst. Signed-off-by: Like Xu Message-Id: <20220406063715.55625-5-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/vcpu-requests.rst | 2 ++ Documentation/virt/kvm/x86/amd-memory-encryption.rst | 2 ++ Documentation/virt/kvm/x86/errata.rst | 2 +- Documentation/virt/kvm/x86/running-nested-guests.rst | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst index db43ee571f5a..31f62b64e07b 100644 --- a/Documentation/virt/kvm/vcpu-requests.rst +++ b/Documentation/virt/kvm/vcpu-requests.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================= KVM VCPU Requests ================= diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 1c6847fff304..2d307811978c 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ====================================== Secure Encrypted Virtualization (SEV) ====================================== diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst index 806f049b6975..410e0aa63493 100644 --- a/Documentation/virt/kvm/x86/errata.rst +++ b/Documentation/virt/kvm/x86/errata.rst @@ -1,3 +1,4 @@ +.. SPDX-License-Identifier: GPL-2.0 ======================================= Known limitations of CPU virtualization @@ -36,4 +37,3 @@ Nested virtualization features ------------------------------ TBD - diff --git a/Documentation/virt/kvm/x86/running-nested-guests.rst b/Documentation/virt/kvm/x86/running-nested-guests.rst index bd70c69468ae..a27e6768d900 100644 --- a/Documentation/virt/kvm/x86/running-nested-guests.rst +++ b/Documentation/virt/kvm/x86/running-nested-guests.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================== Running nested guests with KVM ============================== From c538dc792ff7e456d777f585fdf96aa4e781ed66 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 8 Apr 2022 08:37:10 -0500 Subject: [PATCH 357/423] KVM: SVM: Do not activate AVIC for SEV-enabled guest Since current AVIC implementation cannot support encrypted memory, inhibit AVIC for SEV-enabled guest. Signed-off-by: Suravee Suthikulpanit Message-Id: <20220408133710.54275-1-suravee.suthikulpanit@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/avic.c | 3 ++- arch/x86/kvm/svm/sev.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0d37ba442de3..92843fcdc1cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1052,6 +1052,7 @@ enum kvm_apicv_inhibit { APICV_INHIBIT_REASON_X2APIC, APICV_INHIBIT_REASON_BLOCKIRQ, APICV_INHIBIT_REASON_ABSENT, + APICV_INHIBIT_REASON_SEV, }; struct kvm_arch { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index a1cf9c31273b..421619540ff9 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -837,7 +837,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) BIT(APICV_INHIBIT_REASON_IRQWIN) | BIT(APICV_INHIBIT_REASON_PIT_REINJ) | BIT(APICV_INHIBIT_REASON_X2APIC) | - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | + BIT(APICV_INHIBIT_REASON_SEV); return supported & BIT(reason); } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c2fe89ecdb2d..537aaddc852f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -260,6 +260,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); + return 0; e_free: From 42dcbe7d8bac997eef4c379e61d9121a15ed4e36 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 7 Apr 2022 22:10:13 +0200 Subject: [PATCH 358/423] KVM: x86: hyper-v: Avoid writing to TSC page without an active vCPU The following WARN is triggered from kvm_vm_ioctl_set_clock(): WARNING: CPU: 10 PID: 579353 at arch/x86/kvm/../../../virt/kvm/kvm_main.c:3161 mark_page_dirty_in_slot+0x6c/0x80 [kvm] ... CPU: 10 PID: 579353 Comm: qemu-system-x86 Tainted: G W O 5.16.0.stable #20 Hardware name: LENOVO 20UF001CUS/20UF001CUS, BIOS R1CET65W(1.34 ) 06/17/2021 RIP: 0010:mark_page_dirty_in_slot+0x6c/0x80 [kvm] ... Call Trace: ? kvm_write_guest+0x114/0x120 [kvm] kvm_hv_invalidate_tsc_page+0x9e/0xf0 [kvm] kvm_arch_vm_ioctl+0xa26/0xc50 [kvm] ? schedule+0x4e/0xc0 ? __cond_resched+0x1a/0x50 ? futex_wait+0x166/0x250 ? __send_signal+0x1f1/0x3d0 kvm_vm_ioctl+0x747/0xda0 [kvm] ... The WARN was introduced by commit 03c0304a86bc ("KVM: Warn if mark_page_dirty() is called without an active vCPU") but the change seems to be correct (unlike Hyper-V TSC page update mechanism). In fact, there's no real need to actually write to guest memory to invalidate TSC page, this can be done by the first vCPU which goes through kvm_guest_time_update(). Reported-by: Maxim Levitsky Reported-by: Naresh Kamboju Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20220407201013.963226-1-vkuznets@redhat.com> --- arch/x86/include/asm/kvm_host.h | 4 +-- arch/x86/kvm/hyperv.c | 44 ++++++++------------------------- arch/x86/kvm/hyperv.h | 2 +- arch/x86/kvm/x86.c | 7 +++--- 4 files changed, 15 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 92843fcdc1cf..e0c0f0e1f754 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -974,12 +974,10 @@ enum hv_tsc_page_status { HV_TSC_PAGE_UNSET = 0, /* TSC page MSR was written by the guest, update pending */ HV_TSC_PAGE_GUEST_CHANGED, - /* TSC page MSR was written by KVM userspace, update pending */ + /* TSC page update was triggered from the host side */ HV_TSC_PAGE_HOST_CHANGED, /* TSC page was properly set up and is currently active */ HV_TSC_PAGE_SET, - /* TSC page is currently being updated and therefore is inactive */ - HV_TSC_PAGE_UPDATING, /* TSC page was set up with an inaccessible GPA */ HV_TSC_PAGE_BROKEN, }; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 123b677111c5..46f9dfb60469 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1135,11 +1135,13 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); - if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) - return; - mutex_lock(&hv->hv_lock); + + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || + hv->hv_tsc_page_status == HV_TSC_PAGE_SET || + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) + goto out_unlock; + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) goto out_unlock; @@ -1201,45 +1203,19 @@ out_unlock: mutex_unlock(&hv->hv_lock); } -void kvm_hv_invalidate_tsc_page(struct kvm *kvm) +void kvm_hv_request_tsc_page_update(struct kvm *kvm) { struct kvm_hv *hv = to_kvm_hv(kvm); - u64 gfn; - int idx; - - if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || - tsc_page_update_unsafe(hv)) - return; mutex_lock(&hv->hv_lock); - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - goto out_unlock; + if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET && + !tsc_page_update_unsafe(hv)) + hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED; - /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */ - if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET) - hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING; - - gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - - hv->tsc_ref.tsc_sequence = 0; - - /* - * Take the srcu lock as memslots will be accessed to check the gfn - * cache generation against the memslots generation. - */ - idx = srcu_read_lock(&kvm->srcu); - if (kvm_write_guest(kvm, gfn_to_gpa(gfn), - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) - hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; - srcu_read_unlock(&kvm->srcu, idx); - -out_unlock: mutex_unlock(&hv->hv_lock); } - static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr) { if (!hv_vcpu->enforce_cpuid) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e19c00ee9ab3..da2737f2a956 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -137,7 +137,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); -void kvm_hv_invalidate_tsc_page(struct kvm *kvm); +void kvm_hv_request_tsc_page_update(struct kvm *kvm); void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index de49a88df1c2..547ba00ef64f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2901,7 +2901,7 @@ static void kvm_end_pvclock_update(struct kvm *kvm) static void kvm_update_masterclock(struct kvm *kvm) { - kvm_hv_invalidate_tsc_page(kvm); + kvm_hv_request_tsc_page_update(kvm); kvm_start_pvclock_update(kvm); pvclock_update_vm_gtod_copy(kvm); kvm_end_pvclock_update(kvm); @@ -3113,8 +3113,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) offsetof(struct compat_vcpu_info, time)); if (vcpu->xen.vcpu_time_info_set) kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0); - if (!v->vcpu_idx) - kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); + kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); return 0; } @@ -6241,7 +6240,7 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp) if (data.flags & ~KVM_CLOCK_VALID_FLAGS) return -EINVAL; - kvm_hv_invalidate_tsc_page(kvm); + kvm_hv_request_tsc_page_update(kvm); kvm_start_pvclock_update(kvm); pvclock_update_vm_gtod_copy(kvm); From 5ad7f18cd82cee8e773d40cc7a1465a526f2615c Mon Sep 17 00:00:00 2001 From: Tomas Melin Date: Thu, 7 Apr 2022 19:16:59 +0300 Subject: [PATCH 359/423] net: macb: Restart tx only if queue pointer is lagging commit 4298388574da ("net: macb: restart tx after tx used bit read") added support for restarting transmission. Restarting tx does not work in case controller asserts TXUBR interrupt and TQBP is already at the end of the tx queue. In that situation, restarting tx will immediately cause assertion of another TXUBR interrupt. The driver will end up in an infinite interrupt loop which it cannot break out of. For cases where TQBP is at the end of the tx queue, instead only clear TX_USED interrupt. As more data gets pushed to the queue, transmission will resume. This issue was observed on a Xilinx Zynq-7000 based board. During stress test of the network interface, driver would get stuck on interrupt loop within seconds or minutes causing CPU to stall. Signed-off-by: Tomas Melin Tested-by: Claudiu Beznea Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220407161659.14532-1-tomas.melin@vaisala.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 800d5ced5800..e475be29845c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1658,6 +1658,7 @@ static void macb_tx_restart(struct macb_queue *queue) unsigned int head = queue->tx_head; unsigned int tail = queue->tx_tail; struct macb *bp = queue->bp; + unsigned int head_idx, tbqp; if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(TXUBR)); @@ -1665,6 +1666,13 @@ static void macb_tx_restart(struct macb_queue *queue) if (head == tail) return; + tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp); + tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp)); + head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head)); + + if (tbqp == head_idx) + return; + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); } From b1871fd48efc567650dbdc974e5a2342a03fe0d2 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 8 Apr 2022 17:10:33 +0200 Subject: [PATCH 360/423] net/smc: use memcpy instead of snprintf to avoid out of bounds read Using snprintf() to convert not null-terminated strings to null terminated strings may cause out of bounds read in the source string. Therefore use memcpy() and terminate the target string with a null afterwards. Fixes: fa0866625543 ("net/smc: add support for user defined EIDs") Fixes: 3c572145c24e ("net/smc: add generic netlink support for system EID") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_clc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ce27399b38b1..f9f3f59c79de 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -191,7 +191,8 @@ static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, flags, SMC_NETLINK_DUMP_UEID); if (!hdr) return -ENOMEM; - snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); + memcpy(ueid_str, ueid, SMC_MAX_EID_LEN); + ueid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { genlmsg_cancel(skb, hdr); return -EMSGSIZE; @@ -252,7 +253,8 @@ int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) goto end; smc_ism_get_system_eid(&seid); - snprintf(seid_str, sizeof(seid_str), "%s", seid); + memcpy(seid_str, seid, SMC_MAX_EID_LEN); + seid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) goto err; read_lock(&smc_clc_eid_table.lock); From d22f4f977236f97e01255a80bca2ea93a8094fc8 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 8 Apr 2022 17:10:34 +0200 Subject: [PATCH 361/423] net/smc: Fix NULL pointer dereference in smc_pnet_find_ib() dev_name() was called with dev.parent as argument but without to NULL-check it before. Solve this by checking the pointer before the call to dev_name(). Fixes: af5f60c7e3d5 ("net/smc: allow PCI IDs as ib device names in the pnet table") Reported-by: syzbot+03e3e228510223dabd34@syzkaller.appspotmail.com Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_pnet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7984f8883472..7055ed10e316 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -311,8 +311,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || - !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, - IB_DEVICE_NAME_MAX - 1)) { + (ibdev->ibdev->dev.parent && + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1))) { goto out; } } From 49b7d376abe54a49e8bd5e64824032b7c97c62d4 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 8 Apr 2022 17:10:35 +0200 Subject: [PATCH 362/423] net/smc: Fix af_ops of child socket pointing to released memory Child sockets may inherit the af_ops from the parent listen socket. When the listen socket is released then the af_ops of the child socket points to released memory. Solve that by restoring the original af_ops for child sockets which inherited the parent af_ops. And clear any inherited user_data of the parent socket. Fixes: 8270d9c21041 ("net/smc: Limit backlog connections") Reviewed-by: Wenjia Zhang Signed-off-by: Karsten Graul Reviewed-by: D. Wythe Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f0d118e9f155..14ddc40149e8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -121,6 +121,7 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, bool *own_req) { struct smc_sock *smc; + struct sock *child; smc = smc_clcsock_user_data(sk); @@ -134,8 +135,17 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, } /* passthrough to original syn recv sock fct */ - return smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, - own_req); + child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, + own_req); + /* child must not inherit smc or its ops */ + if (child) { + rcu_assign_sk_user_data(child, NULL); + + /* v4-mapped sockets don't inherit parent ops. Don't restore. */ + if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops) + inet_csk(child)->icsk_af_ops = smc->ori_af_ops; + } + return child; drop: dst_release(dst); From 8467dda0c26583547731e7f3ea73fc3856bae3bf Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Sat, 9 Apr 2022 08:36:11 +0200 Subject: [PATCH 363/423] sctp: Initialize daddr on peeled off socket Function sctp_do_peeloff() wrongly initializes daddr of the original socket instead of the peeled off socket, which makes getpeername() return zeroes instead of the primary address. Initialize the new socket instead. Fixes: d570ee490fb1 ("[SCTP]: Correctly set daddr for IPv6 sockets during peeloff") Signed-off-by: Petr Malat Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20220409063611.673193-1-oss@malat.biz Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e1a9600be5e..7b0427658056 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5636,7 +5636,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) * Set the daddr and initialize id to something more random and also * copy over any ip options. */ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the From eb9c0d671e9432901b8a453e7915416f22f7f919 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:40 +0200 Subject: [PATCH 364/423] net: lan966x: Update lan966x_ptp_get_nominal_value The clk_per_cfg register represents the value added to the system clock for each clock cycle. The issue is that the default value is wrong, meaning that in case the DUT was a grandmaster then everone in the network was too slow. In case there was a grandmaster, then there is no issue because the DUT will configure clk_per_cfg register based on the master frequency. Fixes: d096459494a887 ("net: lan966x: Add support for ptp clocks") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index ae782778d6dd..0a1041da4384 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -29,10 +29,10 @@ enum { static u64 lan966x_ptp_get_nominal_value(void) { - u64 res = 0x304d2df1; - - res <<= 32; - return res; + /* This is the default value that for each system clock, the time of day + * is increased. It has the format 5.59 nanosecond. + */ + return 0x304d4873ecade305; } int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr) From 6476f90aefaf119c47ceccde52327464e813fe26 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:41 +0200 Subject: [PATCH 365/423] net: lan966x: Fix IGMP snooping when frames have vlan tag In case an IGMP frame has a vlan tag, then the function lan966x_hw_offload couldn't figure out that is a IGMP frame. Therefore the SW thinks that the frame was already forward by the HW which is not true. Extend lan966x_hw_offload to pop the vlan tag if are any and then check for IGMP frames. Fixes: 47aeea0d57e80c ("net: lan966x: Implement the callback SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED ") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 1f8c67f0261b..958e55596b82 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -446,6 +446,12 @@ static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, ANA_CPU_FWD_CFG_MLD_REDIR_ENA))) return true; + if (eth_type_vlan(skb->protocol)) { + skb = skb_vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + if (skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_IGMP) return false; From d7a947d289dc205fc717c004dcebe33b15305afd Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:42 +0200 Subject: [PATCH 366/423] net: lan966x: Fix when a port's upper is changed. On lan966x it is not allowed to have foreign interfaces under a bridge which already contains lan966x ports. So when a port leaves the bridge it would call switchdev_bridge_port_unoffload which eventually will notify the other ports that bridge left the vlan group but that is not true because the bridge is still part of the vlan group. Therefore when a port leaves the bridge, stop generating replays because already the HW cleared after itself and the other ports don't need to do anything else. Fixes: cf2f60897e921e ("net: lan966x: Add support to offload the forwarding.") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index e3555c94294d..df2bee678559 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -322,8 +322,7 @@ static int lan966x_port_prechangeupper(struct net_device *dev, if (netif_is_bridge_master(info->upper_dev) && !info->linking) switchdev_bridge_port_unoffload(port->dev, port, - &lan966x_switchdev_nb, - &lan966x_switchdev_blocking_nb); + NULL, NULL); return NOTIFY_DONE; } From 269219321eb7d7645a3122cf40a420c5dc655eb9 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:43 +0200 Subject: [PATCH 367/423] net: lan966x: Stop processing the MAC entry is port is wrong. Currently when getting a new MAC is learn, the HW generates an interrupt. So then the SW will check the new entry and checks if it arrived on a correct port. If it didn't just generate a warning. But this could still crash the system. Therefore stop processing that entry when an issue is seen. Fixes: 5ccd66e01cbef8 ("net: lan966x: add support for interrupts from analyzer") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index ce5970bdcc6a..2679111ef669 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -346,7 +346,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + continue; /* If the entry in SW is found, then there is nothing * to do @@ -392,7 +393,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + continue; mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); if (!mac_entry) From 6c6f9f31ecd47dce1d0dafca4bec8805f9bc97cd Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Apr 2022 10:14:59 +0200 Subject: [PATCH 368/423] netfilter: nf_tables: nft_parse_register can return a negative value Since commit 6e1acfa387b9 ("netfilter: nf_tables: validate registers coming from userspace.") nft_parse_register can return a negative value, but the function prototype is still returning an unsigned int. Fixes: 6e1acfa387b9 ("netfilter: nf_tables: validate registers coming from userspace.") Signed-off-by: Antoine Tenart Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 128ee3b300d6..16c3a39689f4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9363,7 +9363,7 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg) +static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; From fee2b871d8d6389c9b4bdf9346a99ccc1c98c9b8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 11:31:40 +0200 Subject: [PATCH 369/423] ALSA: core: Add snd_card_free_on_error() helper This is a small helper function to handle the error path more easily when an error happens during the probe for the device with the device-managed card. Since devres releases in the reverser order of the creations, usually snd_card_free() gets called at the last in the probe error path unless it already reached snd_card_register() calls. Due to this nature, when a driver expects the resource releases in card->private_free, this might be called too lately. As a workaround, one should call the probe like: static int __some_probe(...) { // do real probe.... } static int some_probe(...) { return snd_card_free_on_error(dev, __some_probe(dev, ...)); } so that the snd_card_free() is called explicitly at the beginning of the error path from the probe. This function will be used in the upcoming fixes to address the regressions by devres usages. Fixes: e8ad415b7a55 ("ALSA: core: Add managed card creation") Cc: Link: https://lore.kernel.org/r/20220412093141.8008-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/core.h | 1 + sound/core/init.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/include/sound/core.h b/include/sound/core.h index b7e9b58d3c78..6d4cc49584c6 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card); void snd_card_disconnect_sync(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); +int snd_card_free_on_error(struct device *dev, int ret); void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); diff --git a/sound/core/init.c b/sound/core/init.c index 31ba7024e3ad..726a8353201f 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -209,6 +209,12 @@ static void __snd_card_release(struct device *dev, void *data) * snd_card_register(), the very first devres action to call snd_card_free() * is added automatically. In that way, the resource disconnection is assured * at first, then released in the expected order. + * + * If an error happens at the probe before snd_card_register() is called and + * there have been other devres resources, you'd need to free the card manually + * via snd_card_free() call in the error; otherwise it may lead to UAF due to + * devres call orders. You can use snd_card_free_on_error() helper for + * handling it more easily. */ int snd_devm_card_new(struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size, @@ -235,6 +241,28 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid, } EXPORT_SYMBOL_GPL(snd_devm_card_new); +/** + * snd_card_free_on_error - a small helper for handling devm probe errors + * @dev: the managed device object + * @ret: the return code from the probe callback + * + * This function handles the explicit snd_card_free() call at the error from + * the probe callback. It's just a small helper for simplifying the error + * handling with the managed devices. + */ +int snd_card_free_on_error(struct device *dev, int ret) +{ + struct snd_card *card; + + if (!ret) + return 0; + card = devres_find(dev, __snd_card_release, NULL, NULL); + if (card) + snd_card_free(card); + return ret; +} +EXPORT_SYMBOL_GPL(snd_card_free_on_error); + static int snd_card_init(struct snd_card *card, struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size) From 313c7e57035125cb7533b53ddd0bc7aa562b433c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 11:31:41 +0200 Subject: [PATCH 370/423] ALSA: echoaudio: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 9c211bf392bb ("ALSA: echoaudio: Allocate resources with device-managed APIs") Reported-and-tested-by: Zheyu Ma Cc: Link: https://lore.kernel.org/r/CAMhUBjm2AdyEZ_-EgexdNDN7SvY4f89=4=FwAL+c0Mg0O+X50A@mail.gmail.com Link: https://lore.kernel.org/r/20220412093141.8008-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 25b012ef5c3e..c70c3ac4e99a 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1970,8 +1970,8 @@ static int snd_echo_create(struct snd_card *card, } /* constructor */ -static int snd_echo_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_echo_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2139,6 +2139,11 @@ static int snd_echo_probe(struct pci_dev *pci, return 0; } +static int snd_echo_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_echo_probe(pci, pci_id)); +} #if defined(CONFIG_PM_SLEEP) From 10b1881a97be240126891cb384bd3bc1869f52d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:25:58 +0200 Subject: [PATCH 371/423] ALSA: galaxy: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 35a245ec0619 ("ALSA: galaxy: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/galaxy/galaxy.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c index ea001c80149d..3164eb8510fa 100644 --- a/sound/isa/galaxy/galaxy.c +++ b/sound/isa/galaxy/galaxy.c @@ -478,7 +478,7 @@ static void snd_galaxy_free(struct snd_card *card) galaxy_set_config(galaxy, galaxy->config); } -static int snd_galaxy_probe(struct device *dev, unsigned int n) +static int __snd_galaxy_probe(struct device *dev, unsigned int n) { struct snd_galaxy *galaxy; struct snd_wss *chip; @@ -598,6 +598,11 @@ static int snd_galaxy_probe(struct device *dev, unsigned int n) return 0; } +static int snd_galaxy_probe(struct device *dev, unsigned int n) +{ + return snd_card_free_on_error(dev, __snd_galaxy_probe(dev, n)); +} + static struct isa_driver snd_galaxy_driver = { .match = snd_galaxy_match, .probe = snd_galaxy_probe, From d72458071150b802940204950d0d462ea3c913b1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:25:59 +0200 Subject: [PATCH 372/423] ALSA: sc6000: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 111601ff76e9 ("ALSA: sc6000: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/sc6000.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 26ab7ff80768..60398fced046 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -537,7 +537,7 @@ static void snd_sc6000_free(struct snd_card *card) sc6000_setup_board(vport, 0); } -static int snd_sc6000_probe(struct device *devptr, unsigned int dev) +static int __snd_sc6000_probe(struct device *devptr, unsigned int dev) { static const int possible_irqs[] = { 5, 7, 9, 10, 11, -1 }; static const int possible_dmas[] = { 1, 3, 0, -1 }; @@ -662,6 +662,11 @@ static int snd_sc6000_probe(struct device *devptr, unsigned int dev) return 0; } +static int snd_sc6000_probe(struct device *devptr, unsigned int dev) +{ + return snd_card_free_on_error(devptr, __snd_sc6000_probe(devptr, dev)); +} + static struct isa_driver snd_sc6000_driver = { .match = snd_sc6000_match, .probe = snd_sc6000_probe, From a8e84a5da18e6d786540aa4ceb6f969d5f1a441d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:00 +0200 Subject: [PATCH 373/423] ALSA: ad1889: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 567f58754109 ("ALSA: ad1889: Allocate resources with device-managed APIs") Link: https://lore.kernel.org/r/20220412102636.16000-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ad1889.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c index bba4dae8dcc7..50e30704bf6f 100644 --- a/sound/pci/ad1889.c +++ b/sound/pci/ad1889.c @@ -844,8 +844,8 @@ snd_ad1889_create(struct snd_card *card, struct pci_dev *pci) } static int -snd_ad1889_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +__snd_ad1889_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { int err; static int devno; @@ -904,6 +904,12 @@ snd_ad1889_probe(struct pci_dev *pci, return 0; } +static int snd_ad1889_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ad1889_probe(pci, pci_id)); +} + static const struct pci_device_id snd_ad1889_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_ANALOG_DEVICES, PCI_DEVICE_ID_AD1889JS) }, { 0, }, From 19401a9441236cfbbbeb1bef4ef4c8668db45dfc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:01 +0200 Subject: [PATCH 374/423] ALSA: ali5451: Fix the missing snd_card_free() call at probe error The recent cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 1f0819979248 ("ALSA: ali5451: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ali5451/ali5451.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index 92eb59db106d..2378a39abaeb 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -2124,8 +2124,8 @@ static int snd_ali_create(struct snd_card *card, return 0; } -static int snd_ali_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_ali_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct snd_ali *codec; @@ -2170,6 +2170,12 @@ static int snd_ali_probe(struct pci_dev *pci, return 0; } +static int snd_ali_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ali_probe(pci, pci_id)); +} + static struct pci_driver ali5451_driver = { .name = KBUILD_MODNAME, .id_table = snd_ali_ids, From d616a0246da88d811f9f4c3aa83003c05efd3af0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:02 +0200 Subject: [PATCH 375/423] ALSA: als4000: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 0e175f665960 ("ALSA: als4000: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-6-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/als4000.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c index 535eccd124be..f33aeb692a11 100644 --- a/sound/pci/als4000.c +++ b/sound/pci/als4000.c @@ -806,8 +806,8 @@ static void snd_card_als4000_free( struct snd_card *card ) snd_als4000_free_gameport(acard); } -static int snd_card_als4000_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_als4000_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -930,6 +930,12 @@ static int snd_card_als4000_probe(struct pci_dev *pci, return 0; } +static int snd_card_als4000_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_als4000_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_als4000_suspend(struct device *dev) { From 48e8adde8d1c586c799dab123fc1ebc8b8db620f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:03 +0200 Subject: [PATCH 376/423] ALSA: atiixp: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 86bde74dbf09 ("ALSA: atiixp: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-7-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/atiixp.c | 10 ++++++++-- sound/pci/atiixp_modem.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index b8e035d5930d..43d01f1847ed 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -1572,8 +1572,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci) } -static int snd_atiixp_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct atiixp *chip; @@ -1623,6 +1623,12 @@ static int snd_atiixp_probe(struct pci_dev *pci, return 0; } +static int snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id)); +} + static struct pci_driver atiixp_driver = { .name = KBUILD_MODNAME, .id_table = snd_atiixp_ids, diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index 178dce8ef1e9..8864c4c3c7e1 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -1201,8 +1201,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci) } -static int snd_atiixp_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct atiixp_modem *chip; @@ -1247,6 +1247,12 @@ static int snd_atiixp_probe(struct pci_dev *pci, return 0; } +static int snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id)); +} + static struct pci_driver atiixp_modem_driver = { .name = KBUILD_MODNAME, .id_table = snd_atiixp_ids, From b093de145bc8769c6e9207947afad9efe102f4f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:04 +0200 Subject: [PATCH 377/423] ALSA: au88x0: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: e44b5b440609 ("ALSA: au88x0: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-8-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/au88x0/au88x0.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c index 342ef2a6655e..eb234153691b 100644 --- a/sound/pci/au88x0/au88x0.c +++ b/sound/pci/au88x0/au88x0.c @@ -193,7 +193,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci) // constructor -- see "Constructor" sub-section static int -snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -310,6 +310,12 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_vortex_probe(pci, pci_id)); +} + // pci_driver definition static struct pci_driver vortex_driver = { .name = KBUILD_MODNAME, From 49fe36e1c02cb06f66689c888e4e767c31cd259d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:05 +0200 Subject: [PATCH 378/423] ALSA: azt3328: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 8c5823ef31e1 ("ALSA: azt3328: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-9-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/azt3328.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 089050470ff2..7f329dfc5404 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -2427,7 +2427,7 @@ snd_azf3328_create(struct snd_card *card, } static int -snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2520,6 +2520,12 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_azf3328_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static inline void snd_azf3328_suspend_regs(const struct snd_azf3328 *chip, From c79442cc5a38e46597bc647128c8f1de62d80020 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:06 +0200 Subject: [PATCH 379/423] ALSA: ca0106: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 1656fa6ea258 ("ALSA: ca0106: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-10-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ca0106/ca0106_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c index 8577f9fa5ea6..cf1bac7a435f 100644 --- a/sound/pci/ca0106/ca0106_main.c +++ b/sound/pci/ca0106/ca0106_main.c @@ -1725,8 +1725,8 @@ static int snd_ca0106_midi(struct snd_ca0106 *chip, unsigned int channel) } -static int snd_ca0106_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_ca0106_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1786,6 +1786,12 @@ static int snd_ca0106_probe(struct pci_dev *pci, return 0; } +static int snd_ca0106_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ca0106_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_ca0106_suspend(struct device *dev) { From 9bf5ed9a4e623583f15202d99f4521bc39050f61 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:07 +0200 Subject: [PATCH 380/423] ALSA: cs4281: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 99041fea70d0 ("ALSA: cs4281: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-11-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cs4281.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c index e7367402b84a..0c9cadf7b3b8 100644 --- a/sound/pci/cs4281.c +++ b/sound/pci/cs4281.c @@ -1827,8 +1827,8 @@ static void snd_cs4281_opl3_command(struct snd_opl3 *opl3, unsigned short cmd, spin_unlock_irqrestore(&opl3->reg_lock, flags); } -static int snd_cs4281_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_cs4281_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1888,6 +1888,12 @@ static int snd_cs4281_probe(struct pci_dev *pci, return 0; } +static int snd_cs4281_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_cs4281_probe(pci, pci_id)); +} + /* * Power Management */ From 2a56314798e0227cf51e3d1d184a419dc07bc173 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:08 +0200 Subject: [PATCH 381/423] ALSA: cs5535audio: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). Fixes: 5eba4c646dfe ("ALSA: cs5535audio: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-12-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cs5535audio/cs5535audio.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index 499fa0148f9a..440b8f9b40c9 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -281,8 +281,8 @@ static int snd_cs5535audio_create(struct snd_card *card, return 0; } -static int snd_cs5535audio_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -331,6 +331,12 @@ static int snd_cs5535audio_probe(struct pci_dev *pci, return 0; } +static int snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_cs5535audio_probe(pci, pci_id)); +} + static struct pci_driver cs5535audio_driver = { .name = KBUILD_MODNAME, .id_table = snd_cs5535audio_ids, From f37019b6bfe2e13cc536af0e6a42ed62005392ae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:09 +0200 Subject: [PATCH 382/423] ALSA: emu10k1x: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 2b377c6b6012 ("ALSA: emu10k1x: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-13-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index c49c44dc1082..89043392f3ec 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1491,8 +1491,8 @@ static int snd_emu10k1x_midi(struct emu10k1x *emu) return 0; } -static int snd_emu10k1x_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_emu10k1x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1554,6 +1554,12 @@ static int snd_emu10k1x_probe(struct pci_dev *pci, return 0; } +static int snd_emu10k1x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_emu10k1x_probe(pci, pci_id)); +} + // PCI IDs static const struct pci_device_id snd_emu10k1x_ids[] = { { PCI_VDEVICE(CREATIVE, 0x0006), 0 }, /* Dell OEM version (EMU10K1) */ From c2dc46932d117a1505f589ad1db3095aa6789058 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:10 +0200 Subject: [PATCH 383/423] ALSA: ens137x: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 10ed6eaf9d72 ("ALSA: ens137x: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-14-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ens1370.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 2651f0c64c06..94efe347a97a 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -2304,8 +2304,8 @@ static irqreturn_t snd_audiopci_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int snd_audiopci_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_audiopci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2369,6 +2369,12 @@ static int snd_audiopci_probe(struct pci_dev *pci, return 0; } +static int snd_audiopci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_audiopci_probe(pci, pci_id)); +} + static struct pci_driver ens137x_driver = { .name = KBUILD_MODNAME, .id_table = snd_audiopci_ids, From bc22628591e5913e67edb3c2a89b97849e30a8f8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:11 +0200 Subject: [PATCH 384/423] ALSA: es1938: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 08e9d3ab4cc1 ("ALSA: es1938: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-15-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/es1938.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c index 00b976f42a3d..e34ec6f89e7e 100644 --- a/sound/pci/es1938.c +++ b/sound/pci/es1938.c @@ -1716,8 +1716,8 @@ static int snd_es1938_mixer(struct es1938 *chip) } -static int snd_es1938_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_es1938_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1796,6 +1796,12 @@ static int snd_es1938_probe(struct pci_dev *pci, return 0; } +static int snd_es1938_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_es1938_probe(pci, pci_id)); +} + static struct pci_driver es1938_driver = { .name = KBUILD_MODNAME, .id_table = snd_es1938_ids, From de9a01bc95a9e5e36d0659521bb04579053d8566 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:12 +0200 Subject: [PATCH 385/423] ALSA: es1968: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: a7b4cbfdc701 ("ALSA: es1968: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-16-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/es1968.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index 6a8a02a9ecf4..4a7e20bb11bc 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -2741,8 +2741,8 @@ static int snd_es1968_create(struct snd_card *card, /* */ -static int snd_es1968_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_es1968_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2848,6 +2848,12 @@ static int snd_es1968_probe(struct pci_dev *pci, return 0; } +static int snd_es1968_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_es1968_probe(pci, pci_id)); +} + static struct pci_driver es1968_driver = { .name = KBUILD_MODNAME, .id_table = snd_es1968_ids, From 7f611274a3d1657a67b3fa8cd0cec1dee00e02b4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:13 +0200 Subject: [PATCH 386/423] ALSA: fm801: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 47c413395376 ("ALSA: fm801: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-17-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/fm801.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 9c22ff19e56d..62b3cb126c6d 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1268,8 +1268,8 @@ static int snd_fm801_create(struct snd_card *card, return 0; } -static int snd_card_fm801_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_fm801_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1333,6 +1333,12 @@ static int snd_card_fm801_probe(struct pci_dev *pci, return 0; } +static int snd_card_fm801_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_fm801_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static const unsigned char saved_regs[] = { FM801_PCM_VOL, FM801_I2S_VOL, FM801_FM_VOL, FM801_REC_SRC, From 4a850a0079ce601c0c4016f4edb7d618e811ed7d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:14 +0200 Subject: [PATCH 387/423] ALSA: ice1724: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 314f6dbb1f33 ("ALSA: ice1724: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-18-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ice1712/ice1724.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index f6275868877a..6fab2ad85bbe 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -2519,8 +2519,8 @@ static int snd_vt1724_create(struct snd_card *card, * */ -static int snd_vt1724_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_vt1724_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2662,6 +2662,12 @@ static int snd_vt1724_probe(struct pci_dev *pci, return 0; } +static int snd_vt1724_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_vt1724_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_vt1724_suspend(struct device *dev) { From 71b21f5f8970a87f034138454ebeff0608d24875 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:15 +0200 Subject: [PATCH 388/423] ALSA: intel8x0: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 7835e0901e24 ("ALSA: intel8x0: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-19-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/intel8x0.c | 10 ++++++++-- sound/pci/intel8x0m.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index a51032b3ac4d..ae285c0a629c 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -3109,8 +3109,8 @@ static int check_default_spdif_aclink(struct pci_dev *pci) return 0; } -static int snd_intel8x0_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_intel8x0_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct intel8x0 *chip; @@ -3189,6 +3189,12 @@ static int snd_intel8x0_probe(struct pci_dev *pci, return 0; } +static int snd_intel8x0_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_intel8x0_probe(pci, pci_id)); +} + static struct pci_driver intel8x0_driver = { .name = KBUILD_MODNAME, .id_table = snd_intel8x0_ids, diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 7de3cb2f17b5..2845cc006d0c 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -1178,8 +1178,8 @@ static struct shortname_table { { 0 }, }; -static int snd_intel8x0m_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_intel8x0m_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct intel8x0m *chip; @@ -1225,6 +1225,12 @@ static int snd_intel8x0m_probe(struct pci_dev *pci, return 0; } +static int snd_intel8x0m_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_intel8x0m_probe(pci, pci_id)); +} + static struct pci_driver intel8x0m_driver = { .name = KBUILD_MODNAME, .id_table = snd_intel8x0m_ids, From c01b723a56ce18ae66ff18c5803942badc15fbcd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:16 +0200 Subject: [PATCH 389/423] ALSA: korg1212: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: b5cde369b618 ("ALSA: korg1212: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-20-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/korg1212/korg1212.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 5c9e240ff6a9..33b4f95d65b3 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -2355,7 +2355,7 @@ snd_korg1212_probe(struct pci_dev *pci, err = snd_korg1212_create(card, pci); if (err < 0) - return err; + goto error; strcpy(card->driver, "korg1212"); strcpy(card->shortname, "korg1212"); @@ -2366,10 +2366,14 @@ snd_korg1212_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver korg1212_driver = { From ae86bf5c2a8d81418eadf1c31dd9253b609e3093 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:17 +0200 Subject: [PATCH 390/423] ALSA: maestro3: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 5c0939253c3c ("ALSA: maestro3: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-21-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/maestro3.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c index 056838ead21d..261850775c80 100644 --- a/sound/pci/maestro3.c +++ b/sound/pci/maestro3.c @@ -2637,7 +2637,7 @@ snd_m3_create(struct snd_card *card, struct pci_dev *pci, /* */ static int -snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2702,6 +2702,12 @@ snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_m3_probe(pci, pci_id)); +} + static struct pci_driver m3_driver = { .name = KBUILD_MODNAME, .id_table = snd_m3_ids, From 348f08de55b149e41a05111d1a713c4484e5a426 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:18 +0200 Subject: [PATCH 391/423] ALSA: riptide: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 546c201a891e ("ALSA: riptide: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-22-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/riptide/riptide.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index 5a987c683c41..b37c877c2c16 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -2023,7 +2023,7 @@ static void snd_riptide_joystick_remove(struct pci_dev *pci) #endif static int -snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2124,6 +2124,12 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_riptide_probe(pci, pci_id)); +} + static struct pci_driver driver = { .name = KBUILD_MODNAME, .id_table = snd_riptide_ids, From 55d2d046b23b9bcb907f6b3e38e52113d55085eb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:19 +0200 Subject: [PATCH 392/423] ALSA: rme32: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 102e6156ded2 ("ALSA: rme32: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-23-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c index 5b6bd9f0b2f7..9c0ac025e143 100644 --- a/sound/pci/rme32.c +++ b/sound/pci/rme32.c @@ -1875,7 +1875,7 @@ static void snd_rme32_card_free(struct snd_card *card) } static int -snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct rme32 *rme32; @@ -1927,6 +1927,12 @@ snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_rme32_probe(pci, pci_id)); +} + static struct pci_driver rme32_driver = { .name = KBUILD_MODNAME, .id_table = snd_rme32_ids, From 93b884f8d82f08c7af542703a724cc23cd2d5bfc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:20 +0200 Subject: [PATCH 393/423] ALSA: rme96: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: df06df7cc997 ("ALSA: rme96: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-24-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme96.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 8fc811504920..bccb7e0d3d11 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -2430,8 +2430,8 @@ static void snd_rme96_card_free(struct snd_card *card) } static int -snd_rme96_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +__snd_rme96_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct rme96 *rme96; @@ -2498,6 +2498,12 @@ snd_rme96_probe(struct pci_dev *pci, return 0; } +static int snd_rme96_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_rme96_probe(pci, pci_id)); +} + static struct pci_driver rme96_driver = { .name = KBUILD_MODNAME, .id_table = snd_rme96_ids, From b087a381d7386ec95803222d0d9b1ac499550713 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:21 +0200 Subject: [PATCH 394/423] ALSA: sonicvibes: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 2ca6cbde6ad7 ("ALSA: sonicvibes: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-25-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/sonicvibes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c index c8c49881008f..f91cbf6eeca0 100644 --- a/sound/pci/sonicvibes.c +++ b/sound/pci/sonicvibes.c @@ -1387,8 +1387,8 @@ static int snd_sonicvibes_midi(struct sonicvibes *sonic, return 0; } -static int snd_sonic_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_sonic_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1459,6 +1459,12 @@ static int snd_sonic_probe(struct pci_dev *pci, return 0; } +static int snd_sonic_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_sonic_probe(pci, pci_id)); +} + static struct pci_driver sonicvibes_driver = { .name = KBUILD_MODNAME, .id_table = snd_sonic_ids, From 27a0963f9cea5be3c68281f07fe82cdf712ef333 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:22 +0200 Subject: [PATCH 395/423] ALSA: via82xx: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: afaf99751d0c ("ALSA: via82xx: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-26-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/via82xx.c | 10 ++++++++-- sound/pci/via82xx_modem.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 65514f7e42d7..361b83fd721e 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2458,8 +2458,8 @@ static int check_dxs_list(struct pci_dev *pci, int revision) return VIA_DXS_48K; }; -static int snd_via82xx_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct via82xx *chip; @@ -2569,6 +2569,12 @@ static int snd_via82xx_probe(struct pci_dev *pci, return 0; } +static int snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id)); +} + static struct pci_driver via82xx_driver = { .name = KBUILD_MODNAME, .id_table = snd_via82xx_ids, diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c index 234f7fbed236..ca7f024bf8ec 100644 --- a/sound/pci/via82xx_modem.c +++ b/sound/pci/via82xx_modem.c @@ -1103,8 +1103,8 @@ static int snd_via82xx_create(struct snd_card *card, } -static int snd_via82xx_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct via82xx_modem *chip; @@ -1157,6 +1157,12 @@ static int snd_via82xx_probe(struct pci_dev *pci, return 0; } +static int snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id)); +} + static struct pci_driver via82xx_modem_driver = { .name = KBUILD_MODNAME, .id_table = snd_via82xx_modem_ids, From 5e154dfb4f9995096aa6d342df75040ae802c17e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:23 +0200 Subject: [PATCH 396/423] ALSA: intel_hdmi: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 854577ac2aea ("ALSA: x86: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-27-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index b00634663346..0d828e35b401 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1652,7 +1652,7 @@ static void hdmi_lpe_audio_free(struct snd_card *card) * This function is called when the i915 driver creates the * hdmi-lpe-audio platform device. */ -static int hdmi_lpe_audio_probe(struct platform_device *pdev) +static int __hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; struct snd_intelhad_card *card_ctx; @@ -1815,6 +1815,11 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) return 0; } +static int hdmi_lpe_audio_probe(struct platform_device *pdev) +{ + return snd_card_free_on_error(&pdev->dev, __hdmi_lpe_audio_probe(pdev)); +} + static const struct dev_pm_ops hdmi_lpe_audio_pm = { SET_SYSTEM_SLEEP_PM_OPS(hdmi_lpe_audio_suspend, hdmi_lpe_audio_resume) }; From 2236a3243ff8291e97c70097dd11a0fdb8904380 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:24 +0200 Subject: [PATCH 397/423] ALSA: sis7019: Fix the missing error handling The previous cleanup with devres forgot to replace the snd_card_free() call with the devm version. Moreover, it still needs the manual call of snd_card_free() at the probe error path, otherwise the reverse order of the releases may happen. This patch addresses those issues. Fixes: 499ddc16394c ("ALSA: sis7019: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-28-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/sis7019.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c index 0b722b0e0604..fabe393607f8 100644 --- a/sound/pci/sis7019.c +++ b/sound/pci/sis7019.c @@ -1331,8 +1331,8 @@ static int sis_chip_create(struct snd_card *card, return 0; } -static int snd_sis7019_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_sis7019_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct sis7019 *sis; @@ -1352,8 +1352,8 @@ static int snd_sis7019_probe(struct pci_dev *pci, if (!codecs) codecs = SIS_PRIMARY_CODEC_PRESENT; - rc = snd_card_new(&pci->dev, index, id, THIS_MODULE, - sizeof(*sis), &card); + rc = snd_devm_card_new(&pci->dev, index, id, THIS_MODULE, + sizeof(*sis), &card); if (rc < 0) return rc; @@ -1386,6 +1386,12 @@ static int snd_sis7019_probe(struct pci_dev *pci, return 0; } +static int snd_sis7019_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_sis7019_probe(pci, pci_id)); +} + static struct pci_driver sis7019_driver = { .name = KBUILD_MODNAME, .id_table = snd_sis7019_ids, From f0438155273f057fec9818bc9d1b782ba35cf6a1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:25 +0200 Subject: [PATCH 398/423] ALSA: bt87x: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 9e80ed64a006 ("ALSA: bt87x: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-29-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/bt87x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index d23f93163841..621985bfee5d 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -805,8 +805,8 @@ static int snd_bt87x_detect_card(struct pci_dev *pci) return SND_BT87X_BOARD_UNKNOWN; } -static int snd_bt87x_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_bt87x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -889,6 +889,12 @@ static int snd_bt87x_probe(struct pci_dev *pci, return 0; } +static int snd_bt87x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_bt87x_probe(pci, pci_id)); +} + /* default entries for all Bt87x cards - it's not exported */ /* driver_data is set to 0 to call detection */ static const struct pci_device_id snd_bt87x_default_ids[] = { From d04e84b9817c652002f0ee9b42059d41493e9118 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:26 +0200 Subject: [PATCH 399/423] ALSA: lola: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 098fe3d6e775 ("ALSA: lola: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-30-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/lola/lola.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c index 5269a1d396a5..1aa30e90b86a 100644 --- a/sound/pci/lola/lola.c +++ b/sound/pci/lola/lola.c @@ -637,8 +637,8 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev) return 0; } -static int lola_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __lola_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -687,6 +687,12 @@ static int lola_probe(struct pci_dev *pci, return 0; } +static int lola_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __lola_probe(pci, pci_id)); +} + /* PCI IDs */ static const struct pci_device_id lola_ids[] = { { PCI_VDEVICE(DIGIGRAM, 0x0001) }, From ab8bce9da6102c575c473c053672547589bc4c59 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:27 +0200 Subject: [PATCH 400/423] ALSA: als300: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 21a9314cf93b ("ALSA: als300: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-31-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/als300.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/als300.c b/sound/pci/als300.c index b86565dcdbe4..c70aff060120 100644 --- a/sound/pci/als300.c +++ b/sound/pci/als300.c @@ -708,7 +708,7 @@ static int snd_als300_probe(struct pci_dev *pci, err = snd_als300_create(card, pci, chip_type); if (err < 0) - return err; + goto error; strcpy(card->driver, "ALS300"); if (chip->chip_type == DEVICE_ALS300_PLUS) @@ -723,11 +723,15 @@ static int snd_als300_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver als300_driver = { From bf4067e8a19eae67c45659a956c361d59251ba57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:28 +0200 Subject: [PATCH 401/423] ALSA: aw2: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 33631012cd06 ("ALSA: aw2: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-32-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/aw2/aw2-alsa.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index d56f126d6fdd..29a4bcdec237 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -275,7 +275,7 @@ static int snd_aw2_probe(struct pci_dev *pci, /* (3) Create main component */ err = snd_aw2_create(card, pci); if (err < 0) - return err; + goto error; /* initialize mutex */ mutex_init(&chip->mtx); @@ -294,13 +294,17 @@ static int snd_aw2_probe(struct pci_dev *pci, /* (6) Register card instance */ err = snd_card_register(card); if (err < 0) - return err; + goto error; /* (7) Set PCI driver data */ pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } /* open callback */ From a59396b1c11823c69c31621198c04def17f3a869 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:29 +0200 Subject: [PATCH 402/423] ALSA: cmipci: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 87e082ad84a7 ("ALSA: cmipci: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-33-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cmipci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index dab801d9d3b4..727db6d43391 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -3247,15 +3247,19 @@ static int snd_cmipci_probe(struct pci_dev *pci, err = snd_cmipci_create(card, pci, dev); if (err < 0) - return err; + goto error; err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } #ifdef CONFIG_PM_SLEEP From 60797a21dd8360a99ba797f8ca587087c07bb54c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:30 +0200 Subject: [PATCH 403/423] ALSA: lx6464es: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 6f16c19b115e ("ALSA: lx6464es: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-34-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/lx6464es/lx6464es.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index 168a1084f730..bd9b6148dd6f 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -1019,7 +1019,7 @@ static int snd_lx6464es_probe(struct pci_dev *pci, err = snd_lx6464es_create(card, pci); if (err < 0) { dev_err(card->dev, "error during snd_lx6464es_create\n"); - return err; + goto error; } strcpy(card->driver, "LX6464ES"); @@ -1036,12 +1036,16 @@ static int snd_lx6464es_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; dev_dbg(chip->card->dev, "initialization successful\n"); pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver lx6464es_driver = { From 6ebc16e206aa82ddb0450c907865c55bcb7c0f43 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:31 +0200 Subject: [PATCH 404/423] ALSA: oxygen: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 596ae97ab0ce ("ALSA: oxygen: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-35-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/oxygen/oxygen_lib.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 4fb3f2484fdb..92ffe9dc20c5 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -576,7 +576,7 @@ static void oxygen_card_free(struct snd_card *card) mutex_destroy(&chip->mutex); } -int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, +static int __oxygen_pci_probe(struct pci_dev *pci, int index, char *id, struct module *owner, const struct pci_device_id *ids, int (*get_model)(struct oxygen *chip, @@ -701,6 +701,16 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, pci_set_drvdata(pci, card); return 0; } + +int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, + struct module *owner, + const struct pci_device_id *ids, + int (*get_model)(struct oxygen *chip, + const struct pci_device_id *id)) +{ + return snd_card_free_on_error(&pci->dev, + __oxygen_pci_probe(pci, index, id, owner, ids, get_model)); +} EXPORT_SYMBOL(oxygen_pci_probe); #ifdef CONFIG_PM_SLEEP From e2263f0bf7443a200a5c1c418baefd92f1674600 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:32 +0200 Subject: [PATCH 405/423] ALSA: hdsp: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: d136b8e54f92 ("ALSA: hdsp: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-36-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdsp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 96c12dfb24cf..3db641318d3a 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -5444,17 +5444,21 @@ static int snd_hdsp_probe(struct pci_dev *pci, hdsp->pci = pci; err = snd_hdsp_create(card, hdsp); if (err) - return err; + goto error; strcpy(card->shortname, "Hammerfall DSP"); sprintf(card->longname, "%s at 0x%lx, irq %d", hdsp->card_name, hdsp->port, hdsp->irq); err = snd_card_register(card); if (err) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver hdsp_driver = { From eab521aebcdeb1c801009503e3a7f8989e3c6b36 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:33 +0200 Subject: [PATCH 406/423] ALSA: hdspm: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 0195ca5fd1f4 ("ALSA: hdspm: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-37-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index ff06ee82607c..fa1812e7a49d 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -6895,7 +6895,7 @@ static int snd_hdspm_probe(struct pci_dev *pci, err = snd_hdspm_create(card, hdspm); if (err < 0) - return err; + goto error; if (hdspm->io_type != MADIface) { snprintf(card->shortname, sizeof(card->shortname), "%s_%x", @@ -6914,12 +6914,16 @@ static int snd_hdspm_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver hdspm_driver = { From b2aa4f80693b7841e5ac4eadbd2d8cec56b10a51 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:34 +0200 Subject: [PATCH 407/423] ALSA: rme9652: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: b1002b2d41c5 ("ALSA: rme9652: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-38-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/rme9652.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 7755e19aa776..1d614fe89a6a 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -2572,7 +2572,7 @@ static int snd_rme9652_probe(struct pci_dev *pci, rme9652->pci = pci; err = snd_rme9652_create(card, rme9652, precise_ptr[dev]); if (err) - return err; + goto error; strcpy(card->shortname, rme9652->card_name); @@ -2580,10 +2580,14 @@ static int snd_rme9652_probe(struct pci_dev *pci, card->shortname, rme9652->port, rme9652->irq); err = snd_card_register(card); if (err) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver rme9652_driver = { From 4fb27190879b82e48ce89a56e9d6c04437dbc065 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:35 +0200 Subject: [PATCH 408/423] ALSA: mtpav: Don't call card private_free at probe error path The card destructor of nm256 driver does merely stopping the running timer, and it's superfluous for the probe error handling. Moreover, calling this via the previous devres change would lead to another problem due to the reverse call order. This patch moves the setup of the private_free callback after the card registration, so that it can be used only after fully set up. Fixes: aa92050f10f0 ("ALSA: mtpav: Allocate resources with device-managed APIs") Link: https://lore.kernel.org/r/20220412102636.16000-39-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/drivers/mtpav.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c index 11235baaf6fa..f212f233ea61 100644 --- a/sound/drivers/mtpav.c +++ b/sound/drivers/mtpav.c @@ -693,8 +693,6 @@ static int snd_mtpav_probe(struct platform_device *dev) mtp_card->outmidihwport = 0xffffffff; timer_setup(&mtp_card->timer, snd_mtpav_output_timer, 0); - card->private_free = snd_mtpav_free; - err = snd_mtpav_get_RAWMIDI(mtp_card); if (err < 0) return err; @@ -716,6 +714,8 @@ static int snd_mtpav_probe(struct platform_device *dev) if (err < 0) return err; + card->private_free = snd_mtpav_free; + platform_set_drvdata(dev, card); printk(KERN_INFO "Motu MidiTimePiece on parallel port irq: %d ioport: 0x%lx\n", irq, port); return 0; From f20ae5074dfb38f23b0c07c62bdf8e7254a0acf8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:36 +0200 Subject: [PATCH 409/423] ALSA: nm256: Don't call card private_free at probe error path The card destructor of nm256 driver does merely stopping the running streams, and it's superfluous for the probe error handling. Moreover, calling this via the previous devres change would lead to another problem due to the reverse call order. This patch moves the setup of the private_free callback after the card registration, so that it can be used only after fully set up. Fixes: c19935f04784 ("ALSA: nm256: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-40-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/nm256/nm256.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c index c9c178504959..f99a1e96e923 100644 --- a/sound/pci/nm256/nm256.c +++ b/sound/pci/nm256/nm256.c @@ -1573,7 +1573,6 @@ snd_nm256_create(struct snd_card *card, struct pci_dev *pci) chip->coeffs_current = 0; snd_nm256_init_chip(chip); - card->private_free = snd_nm256_free; // pci_set_master(pci); /* needed? */ return 0; @@ -1680,6 +1679,7 @@ static int snd_nm256_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) return err; + card->private_free = snd_nm256_free; pci_set_drvdata(pci, card); return 0; From c40160f2998c897231f8454bf797558d30a20375 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 6 Apr 2022 00:28:15 +0200 Subject: [PATCH 410/423] gcc-plugins: latent_entropy: use /dev/urandom While the latent entropy plugin mostly doesn't derive entropy from get_random_const() for measuring the call graph, when __latent_entropy is applied to a constant, then it's initialized statically to output from get_random_const(). In that case, this data is derived from a 64-bit seed, which means a buffer of 512 bits doesn't really have that amount of compile-time entropy. This patch fixes that shortcoming by just buffering chunks of /dev/urandom output and doling it out as requested. At the same time, it's important that we don't break the use of -frandom-seed, for people who want the runtime benefits of the latent entropy plugin, while still having compile-time determinism. In that case, we detect whether gcc's set_random_seed() has been called by making a call to get_random_seed(noinit=true) in the plugin init function, which is called after set_random_seed() is called but before anything that calls get_random_seed(noinit=false), and seeing if it's zero or not. If it's not zero, we're in deterministic mode, and so we just generate numbers with a basic xorshift prng. Note that we don't detect if -frandom-seed is being used using the documented local_tick variable, because it's assigned via: local_tick = (unsigned) tv.tv_sec * 1000 + tv.tv_usec / 1000; which may well overflow and become -1 on its own, and so isn't reliable: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105171 [kees: The 256 byte rnd_buf size was chosen based on average (250), median (64), and std deviation (575) bytes of used entropy for a defconfig x86_64 build] Fixes: 38addce8b600 ("gcc-plugins: Add latent_entropy plugin") Cc: stable@vger.kernel.org Cc: PaX Team Signed-off-by: Jason A. Donenfeld Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220405222815.21155-1-Jason@zx2c4.com --- scripts/gcc-plugins/latent_entropy_plugin.c | 44 +++++++++++++-------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 589454bce930..8425da41de0d 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = { .help = "disable\tturn off latent entropy instrumentation\n", }; -static unsigned HOST_WIDE_INT seed; -/* - * get_random_seed() (this is a GCC function) generates the seed. - * This is a simple random generator without any cryptographic security because - * the entropy doesn't come from here. - */ +static unsigned HOST_WIDE_INT deterministic_seed; +static unsigned HOST_WIDE_INT rnd_buf[32]; +static size_t rnd_idx = ARRAY_SIZE(rnd_buf); +static int urandom_fd = -1; + static unsigned HOST_WIDE_INT get_random_const(void) { - unsigned int i; - unsigned HOST_WIDE_INT ret = 0; - - for (i = 0; i < 8 * sizeof(ret); i++) { - ret = (ret << 1) | (seed & 1); - seed >>= 1; - if (ret & 1) - seed ^= 0xD800000000000000ULL; + if (deterministic_seed) { + unsigned HOST_WIDE_INT w = deterministic_seed; + w ^= w << 13; + w ^= w >> 7; + w ^= w << 17; + deterministic_seed = w; + return deterministic_seed; } - return ret; + if (urandom_fd < 0) { + urandom_fd = open("/dev/urandom", O_RDONLY); + gcc_assert(urandom_fd >= 0); + } + if (rnd_idx >= ARRAY_SIZE(rnd_buf)) { + gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf)); + rnd_idx = 0; + } + return rnd_buf[rnd_idx++]; } static tree tree_get_random_const(tree type) @@ -537,8 +543,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused, tree type, id; int quals; - seed = get_random_seed(false); - if (in_lto_p) return; @@ -573,6 +577,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const struct plugin_argument * const argv = plugin_info->argv; int i; + /* + * Call get_random_seed() with noinit=true, so that this returns + * 0 in the case where no seed has been passed via -frandom-seed. + */ + deterministic_seed = get_random_seed(true); + static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = { { .base = &latent_entropy_decl, From 932aba1e169090357a77af18850a10c256b50819 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 12 Apr 2022 05:41:00 -0400 Subject: [PATCH 411/423] stat: fix inconsistency between struct stat and struct compat_stat struct stat (defined in arch/x86/include/uapi/asm/stat.h) has 32-bit st_dev and st_rdev; struct compat_stat (defined in arch/x86/include/asm/compat.h) has 16-bit st_dev and st_rdev followed by a 16-bit padding. This patch fixes struct compat_stat to match struct stat. [ Historical note: the old x86 'struct stat' did have that 16-bit field that the compat layer had kept around, but it was changes back in 2003 by "struct stat - support larger dev_t": https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/?id=e95b2065677fe32512a597a79db94b77b90c968d and back in those days, the x86_64 port was still new, and separate from the i386 code, and had already picked up the old version with a 16-bit st_dev field ] Note that we can't change compat_dev_t because it is used by compat_loop_info. Also, if the st_dev and st_rdev values are 32-bit, we don't have to use old_valid_dev to test if the value fits into them. This fixes -EOVERFLOW on filesystems that are on NVMe because NVMe uses the major number 259. Signed-off-by: Mikulas Patocka Cc: Andreas Schwab Cc: Matthew Wilcox Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/x86/include/asm/compat.h | 6 ++---- fs/stat.c | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 7516e4199b3c..20fd0acd7d80 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -28,15 +28,13 @@ typedef u16 compat_ipc_pid_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; diff --git a/fs/stat.c b/fs/stat.c index 7f734be0e57e..5c2c94464e8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -348,9 +348,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat # define choose_32_64(a,b) b #endif -#define valid_dev(x) choose_32_64(old_valid_dev(x),true) -#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) - #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif @@ -359,7 +356,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; - if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) @@ -367,7 +366,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) #endif INIT_STRUCT_STAT_PADDING(tmp); - tmp.st_dev = encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -377,7 +376,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -665,11 +664,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); - tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -679,7 +680,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = old_encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; From 925ca893b4a65177394581737b95d03fea2660f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Apr 2022 07:48:08 +0200 Subject: [PATCH 412/423] ALSA: memalloc: Add fallback SG-buffer allocations for x86 The recent change for memory allocator replaced the SG-buffer handling helper for x86 with the standard non-contiguous page handler. This works for most cases, but there is a corner case I obviously overlooked, namely, the fallback of non-contiguous handler without IOMMU. When the system runs without IOMMU, the core handler tries to use the continuous pages with a single SGL entry. It works nicely for most cases, but when the system memory gets fragmented, the large allocation may fail frequently. Ideally the non-contig handler could deal with the proper SG pages, it's cumbersome to extend for now. As a workaround, here we add new types for (minimalistic) SG allocations, instead, so that the allocator falls back to those types automatically when the allocation with the standard API failed. BTW, one better (but pretty minor) improvement from the previous SG-buffer code is that this provides the proper mmap support without the PCM's page fault handling. Fixes: 2c95b92ecd92 ("ALSA: memalloc: Unify x86 SG-buffer handling (take#3)") BugLink: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/2272 BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1198248 Cc: Link: https://lore.kernel.org/r/20220413054808.7547-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/memalloc.h | 5 ++ sound/core/memalloc.c | 111 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 1 deletion(-) diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 653dfffb3ac8..8d79cebf95f3 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -51,6 +51,11 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ #define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC #endif +/* fallback types, don't use those directly */ +#ifdef CONFIG_SND_DMA_SGBUF +#define SNDRV_DMA_TYPE_DEV_SG_FALLBACK 10 +#define SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK 11 +#endif /* * info for buffer allocation diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 6fd763d4d15b..15dc7160ba34 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -499,6 +499,10 @@ static const struct snd_malloc_ops snd_dma_wc_ops = { }; #endif /* CONFIG_X86 */ +#ifdef CONFIG_SND_DMA_SGBUF +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); +#endif + /* * Non-contiguous pages allocator */ @@ -509,8 +513,18 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); - if (!sgt) + if (!sgt) { +#ifdef CONFIG_SND_DMA_SGBUF + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + else + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + return snd_dma_sg_fallback_alloc(dmab, size); +#else return NULL; +#endif + } + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, sg_dma_address(sgt->sgl)); p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); @@ -633,6 +647,8 @@ static void *snd_dma_sg_wc_alloc(struct snd_dma_buffer *dmab, size_t size) if (!p) return NULL; + if (dmab->dev.type != SNDRV_DMA_TYPE_DEV_WC_SG) + return p; for_each_sgtable_page(sgt, &iter, 0) set_memory_wc(sg_wc_address(&iter), 1); return p; @@ -665,6 +681,95 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { .get_page = snd_dma_noncontig_get_page, .get_chunk_size = snd_dma_noncontig_get_chunk_size, }; + +/* Fallback SG-buffer allocations for x86 */ +struct snd_dma_sg_fallback { + size_t count; + struct page **pages; + dma_addr_t *addrs; +}; + +static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, + struct snd_dma_sg_fallback *sgbuf) +{ + size_t i; + + if (sgbuf->count && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wb(sgbuf->pages, sgbuf->count); + for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) + dma_free_coherent(dmab->dev.dev, PAGE_SIZE, + page_address(sgbuf->pages[i]), + sgbuf->addrs[i]); + kvfree(sgbuf->pages); + kvfree(sgbuf->addrs); + kfree(sgbuf); +} + +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + struct snd_dma_sg_fallback *sgbuf; + struct page **pages; + size_t i, count; + void *p; + + sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); + if (!sgbuf) + return NULL; + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL); + if (!pages) + goto error; + sgbuf->pages = pages; + sgbuf->addrs = kvcalloc(count, sizeof(*sgbuf->addrs), GFP_KERNEL); + if (!sgbuf->addrs) + goto error; + + for (i = 0; i < count; sgbuf->count++, i++) { + p = dma_alloc_coherent(dmab->dev.dev, PAGE_SIZE, + &sgbuf->addrs[i], DEFAULT_GFP); + if (!p) + goto error; + sgbuf->pages[i] = virt_to_page(p); + } + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wc(pages, count); + p = vmap(pages, count, VM_MAP, PAGE_KERNEL); + if (!p) + goto error; + dmab->private_data = sgbuf; + return p; + + error: + __snd_dma_sg_fallback_free(dmab, sgbuf); + return NULL; +} + +static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) +{ + vunmap(dmab->area); + __snd_dma_sg_fallback_free(dmab, dmab->private_data); +} + +static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, + struct vm_area_struct *area) +{ + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); + return vm_map_pages(area, sgbuf->pages, sgbuf->count); +} + +static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { + .alloc = snd_dma_sg_fallback_alloc, + .free = snd_dma_sg_fallback_free, + .mmap = snd_dma_sg_fallback_mmap, + /* reuse vmalloc helpers */ + .get_addr = snd_dma_vmalloc_get_addr, + .get_page = snd_dma_vmalloc_get_page, + .get_chunk_size = snd_dma_vmalloc_get_chunk_size, +}; #endif /* CONFIG_SND_DMA_SGBUF */ /* @@ -736,6 +841,10 @@ static const struct snd_malloc_ops *dma_ops[] = { #ifdef CONFIG_GENERIC_ALLOCATOR [SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops, #endif /* CONFIG_GENERIC_ALLOCATOR */ +#ifdef CONFIG_SND_DMA_SGBUF + [SNDRV_DMA_TYPE_DEV_SG_FALLBACK] = &snd_dma_sg_fallback_ops, + [SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK] = &snd_dma_sg_fallback_ops, +#endif #endif /* CONFIG_HAS_DMA */ }; From 24d0c9f0e7de95fe3e3e0067cbea1cd5d413244b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 15:07:40 +0200 Subject: [PATCH 413/423] ALSA: usb-audio: Limit max buffer and period sizes per time In the previous fix, we increased the max buffer bytes from 1MB to 4MB so that we can use bigger buffers for the modern HiFi devices with higher rates, more channels and wider formats. OTOH, extending this has a concern that too big buffer is allowed for the lower rates, less channels and narrower formats; when an application tries to allocate as big buffer as possible, it'll lead to unexpectedly too huge size. Also, we had a problem about the inconsistent max buffer and period bytes for the implicit feedback mode when both streams have different channels. This was fixed by the (relatively complex) patch to reduce the max buffer and period bytes accordingly. This is an alternative fix for those, a patch to kill two birds with one stone (*): instead of increasing the max buffer bytes blindly and applying the reduction per channels, we simply use the hw constraints for the buffer and period "time". Meanwhile the max buffer and period bytes are set unlimited instead. Since the inconsistency of buffer (and period) bytes comes from the difference of the channels in the tied streams, as long as we care only about the buffer (and period) time, it doesn't matter; the buffer time is same for different channels, although we still allow higher buffer size. Similarly, this will allow more buffer bytes for HiFi devices while it also keeps the reasonable size for the legacy devices, too. As of this patch, the max period and buffer time are set to 1 and 2 seconds, which should be large enough for all possible use cases. (*) No animals were harmed in the making of this patch. Fixes: 98c27add5d96 ("ALSA: usb-audio: Cap upper limits of buffer/period bytes for implicit fb") Fixes: fee2ec8cceb3 ("ALSA: usb-audio: Increase max buffer size") Link: https://lore.kernel.org/r/20220412130740.18933-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 101 +++++++----------------------------------------- 1 file changed, 14 insertions(+), 87 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 37ee6df8b15a..6d699065e81a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -659,9 +659,6 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) #define hwc_debug(fmt, args...) do { } while(0) #endif -#define MAX_BUFFER_BYTES (4 * 1024 * 1024) -#define MAX_PERIOD_BYTES (512 * 1024) - static const struct snd_pcm_hardware snd_usb_hardware = { .info = SNDRV_PCM_INFO_MMAP | @@ -672,9 +669,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = SNDRV_PCM_INFO_PAUSE, .channels_min = 1, .channels_max = 256, - .buffer_bytes_max = MAX_BUFFER_BYTES, + .buffer_bytes_max = INT_MAX, /* limited by BUFFER_TIME later */ .period_bytes_min = 64, - .period_bytes_max = MAX_PERIOD_BYTES, + .period_bytes_max = INT_MAX, /* limited by PERIOD_TIME later */ .periods_min = 2, .periods_max = 1024, }; @@ -974,78 +971,6 @@ static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params, ep->cur_buffer_periods); } -/* get the adjusted max buffer (or period) bytes that can fit with the - * paired format for implicit fb - */ -static unsigned int -get_adjusted_max_bytes(struct snd_usb_substream *subs, - struct snd_usb_substream *pair, - struct snd_pcm_hw_params *params, - unsigned int max_bytes, - bool reverse_map) -{ - const struct audioformat *fp, *pp; - unsigned int rmax = 0, r; - - list_for_each_entry(fp, &subs->fmt_list, list) { - if (!fp->implicit_fb) - continue; - if (!reverse_map && - !hw_check_valid_format(subs, params, fp)) - continue; - list_for_each_entry(pp, &pair->fmt_list, list) { - if (pp->iface != fp->sync_iface || - pp->altsetting != fp->sync_altsetting || - pp->ep_idx != fp->sync_ep_idx) - continue; - if (reverse_map && - !hw_check_valid_format(pair, params, pp)) - break; - if (!reverse_map && pp->channels > fp->channels) - r = max_bytes * fp->channels / pp->channels; - else if (reverse_map && pp->channels < fp->channels) - r = max_bytes * pp->channels / fp->channels; - else - r = max_bytes; - rmax = max(rmax, r); - break; - } - } - return rmax; -} - -/* Reduce the period or buffer bytes depending on the paired substream; - * when a paired configuration for implicit fb has a higher number of channels, - * we need to reduce the max size accordingly, otherwise it may become unusable - */ -static int hw_rule_bytes_implicit_fb(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_usb_substream *subs = rule->private; - struct snd_usb_substream *pair; - struct snd_interval *it; - unsigned int max_bytes; - unsigned int rmax; - - pair = &subs->stream->substream[!subs->direction]; - if (!pair->ep_num) - return 0; - - if (rule->var == SNDRV_PCM_HW_PARAM_PERIOD_BYTES) - max_bytes = MAX_PERIOD_BYTES; - else - max_bytes = MAX_BUFFER_BYTES; - - rmax = get_adjusted_max_bytes(subs, pair, params, max_bytes, false); - if (!rmax) - rmax = get_adjusted_max_bytes(pair, subs, params, max_bytes, true); - if (!rmax) - return 0; - - it = hw_param_interval(params, rule->var); - return apply_hw_params_minmax(it, 0, rmax); -} - /* * set up the runtime hardware information. */ @@ -1139,6 +1064,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre return err; } + /* set max period and buffer sizes for 1 and 2 seconds, respectively */ + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_TIME, + 0, 1000000); + if (err < 0) + return err; + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_BUFFER_TIME, + 0, 2000000); + if (err < 0) + return err; + /* additional hw constraints for implicit fb */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, hw_rule_format_implicit_fb, subs, @@ -1160,16 +1097,6 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, - hw_rule_bytes_implicit_fb, subs, - SNDRV_PCM_HW_PARAM_BUFFER_BYTES, -1); - if (err < 0) - return err; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, - hw_rule_bytes_implicit_fb, subs, - SNDRV_PCM_HW_PARAM_PERIOD_BYTES, -1); - if (err < 0) - return err; list_for_each_entry(fp, &subs->fmt_list, list) { if (fp->implicit_fb) { From e16b859872b87650bb55b12cca5a5fcdc49c1442 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 12 Apr 2022 11:34:57 +0200 Subject: [PATCH 414/423] macvlan: Fix leaking skb in source mode with nodst option The MACVLAN receive handler clones skbs to all matching source MACVLAN interfaces, before it passes the packet along to match on destination based MACVLANs. When using the MACVLAN nodst mode, passing the packet to destination based MACVLANs is omitted and the handler returns with RX_HANDLER_CONSUMED. However, the passed skb is not freed, leaking for any packet processed with the nodst option. Properly free the skb when consuming packets to fix that leak. Fixes: 427f0c8c194b ("macvlan: Add nodst option to macvlan type source") Signed-off-by: Martin Willi Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 069e8824c264..b00bc8173abe 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; *pskb = skb; eth = eth_hdr(skb); - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } src = macvlan_hash_lookup(port, eth->h_source); if (src && src->mode != MACVLAN_MODE_VEPA && src->mode != MACVLAN_MODE_BRIDGE) { @@ -480,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; } - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); From 762c2998c9625f642f0d23da7d3f7e4f90665fdf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Apr 2022 12:44:26 +0300 Subject: [PATCH 415/423] Revert "net: dsa: setup master before ports" This reverts commit 11fd667dac315ea3f2469961f6d2869271a46cae. dsa_slave_change_mtu() updates the MTU of the DSA master and of the associated CPU port, but only if it detects a change to the master MTU. The blamed commit in the Fixes: tag below addressed a regression where dsa_slave_change_mtu() would return early and not do anything due to ds->ops->port_change_mtu() not being implemented. However, that commit also had the effect that the master MTU got set up to the correct value by dsa_master_setup(), but the associated CPU port's MTU did not get updated. This causes breakage for drivers that rely on the ->port_change_mtu() DSA call to account for the tagging overhead on the CPU port, and don't set up the initial MTU during the setup phase. Things actually worked before because they were in a fragile equilibrium where dsa_slave_change_mtu() was called before dsa_master_setup() was. So dsa_slave_change_mtu() could actually detect a change and update the CPU port MTU too. Restore the code to the way things used to work by reverting the reorder of dsa_tree_setup_master() and dsa_tree_setup_ports(). That change did not have a concrete motivation going for it anyway, it just looked better. Fixes: 066dfc429040 ("Revert "net: dsa: stop updating master MTU from master.c"") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ca6af86964bc..cf933225df32 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -562,7 +562,6 @@ static void dsa_port_teardown(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; struct dsa_switch *ds = dp->ds; - struct net_device *slave; if (!dp->setup) return; @@ -584,11 +583,9 @@ static void dsa_port_teardown(struct dsa_port *dp) dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - slave = dp->slave; - - if (slave) { + if (dp->slave) { + dsa_slave_destroy(dp->slave); dp->slave = NULL; - dsa_slave_destroy(slave); } break; } @@ -1147,17 +1144,17 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_cpu_ports; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_ports(dst); if (err) goto teardown_switches; - err = dsa_tree_setup_ports(dst); + err = dsa_tree_setup_master(dst); if (err) - goto teardown_master; + goto teardown_ports; err = dsa_tree_setup_lags(dst); if (err) - goto teardown_ports; + goto teardown_master; dst->setup = true; @@ -1165,10 +1162,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return 0; -teardown_ports: - dsa_tree_teardown_ports(dst); teardown_master: dsa_tree_teardown_master(dst); +teardown_ports: + dsa_tree_teardown_ports(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_cpu_ports: @@ -1186,10 +1183,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_lags(dst); - dsa_tree_teardown_ports(dst); - dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); dsa_tree_teardown_cpu_ports(dst); From 3d2504524531990b32a0629cc984db44f399d161 Mon Sep 17 00:00:00 2001 From: Dylan Hung Date: Tue, 12 Apr 2022 19:48:59 +0800 Subject: [PATCH 416/423] net: ftgmac100: access hardware register after clock ready AST2600 MAC register 0x58 is writable only when the MAC clock is enabled. Usually, the MAC clock is enabled by the bootloader so register 0x58 is set normally when the bootloader is involved. To make ast2600 ftgmac100 work without the bootloader, postpone the register write until the clock is ready. Fixes: 137d23cea1c0 ("net: ftgmac100: Fix Aspeed ast2600 TX hang issue") Signed-off-by: Dylan Hung Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index d5356db7539a..caf48023f8ea 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1835,11 +1835,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->rxdes0_edorr_mask = BIT(30); priv->txdes0_edotr_mask = BIT(30); priv->is_aspeed = true; - /* Disable ast2600 problematic HW arbitration */ - if (of_device_is_compatible(np, "aspeed,ast2600-mac")) { - iowrite32(FTGMAC100_TM_DEFAULT, - priv->base + FTGMAC100_OFFSET_TM); - } } else { priv->rxdes0_edorr_mask = BIT(15); priv->txdes0_edotr_mask = BIT(15); @@ -1911,6 +1906,11 @@ static int ftgmac100_probe(struct platform_device *pdev) err = ftgmac100_setup_clk(priv); if (err) goto err_phy_connect; + + /* Disable ast2600 problematic HW arbitration */ + if (of_device_is_compatible(np, "aspeed,ast2600-mac")) + iowrite32(FTGMAC100_TM_DEFAULT, + priv->base + FTGMAC100_OFFSET_TM); } /* Default ring sizes */ From 2511e0c87786f333c4665508f421ac99e378c719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Tue, 12 Apr 2022 17:55:27 +0200 Subject: [PATCH 417/423] net: dsa: realtek: fix Kconfig to assure consistent driver linkage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel test robot reported a build failure: or1k-linux-ld: drivers/net/dsa/realtek/realtek-smi.o:(.rodata+0x16c): undefined reference to `rtl8366rb_variant' ... with the following build configuration: CONFIG_NET_DSA_REALTEK=y CONFIG_NET_DSA_REALTEK_SMI=y CONFIG_NET_DSA_REALTEK_RTL8365MB=y CONFIG_NET_DSA_REALTEK_RTL8366RB=m The problem here is that the realtek-smi interface driver gets built-in, while the rtl8366rb switch subdriver gets built as a module, hence the symbol rtl8366rb_variant is not reachable when defining the OF device table in the interface driver. The Kconfig dependencies don't help in this scenario because they just say that the subdriver(s) depend on at least one interface driver. In fact, the subdrivers don't depend on the interface drivers at all, and can even be built even in their absence. Somewhat strangely, the interface drivers can also be built in the absence of any subdriver, BUT, if a subdriver IS enabled, then it must be reachable according to the linkage of the interface driver: effectively what the IS_REACHABLE() macro achieves. If it is not reachable, the above kind of linker error will be observed. Rather than papering over the above build error by simply using IS_REACHABLE(), we can do a little better and admit that it is actually the interface drivers that have a dependency on the subdrivers. So this patch does exactly that. Specifically, we ensure that: 1. The interface drivers' Kconfig symbols must have a value no greater than the value of any subdriver Kconfig symbols. 2. The subdrivers should by default enable both interface drivers, since most users probably want at least one of them; those interface drivers can be explicitly disabled however. What this doesn't do is prevent a user from building only a subdriver, without any interface driver. To that end, add an additional line of help in the menu to guide users in the right direction. Link: https://lore.kernel.org/all/202204110757.XIafvVnj-lkp@intel.com/ Reported-by: kernel test robot Fixes: aac94001067d ("net: dsa: realtek: add new mdio interface for drivers") Signed-off-by: Alvin Šipraga Signed-off-by: David S. Miller --- drivers/net/dsa/realtek/Kconfig | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 1aa79735355f..060165a85fb7 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -9,34 +9,46 @@ menuconfig NET_DSA_REALTEK help Select to enable support for Realtek Ethernet switch chips. + Note that at least one interface driver must be enabled for the + subdrivers to be loaded. Moreover, an interface driver cannot achieve + anything without at least one subdriver enabled. + +if NET_DSA_REALTEK + config NET_DSA_REALTEK_MDIO - tristate "Realtek MDIO connected switch driver" - depends on NET_DSA_REALTEK + tristate "Realtek MDIO interface driver" depends on OF + depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB + depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB + depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB help Select to enable support for registering switches configured through MDIO. config NET_DSA_REALTEK_SMI - tristate "Realtek SMI connected switch driver" - depends on NET_DSA_REALTEK + tristate "Realtek SMI interface driver" depends on OF + depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB + depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB + depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB help Select to enable support for registering switches connected through SMI. config NET_DSA_REALTEK_RTL8365MB tristate "Realtek RTL8365MB switch subdriver" - depends on NET_DSA_REALTEK - depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + imply NET_DSA_REALTEK_SMI + imply NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL8_4 help Select to enable support for Realtek RTL8365MB-VC and RTL8367S. config NET_DSA_REALTEK_RTL8366RB tristate "Realtek RTL8366RB switch subdriver" - depends on NET_DSA_REALTEK - depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + imply NET_DSA_REALTEK_SMI + imply NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL4_A help - Select to enable support for Realtek RTL8366RB + Select to enable support for Realtek RTL8366RB. + +endif From 8e925de60ddaeccb455f0bdad17ce9d8cc2db2e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Tue, 12 Apr 2022 17:57:49 +0200 Subject: [PATCH 418/423] net: dsa: realtek: don't parse compatible string for RTL8366S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This switch is not even supported, but if someone were to actually put this compatible string "realtek,rtl8366s" in their device tree, they would be greeted with a kernel panic because the probe function would dereference NULL. So let's just remove it. Link: https://lore.kernel.org/all/CACRpkdYdKZs0WExXc3=0yPNOwP+oOV60HRz7SRoGjZvYHaT=1g@mail.gmail.com/ Signed-off-by: Alvin Šipraga Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/realtek/realtek-smi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index 2243d3da55b2..6cec559c90ce 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -546,11 +546,6 @@ static const struct of_device_id realtek_smi_of_match[] = { .data = &rtl8366rb_variant, }, #endif - { - /* FIXME: add support for RTL8366S and more */ - .compatible = "realtek,rtl8366s", - .data = NULL, - }, #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) { .compatible = "realtek,rtl8365mb", From ef27324e2cb7bb24542d6cb2571740eefe6b00dc Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 13 Apr 2022 00:04:30 +0800 Subject: [PATCH 419/423] nfc: nci: add flush_workqueue to prevent uaf Our detector found a concurrent use-after-free bug when detaching an NCI device. The main reason for this bug is the unexpected scheduling between the used delayed mechanism (timer and workqueue). The race can be demonstrated below: Thread-1 Thread-2 | nci_dev_up() | nci_open_device() | __nci_request(nci_reset_req) | nci_send_cmd | queue_work(cmd_work) nci_unregister_device() | nci_close_device() | ... del_timer_sync(cmd_timer)[1] | ... | Worker nci_free_device() | nci_cmd_work() kfree(ndev)[3] | mod_timer(cmd_timer)[2] In short, the cleanup routine thought that the cmd_timer has already been detached by [1] but the mod_timer can re-attach the timer [2], even it is already released [3], resulting in UAF. This UAF is easy to trigger, crash trace by POC is like below [ 66.703713] ================================================================== [ 66.703974] BUG: KASAN: use-after-free in enqueue_timer+0x448/0x490 [ 66.703974] Write of size 8 at addr ffff888009fb7058 by task kworker/u4:1/33 [ 66.703974] [ 66.703974] CPU: 1 PID: 33 Comm: kworker/u4:1 Not tainted 5.18.0-rc2 #5 [ 66.703974] Workqueue: nfc2_nci_cmd_wq nci_cmd_work [ 66.703974] Call Trace: [ 66.703974] [ 66.703974] dump_stack_lvl+0x57/0x7d [ 66.703974] print_report.cold+0x5e/0x5db [ 66.703974] ? enqueue_timer+0x448/0x490 [ 66.703974] kasan_report+0xbe/0x1c0 [ 66.703974] ? enqueue_timer+0x448/0x490 [ 66.703974] enqueue_timer+0x448/0x490 [ 66.703974] __mod_timer+0x5e6/0xb80 [ 66.703974] ? mark_held_locks+0x9e/0xe0 [ 66.703974] ? try_to_del_timer_sync+0xf0/0xf0 [ 66.703974] ? lockdep_hardirqs_on_prepare+0x17b/0x410 [ 66.703974] ? queue_work_on+0x61/0x80 [ 66.703974] ? lockdep_hardirqs_on+0xbf/0x130 [ 66.703974] process_one_work+0x8bb/0x1510 [ 66.703974] ? lockdep_hardirqs_on_prepare+0x410/0x410 [ 66.703974] ? pwq_dec_nr_in_flight+0x230/0x230 [ 66.703974] ? rwlock_bug.part.0+0x90/0x90 [ 66.703974] ? _raw_spin_lock_irq+0x41/0x50 [ 66.703974] worker_thread+0x575/0x1190 [ 66.703974] ? process_one_work+0x1510/0x1510 [ 66.703974] kthread+0x2a0/0x340 [ 66.703974] ? kthread_complete_and_exit+0x20/0x20 [ 66.703974] ret_from_fork+0x22/0x30 [ 66.703974] [ 66.703974] [ 66.703974] Allocated by task 267: [ 66.703974] kasan_save_stack+0x1e/0x40 [ 66.703974] __kasan_kmalloc+0x81/0xa0 [ 66.703974] nci_allocate_device+0xd3/0x390 [ 66.703974] nfcmrvl_nci_register_dev+0x183/0x2c0 [ 66.703974] nfcmrvl_nci_uart_open+0xf2/0x1dd [ 66.703974] nci_uart_tty_ioctl+0x2c3/0x4a0 [ 66.703974] tty_ioctl+0x764/0x1310 [ 66.703974] __x64_sys_ioctl+0x122/0x190 [ 66.703974] do_syscall_64+0x3b/0x90 [ 66.703974] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 66.703974] [ 66.703974] Freed by task 406: [ 66.703974] kasan_save_stack+0x1e/0x40 [ 66.703974] kasan_set_track+0x21/0x30 [ 66.703974] kasan_set_free_info+0x20/0x30 [ 66.703974] __kasan_slab_free+0x108/0x170 [ 66.703974] kfree+0xb0/0x330 [ 66.703974] nfcmrvl_nci_unregister_dev+0x90/0xd0 [ 66.703974] nci_uart_tty_close+0xdf/0x180 [ 66.703974] tty_ldisc_kill+0x73/0x110 [ 66.703974] tty_ldisc_hangup+0x281/0x5b0 [ 66.703974] __tty_hangup.part.0+0x431/0x890 [ 66.703974] tty_release+0x3a8/0xc80 [ 66.703974] __fput+0x1f0/0x8c0 [ 66.703974] task_work_run+0xc9/0x170 [ 66.703974] exit_to_user_mode_prepare+0x194/0x1a0 [ 66.703974] syscall_exit_to_user_mode+0x19/0x50 [ 66.703974] do_syscall_64+0x48/0x90 [ 66.703974] entry_SYSCALL_64_after_hwframe+0x44/0xae To fix the UAF, this patch adds flush_workqueue() to ensure the nci_cmd_work is finished before the following del_timer_sync. This combination will promise the timer is actually detached. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Signed-off-by: Lin Ma Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index d2537383a3e8..6a193cce2a75 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -560,6 +560,10 @@ static int nci_close_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + /* Need to flush the cmd wq in case + * there is a queued/running cmd_work + */ + flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); From 968a1a5d6541cd24e37dadc1926eab9c10aeb09b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Apr 2022 15:58:52 +0200 Subject: [PATCH 420/423] tun: annotate access to queue->trans_start Commit 5337824f4dc4 ("net: annotate accesses to queue->trans_start") introduced a new helper, txq_trans_cond_update, to update queue->trans_start using WRITE_ONCE. One snippet in drivers/net/tun.c was missed, as it was introduced roughly at the same time. Fixes: 5337824f4dc4 ("net: annotate accesses to queue->trans_start") Cc: Eric Dumazet Signed-off-by: Antoine Tenart Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220412135852.466386-1-atenart@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 276a0e42ca8e..dbe4c0a4be2c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1124,7 +1124,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* NETIF_F_LLTX requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); - queue->trans_start = jiffies; + txq_trans_cond_update(queue); /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) From 00fa91bc9cc2a9d340f963af5e457610ad4b2f9c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Apr 2022 20:22:09 +0300 Subject: [PATCH 421/423] net: dsa: felix: fix tagging protocol changes with multiple CPU ports When the device tree has 2 CPU ports defined, a single one is active (has any dp->cpu_dp pointers point to it). Yet the second one is still a CPU port, and DSA still calls ->change_tag_protocol on it. On the NXP LS1028A, the CPU ports are ports 4 and 5. Port 4 is the active CPU port and port 5 is inactive. After the following commands: # Initial setting cat /sys/class/net/eno2/dsa/tagging ocelot echo ocelot-8021q > /sys/class/net/eno2/dsa/tagging echo ocelot > /sys/class/net/eno2/dsa/tagging traffic is now broken, because the driver has moved the NPI port from port 4 to port 5, unbeknown to DSA. The problem can be avoided by detecting that the second CPU port is unused, and not doing anything for it. Further rework will be needed when proper support for multiple CPU ports is added. Treat this as a bug and prepare current kernels to work in single-CPU mode with multiple-CPU DT blobs. Fixes: adb3dccf090b ("net: dsa: felix: convert to the new .change_tag_protocol DSA API") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220412172209.2531865-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/ocelot/felix.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 413b0006e9a2..9e28219b223d 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -670,6 +670,8 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); enum dsa_tag_protocol old_proto = felix->tag_proto; + bool cpu_port_active = false; + struct dsa_port *dp; int err; if (proto != DSA_TAG_PROTO_SEVILLE && @@ -677,6 +679,27 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, proto != DSA_TAG_PROTO_OCELOT_8021Q) return -EPROTONOSUPPORT; + /* We don't support multiple CPU ports, yet the DT blob may have + * multiple CPU ports defined. The first CPU port is the active one, + * the others are inactive. In this case, DSA will call + * ->change_tag_protocol() multiple times, once per CPU port. + * Since we implement the tagging protocol change towards "ocelot" or + * "seville" as effectively initializing the NPI port, what we are + * doing is effectively changing who the NPI port is to the last @cpu + * argument passed, which is an unused DSA CPU port and not the one + * that should actively pass traffic. + * Suppress DSA's calls on CPU ports that are inactive. + */ + dsa_switch_for_each_user_port(dp, ds) { + if (dp->cpu_dp->index == cpu) { + cpu_port_active = true; + break; + } + } + + if (!cpu_port_active) + return 0; + felix_del_tag_protocol(ds, cpu, old_proto); err = felix_set_tag_protocol(ds, cpu, proto); From 23cfe941b52e2fa645bdfd770087128a74c7dbee Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 12 Apr 2022 22:25:06 +0200 Subject: [PATCH 422/423] rtnetlink: Fix handling of disabled L3 stats in RTM_GETSTATS replies When L3 stats are disabled, rtnl_offload_xstats_get_size_stats() returns size of 0, which is supposed to be an indication that the corresponding attribute should not be emitted. However, instead, the current code reserves a 0-byte attribute. The reason this does not show up as a citation on a kasan kernel is that netdev_offload_xstats_get(), which is supposed to fill in the data, never ends up getting called, because rtnl_offload_xstats_get_stats() notices that the stats are not actually used and skips the call. Thus a zero-length IFLA_OFFLOAD_XSTATS_L3_STATS attribute ends up in a response, confusing the userspace. Fix by skipping the L3-stats related block in rtnl_offload_xstats_fill(). Fixes: 0e7788fd7622 ("net: rtnetlink: Add UAPI for obtaining L3 offload xstats") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/591b58e7623edc3eb66dd1fcfa8c8f133d090974.1649794741.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 159c9c61e6af..d1381ea6d52e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5242,6 +5242,8 @@ static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, *prividx = attr_id_l3_stats; size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3); + if (!size_l3) + goto skip_l3_stats; attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) @@ -5253,6 +5255,7 @@ static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, return err; have_data = true; +skip_l3_stats: *prividx = 0; } From 2df3fc4a84e917a422935cc5bae18f43f9955d31 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Tue, 12 Apr 2022 16:04:20 -0500 Subject: [PATCH 423/423] net: bcmgenet: Revert "Use stronger register read/writes to assure ordering" It turns out after digging deeper into this bug, that it was being triggered by GCC12 failing to call the bcmgenet_enable_dma() routine. Given that a gcc12 fix has been merged [1] and the genet driver now works properly when built with gcc12, this commit should be reverted. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160 https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aabb9a261ef060cf24fd626713f1d7d9df81aa57 Fixes: 8d3ea3d402db ("net: bcmgenet: Use stronger register read/writes to assure ordering") Signed-off-by: Jeremy Linton Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220412210420.1129430-1-jeremy.linton@arm.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2dd79af9411b..9a41145dadfc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) __raw_writel(value, offset); else - writel(value, offset); + writel_relaxed(value, offset); } static inline u32 bcmgenet_readl(void __iomem *offset) @@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) return __raw_readl(offset); else - return readl(offset); + return readl_relaxed(offset); } static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,